Commit 5027228b authored by jakirkham

Merge pull request #115 from parente/spark-1.6.0-toree

Bump to Spark 1.6.0 with Apache Toree
parents 55d5ca6b 83f9f834
@@ -10,11 +10,15 @@ USER root
 RUN apt-get -y update && apt-get -y install jq
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION 1.5.1
+ENV APACHE_SPARK_VERSION 1.6.0
 RUN apt-get -y update && \
     apt-get install -y --no-install-recommends openjdk-7-jre-headless && \
     apt-get clean
-RUN wget -qO - http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz | tar -xz -C /usr/local/
+RUN cd /tmp && \
+    wget -q http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz && \
+    echo "439fe7793e0725492d3d36448adcd1db38f438dd1392bffd556b58bb9a3a2601 *spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz" | sha256sum -c - && \
+    tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz -C /usr/local && \
+    rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz
 RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6 spark
 
 # Mesos dependencies
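The new download step verifies the Spark tarball against a pinned SHA-256 digest instead of piping it straight into tar. The same check can be reproduced outside the image build; a minimal Python sketch, assuming the tarball is already in the current directory (the file name and digest come from the RUN step above):

```python
import hashlib

# Pinned digest copied from the Dockerfile's sha256sum check above
EXPECTED = "439fe7793e0725492d3d36448adcd1db38f438dd1392bffd556b58bb9a3a2601"

def sha256_of(path, chunk_size=1 << 20):
    """Hash the file in chunks so a large tarball never sits in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("spark-1.6.0-bin-hadoop2.6.tgz") == EXPECTED, "checksum mismatch"
```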
@@ -29,17 +33,18 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF && \
 
 # Scala Spark kernel (build and cleanup)
 RUN cd /tmp && \
     echo deb http://dl.bintray.com/sbt/debian / > /etc/apt/sources.list.d/sbt.list && \
+    apt-key adv --keyserver keyserver.ubuntu.com --recv 99E82A75642AC823 && \
     apt-get update && \
-    git clone https://github.com/ibm-et/spark-kernel.git && \
+    git clone https://github.com/apache/incubator-toree.git && \
     apt-get install -yq --force-yes --no-install-recommends sbt && \
-    cd spark-kernel && \
-    git checkout 3905e47815 && \
+    cd incubator-toree && \
+    git checkout 846292233c && \
     make dist SHELL=/bin/bash && \
-    mv dist/spark-kernel /opt/spark-kernel && \
-    chmod +x /opt/spark-kernel && \
+    mv dist/toree-kernel /opt/toree-kernel && \
+    chmod +x /opt/toree-kernel && \
     rm -rf ~/.ivy2 && \
     rm -rf ~/.sbt && \
-    rm -rf /tmp/spark-kernel && \
+    rm -rf /tmp/incubator-toree && \
     apt-get remove -y sbt && \
     apt-get clean
...
@@ -8,7 +8,7 @@
 * Scala 2.10.x
 * pyspark, pandas, matplotlib, scipy, seaborn, scikit-learn pre-installed for Python
 * ggplot2, rcurl preinstalled for R
-* Spark 1.5.1 for use in local mode or to connect to a cluster of Spark workers
+* Spark 1.6.0 for use in local mode or to connect to a cluster of Spark workers
 * Mesos client 0.22 binary that can communicate with a Mesos master
 * Unprivileged user `jovyan` (uid=1000, configurable, see options) in group `users` (gid=100) with ownership over `/home/jovyan` and `/opt/conda`
 * [tini](https://github.com/krallin/tini) as the container entrypoint and [start-notebook.sh](../minimal-notebook/start-notebook.sh) as the default command
@@ -106,8 +106,8 @@ conf = pyspark.SparkConf()
 # point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos)
 conf.setMaster("mesos://10.10.10.10:5050")
 # point to spark binary package in HDFS or on local filesystem on all slave
-# nodes (e.g., file:///opt/spark/spark-1.5.1-bin-hadoop2.6.tgz)
-conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-1.5.1-bin-hadoop2.6.tgz")
+# nodes (e.g., file:///opt/spark/spark-1.6.0-bin-hadoop2.6.tgz)
+conf.set("spark.executor.uri", "hdfs://10.10.10.10/spark/spark-1.6.0-bin-hadoop2.6.tgz")
 # set other options as desired
 conf.set("spark.executor.memory", "8g")
 conf.set("spark.core.connection.ack.wait.timeout", "1200")
@@ -139,10 +139,10 @@ library(SparkR)
 # point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos)\
 # as the first argument
 # point to spark binary package in HDFS or on local filesystem on all slave
-# nodes (e.g., file:///opt/spark/spark-1.5.1-bin-hadoop2.6.tgz) in sparkEnvir
+# nodes (e.g., file:///opt/spark/spark-1.6.0-bin-hadoop2.6.tgz) in sparkEnvir
 # set other options in sparkEnvir
 sc <- sparkR.init("mesos://10.10.10.10:5050", sparkEnvir=list(
-    spark.executor.uri="hdfs://10.10.10.10/spark/spark-1.5.1-bin-hadoop2.6.tgz",
+    spark.executor.uri="hdfs://10.10.10.10/spark/spark-1.6.0-bin-hadoop2.6.tgz",
     spark.executor.memory="8g"
 )
 )
@@ -176,7 +176,7 @@ For instance, a kernel spec file with information about a Mesos master, Spark bi
     "--master=mesos://10.10.10.10:5050"
   ],
   "env": {
-    "SPARK_CONFIGURATION": "spark.executor.memory=8g,spark.executor.uri=hdfs://10.10.10.10/spark/spark-1.5.1-bin-hadoop2.6.tgz"
+    "SPARK_CONFIGURATION": "spark.executor.memory=8g,spark.executor.uri=hdfs://10.10.10.10/spark/spark-1.6.0-bin-hadoop2.6.tgz"
   }
 }
 ```
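Kernel spec files like the one in this hunk live in a Jupyter kernels directory (for example `/usr/local/share/jupyter/kernels/<name>/kernel.json`). A hedged sketch of generating such a variant spec programmatically; the target path and kernel name are illustrative, and the values mirror this diff:

```python
import json
import os

spec = {
    "display_name": "Apache Toree (Scala 2.10.4)",
    "language": "scala",
    "argv": [
        "/opt/toree-kernel/bin/toree-kernel",
        "--profile",
        "{connection_file}",
        "--master=mesos://10.10.10.10:5050",
    ],
    "env": {
        # Same comma-separated property list the diff sets for Mesos runs
        "SPARK_CONFIGURATION": ",".join([
            "spark.executor.memory=8g",
            "spark.executor.uri=hdfs://10.10.10.10/spark/spark-1.6.0-bin-hadoop2.6.tgz",
        ]),
    },
}

kernel_dir = "/usr/local/share/jupyter/kernels/toree-mesos"  # illustrative path
os.makedirs(kernel_dir, exist_ok=True)
with open(os.path.join(kernel_dir, "kernel.json"), "w") as f:
    json.dump(spec, f, indent=2)
```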
...
 {
-  "display_name": "Scala 2.10.4",
+  "display_name": "Apache Toree (Scala 2.10.4)",
   "language": "scala",
   "argv": [
-    "/opt/spark-kernel/bin/spark-kernel",
+    "/opt/toree-kernel/bin/toree-kernel",
     "--profile",
     "{connection_file}"
   ]
 }
\ No newline at end of file
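After rebuilding the image, the renamed spec should appear among Jupyter's registered kernels. A small sketch using `jupyter_client` to confirm (assuming a reasonably recent `jupyter_client` is installed in the image, as it is in these notebook stacks):

```python
from jupyter_client.kernelspec import KernelSpecManager

# Maps kernel name -> {"resource_dir": ..., "spec": {...kernel.json contents...}}
specs = KernelSpecManager().get_all_specs()
for name, info in specs.items():
    print(name, "->", info["spec"]["display_name"])
# One entry should now read "Apache Toree (Scala 2.10.4)"
```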
@@ -10,11 +10,15 @@ USER root
 RUN apt-get -y update && apt-get -y install jq
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION 1.5.1
+ENV APACHE_SPARK_VERSION 1.6.0
 RUN apt-get -y update && \
     apt-get install -y --no-install-recommends openjdk-7-jre-headless && \
     apt-get clean
-RUN wget -qO - http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz | tar -xz -C /usr/local/
+RUN cd /tmp && \
+    wget -q http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz && \
+    echo "439fe7793e0725492d3d36448adcd1db38f438dd1392bffd556b58bb9a3a2601 *spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz" | sha256sum -c - && \
+    tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz -C /usr/local && \
+    rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz
 RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6 spark
 
 # Mesos dependencies
...
@@ -5,7 +5,7 @@
 * Jupyter Notebook 4.1.x
 * Conda Python 3.x and Python 2.7.x environments
 * pyspark, pandas, matplotlib, scipy, seaborn, scikit-learn pre-installed
-* Spark 1.5.1 for use in local mode or to connect to a cluster of Spark workers
+* Spark 1.6.0 for use in local mode or to connect to a cluster of Spark workers
 * Mesos client 0.22 binary that can communicate with a Mesos master
 * Unprivileged user `jovyan` (uid=1000, configurable, see options) in group `users` (gid=100) with ownership over `/home/jovyan` and `/opt/conda`
 * [tini](https://github.com/krallin/tini) as the container entrypoint and [start-notebook.sh](../minimal-notebook/start-notebook.sh) as the default command
@@ -64,8 +64,8 @@ conf = pyspark.SparkConf()
 # point to mesos master or zookeeper entry (e.g., zk://10.10.10.10:2181/mesos)
 conf.setMaster("mesos://10.10.10.10:5050")
 # point to spark binary package in HDFS or on local filesystem on all slave
-# nodes (e.g., file:///opt/spark/spark-1.5.1-bin-hadoop2.6.tgz)
-conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-1.5.1-bin-hadoop2.6.tgz")
+# nodes (e.g., file:///opt/spark/spark-1.6.0-bin-hadoop2.6.tgz)
+conf.set("spark.executor.uri", "hdfs://10.122.193.209/spark/spark-1.6.0-bin-hadoop2.6.tgz")
 # set other options as desired
 conf.set("spark.executor.memory", "8g")
 conf.set("spark.core.connection.ack.wait.timeout", "1200")
...