Commit 229c7fea authored by Peter Parente, committed by GitHub

Merge pull request #1115 from Bidek56/miniconda-py38

Miniconda py38 and Spark 3.0
parents 54462805 391fdcce
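
In sum: hadolint moves from v1.17.6 to v1.18.0, the Miniconda 4.8.2 installer switches from its py37 to its py38 build (with the matching MD5 pin), JupyterLab goes from 2.1.3 to 2.1.4, Spark jumps from 2.4.5 to 3.0.0 (Hadoop 2.7 to 3.2, OpenJDK 8 to 11, py4j 0.10.7 to 0.10.9), and the Apache Toree kernel and its test notebook are dropped. A minimal smoke test of the rebuilt images, assuming they are tagged under the usual jupyter/<image> names, might look like:

    docker run --rm jupyter/base-notebook python --version           # expect Python 3.8.x
    docker run --rm jupyter/base-notebook jupyter lab --version      # expect 2.1.4
    docker run --rm jupyter/pyspark-notebook spark-submit --version  # expect Spark 3.0.0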
@@ -88,7 +88,7 @@ lint-build-test-all: $(foreach I,$(ALL_IMAGES),lint/$(I) arch_patch/$(I) build/$
 lint-install: ## install hadolint
 	@echo "Installing hadolint at $(HADOLINT) ..."
-	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.17.6/hadolint-$(shell uname -s)-$(shell uname -m)"
+	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-$(shell uname -s)-$(shell uname -m)"
 	@chmod 700 $(HADOLINT)
 	@echo "Installation done!"
 	@$(HADOLINT) --version
...
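
The only change in this Makefile hunk is the hadolint pin, v1.17.6 to v1.18.0; usage is unchanged. For example:

    # Fetch the pinned hadolint binary, then lint one image's Dockerfile
    # (the per-image lint/<image> targets are the ones fanned out by
    # lint-build-test-all above):
    make lint-install
    make lint/base-notebook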
@@ -33,16 +33,6 @@ RUN conda install --quiet --yes \
     fix-permissions "${CONDA_DIR}" && \
     fix-permissions "/home/${NB_USER}"
 
-# Apache Toree kernel
-# hadolint ignore=DL3013
-RUN pip install --no-cache-dir \
-    https://dist.apache.org/repos/dist/release/incubator/toree/0.3.0-incubating/toree-pip/toree-0.3.0.tar.gz \
-    && \
-    jupyter toree install --sys-prefix && \
-    rm -rf "/home/${NB_USER}/.local" && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
-
 # Spylon-kernel
 RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \
     conda clean --all -f -y && \
...
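
This hunk, apparently from the all-spark-notebook Dockerfile, removes the pip-installed Apache Toree 0.3.0 kernel, presumably because Toree 0.3.0 targets Spark 2.x, while the spylon-kernel Scala kernel stays. The JSON below appears to be the deleted Toree smoke-test notebook. To confirm which kernels remain in a rebuilt image (image name assumed):

    # apache_toree_scala should no longer appear in the listing:
    docker run --rm jupyter/all-spark-notebook jupyter kernelspec list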
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Waiting for a Spark session to start..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"spark://master:7077\n"
]
}
],
"source": [
"// should print the value of --master in the kernel spec\n",
"println(sc.master)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Waiting for a Spark session to start..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"5050.0"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"// Sum of the first 100 whole numbers\n",
"val rdd = sc.parallelize(0 to 100)\n",
"rdd.sum()\n",
"// 5050"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Apache Toree - Scala",
"language": "scala",
"name": "apache_toree_scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "text/x-scala",
"name": "scala",
"pygments_lexer": "scala",
"version": "2.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
\ No newline at end of file
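
The deleted notebook checked only two things: that sc.master reflects the --master value in the kernel spec, and that summing 0 through 100 yields 5050. A rough equivalent can still be run through PySpark, which remains in the image (image name and local master are assumptions):

    docker run --rm jupyter/all-spark-notebook python -c \
      "import pyspark; sc = pyspark.SparkContext('local'); print(sc.master); print(sc.parallelize(range(101)).sum())"
    # expect: local, then 5050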
@@ -12,7 +12,7 @@ LOGGER = logging.getLogger(__name__)
 @pytest.mark.parametrize(
     "test_file",
     # TODO: add local_sparklyr
-    ["local_pyspark", "local_spylon", "local_toree", "local_sparkR"],
+    ["local_pyspark", "local_spylon", "local_sparkR"],
 )
 def test_nbconvert(container, test_file):
     """Check if Spark notebooks can be executed"""
...
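
With the notebook gone, local_toree leaves the parametrized list so nbconvert no longer tries to execute it. A single remaining case can be exercised with pytest's -k filter; the path is assumed from the repo layout, and the test still needs the suite's container fixture:

    pytest all-spark-notebook/test/test_spark_notebooks.py -k local_spylon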
@@ -76,14 +76,14 @@ RUN mkdir /home/$NB_USER/work && \
 # Install conda as jovyan and check the md5 sum provided on the download site
 ENV MINICONDA_VERSION=4.8.2 \
-    MINICONDA_MD5=87e77f097f6ebb5127c77662dfc3165e \
+    MINICONDA_MD5=cbda751e713b5a95f187ae70b509403f \
     CONDA_VERSION=4.8.2
 WORKDIR /tmp
-RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
-    echo "${MINICONDA_MD5} *Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
-    /bin/bash Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
-    rm Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
+RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh && \
+    echo "${MINICONDA_MD5} *Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
+    /bin/bash Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
+    rm Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh && \
     echo "conda ${CONDA_VERSION}" >> $CONDA_DIR/conda-meta/pinned && \
     conda config --system --prepend channels conda-forge && \
     conda config --system --set auto_update_conda false && \
@@ -115,7 +115,7 @@ RUN conda install --quiet --yes 'tini=0.18.0' && \
 RUN conda install --quiet --yes \
     'notebook=6.0.3' \
     'jupyterhub=1.1.0' \
-    'jupyterlab=2.1.3' && \
+    'jupyterlab=2.1.4' && \
     conda clean --all -f -y && \
     npm cache clean --force && \
     jupyter notebook --generate-config && \
...
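
Only the installer filename (py37 to py38) and its MD5 pin change; conda itself stays pinned at 4.8.2, and JupyterLab moves one patch release, 2.1.3 to 2.1.4. When the installer is bumped again, the pin can be refreshed the same way the Dockerfile verifies it:

    # URL mirrors the RUN line above; the checksum printed should match the
    # MINICONDA_MD5 pinned in this diff:
    wget -q https://repo.continuum.io/miniconda/Miniconda3-py38_4.8.2-Linux-x86_64.sh
    md5sum Miniconda3-py38_4.8.2-Linux-x86_64.sh
    # cbda751e713b5a95f187ae70b509403f  Miniconda3-py38_4.8.2-Linux-x86_64.sh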
@@ -11,19 +11,20 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 USER root
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION=2.4.5 \
-    HADOOP_VERSION=2.7
+ENV APACHE_SPARK_VERSION=3.0.0 \
+    HADOOP_VERSION=3.2
 RUN apt-get -y update && \
-    apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
+    apt-get install --no-install-recommends -y openjdk-11-jre-headless ca-certificates-java && \
     rm -rf /var/lib/apt/lists/*
 
 # Using the preferred mirror to download Spark
 WORKDIR /tmp
 # hadolint ignore=SC2046
 RUN wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \
     python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \
-    echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    echo "BFE45406C67CC4AE00411AD18CC438F51E7D4B6F14EB61E7BF6B5450897C2E8D3AB020152657C0239F253735C263512FFABF538AC5B9FFFA38B8295736A9C387 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
     tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
     rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
@@ -32,7 +33,7 @@ RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark
 # Configure Spark
 ENV SPARK_HOME=/usr/local/spark
-ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
+ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip \
     SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
     PATH=$PATH:$SPARK_HOME/bin
...
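
The py4j bump is easy to miss but load-bearing: PySpark fails to import if PYTHONPATH names a py4j zip that the Spark tarball no longer ships. A quick consistency check against the rebuilt image (name assumed):

    # The zip on PYTHONPATH must match what Spark 3.0.0 actually bundles:
    docker run --rm jupyter/pyspark-notebook \
      bash -c 'ls "$SPARK_HOME/python/lib/" && python -c "import pyspark; print(pyspark.__version__)"'
    # expect py4j-0.10.9-src.zip in the listing and 3.0.0 printed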