Commit 229c7fea authored by Peter Parente, committed by GitHub

Merge pull request #1115 from Bidek56/miniconda-py38

Miniconda py38 and Spark 3.0
parents 54462805 391fdcce
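
In sum: hadolint moves from v1.17.6 to v1.18.0, the Miniconda 4.8.2 installer switches from its py37 to its py38 build (with the matching MD5 pin), JupyterLab goes from 2.1.3 to 2.1.4, Spark jumps from 2.4.5 to 3.0.0 (Hadoop 2.7 to 3.2, OpenJDK 8 to 11, py4j 0.10.7 to 0.10.9), and the Apache Toree kernel and its test notebook are dropped. A minimal smoke test of the rebuilt images, assuming they are tagged under the usual jupyter/<image> names, might look like:

    docker run --rm jupyter/base-notebook python --version           # expect Python 3.8.x
    docker run --rm jupyter/base-notebook jupyter lab --version      # expect 2.1.4
    docker run --rm jupyter/pyspark-notebook spark-submit --version  # expect Spark 3.0.0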
@@ -88,7 +88,7 @@ lint-build-test-all: $(foreach I,$(ALL_IMAGES),lint/$(I) arch_patch/$(I) build/$
 lint-install: ## install hadolint
 	@echo "Installing hadolint at $(HADOLINT) ..."
-	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.17.6/hadolint-$(shell uname -s)-$(shell uname -m)"
+	@curl -sL -o $(HADOLINT) "https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-$(shell uname -s)-$(shell uname -m)"
 	@chmod 700 $(HADOLINT)
 	@echo "Installation done!"
 	@$(HADOLINT) --version
...
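
The only change in this Makefile hunk is the hadolint pin, v1.17.6 to v1.18.0; usage is unchanged. For example:

    # Fetch the pinned hadolint binary, then lint one image's Dockerfile
    # (the per-image lint/<image> targets are the ones fanned out by
    # lint-build-test-all above):
    make lint-install
    make lint/base-notebook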
@@ -33,16 +33,6 @@ RUN conda install --quiet --yes \
     fix-permissions "${CONDA_DIR}" && \
     fix-permissions "/home/${NB_USER}"
 
-# Apache Toree kernel
-# hadolint ignore=DL3013
-RUN pip install --no-cache-dir \
-    https://dist.apache.org/repos/dist/release/incubator/toree/0.3.0-incubating/toree-pip/toree-0.3.0.tar.gz \
-    && \
-    jupyter toree install --sys-prefix && \
-    rm -rf "/home/${NB_USER}/.local" && \
-    fix-permissions "${CONDA_DIR}" && \
-    fix-permissions "/home/${NB_USER}"
-
 # Spylon-kernel
 RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \
     conda clean --all -f -y && \
...
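
This hunk, apparently from the all-spark-notebook Dockerfile, removes the pip-installed Apache Toree 0.3.0 kernel, presumably because Toree 0.3.0 targets Spark 2.x, while the spylon-kernel Scala kernel stays. The JSON below appears to be the deleted Toree smoke-test notebook. To confirm which kernels remain in a rebuilt image (image name assumed):

    # apache_toree_scala should no longer appear in the listing:
    docker run --rm jupyter/all-spark-notebook jupyter kernelspec list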
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Waiting for a Spark session to start..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"spark://master:7077\n"
]
}
],
"source": [
"// should print the value of --master in the kernel spec\n",
"println(sc.master)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Waiting for a Spark session to start..."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"5050.0"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"// Sum of the first 100 whole numbers\n",
"val rdd = sc.parallelize(0 to 100)\n",
"rdd.sum()\n",
"// 5050"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Apache Toree - Scala",
"language": "scala",
"name": "apache_toree_scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "text/x-scala",
"name": "scala",
"pygments_lexer": "scala",
"version": "2.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
\ No newline at end of file
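
The deleted notebook checked only two things: that sc.master reflects the --master value in the kernel spec, and that summing 0 through 100 yields 5050. A rough equivalent can still be run through PySpark, which remains in the image (image name and local master are assumptions):

    docker run --rm jupyter/all-spark-notebook python -c \
      "import pyspark; sc = pyspark.SparkContext('local'); print(sc.master); print(sc.parallelize(range(101)).sum())"
    # expect: local, then 5050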
@@ -12,7 +12,7 @@ LOGGER = logging.getLogger(__name__)
 @pytest.mark.parametrize(
     "test_file",
     # TODO: add local_sparklyr
-    ["local_pyspark", "local_spylon", "local_toree", "local_sparkR"],
+    ["local_pyspark", "local_spylon", "local_sparkR"],
 )
 def test_nbconvert(container, test_file):
     """Check if Spark notebooks can be executed"""
...
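
With the notebook gone, local_toree leaves the parametrized list so nbconvert no longer tries to execute it. A single remaining case can be exercised with pytest's -k filter; the path is assumed from the repo layout, and the test still needs the suite's container fixture:

    pytest all-spark-notebook/test/test_spark_notebooks.py -k local_spylon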
@@ -76,14 +76,14 @@ RUN mkdir /home/$NB_USER/work && \
 # Install conda as jovyan and check the md5 sum provided on the download site
 ENV MINICONDA_VERSION=4.8.2 \
-    MINICONDA_MD5=87e77f097f6ebb5127c77662dfc3165e \
+    MINICONDA_MD5=cbda751e713b5a95f187ae70b509403f \
     CONDA_VERSION=4.8.2
 WORKDIR /tmp
-RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
-    echo "${MINICONDA_MD5} *Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
-    /bin/bash Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
-    rm Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
+RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh && \
+    echo "${MINICONDA_MD5} *Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
+    /bin/bash Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
+    rm Miniconda3-py38_${MINICONDA_VERSION}-Linux-x86_64.sh && \
     echo "conda ${CONDA_VERSION}" >> $CONDA_DIR/conda-meta/pinned && \
     conda config --system --prepend channels conda-forge && \
     conda config --system --set auto_update_conda false && \
@@ -115,7 +115,7 @@ RUN conda install --quiet --yes 'tini=0.18.0' && \
 RUN conda install --quiet --yes \
     'notebook=6.0.3' \
     'jupyterhub=1.1.0' \
-    'jupyterlab=2.1.3' && \
+    'jupyterlab=2.1.4' && \
     conda clean --all -f -y && \
     npm cache clean --force && \
     jupyter notebook --generate-config && \
...
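
Only the installer filename (py37 to py38) and its MD5 pin change; conda itself stays pinned at 4.8.2, and JupyterLab moves one patch release, 2.1.3 to 2.1.4. When the installer is bumped again, the pin can be refreshed the same way the Dockerfile verifies it:

    # URL mirrors the RUN line above; the checksum printed should match the
    # MINICONDA_MD5 pinned in this diff:
    wget -q https://repo.continuum.io/miniconda/Miniconda3-py38_4.8.2-Linux-x86_64.sh
    md5sum Miniconda3-py38_4.8.2-Linux-x86_64.sh
    # cbda751e713b5a95f187ae70b509403f  Miniconda3-py38_4.8.2-Linux-x86_64.sh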
@@ -11,19 +11,20 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 USER root
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION=2.4.5 \
-    HADOOP_VERSION=2.7
+ENV APACHE_SPARK_VERSION=3.0.0 \
+    HADOOP_VERSION=3.2
 RUN apt-get -y update && \
-    apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
+    apt-get install --no-install-recommends -y openjdk-11-jre-headless ca-certificates-java && \
     rm -rf /var/lib/apt/lists/*
 
 # Using the preferred mirror to download Spark
 WORKDIR /tmp
 # hadolint ignore=SC2046
 RUN wget -q $(wget -qO- https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz\?as_json | \
     python -c "import sys, json; content=json.load(sys.stdin); print(content['preferred']+content['path_info'])") && \
-    echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    echo "BFE45406C67CC4AE00411AD18CC438F51E7D4B6F14EB61E7BF6B5450897C2E8D3AB020152657C0239F253735C263512FFABF538AC5B9FFFA38B8295736A9C387 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
     tar xzf "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
     rm "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
@@ -32,7 +33,7 @@ RUN ln -s "spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark
 # Configure Spark
 ENV SPARK_HOME=/usr/local/spark
-ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
+ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip \
     SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
     PATH=$PATH:$SPARK_HOME/bin
...
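
The py4j bump is easy to miss but load-bearing: PySpark fails to import if PYTHONPATH names a py4j zip that the Spark tarball no longer ships. A quick consistency check against the rebuilt image (name assumed):

    # The zip on PYTHONPATH must match what Spark 3.0.0 actually bundles:
    docker run --rm jupyter/pyspark-notebook \
      bash -c 'ls "$SPARK_HOME/python/lib/" && python -c "import pyspark; print(pyspark.__version__)"'
    # expect py4j-0.10.9-src.zip in the listing and 3.0.0 printed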