From ee76ab2d1e908cd470010e07eff0496c3a9d23bd Mon Sep 17 00:00:00 2001 From: Tristen Date: Wed, 10 Apr 2024 22:24:17 -0400 Subject: [PATCH] updated to Spark 3.5.1 and delta-spark 3.1.0, added tree --- Dockerfile_delta_quickstart | 14 +++++++------- startup.sh | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Dockerfile_delta_quickstart b/Dockerfile_delta_quickstart index b200483..5f6a27c 100644 --- a/Dockerfile_delta_quickstart +++ b/Dockerfile_delta_quickstart @@ -21,22 +21,22 @@ # This docker image uses the official Docker image of [OSS] Apache Spark v3.5.0 as the base container # Note: Python version in this image is 3.9.2 and is available as `python3`. # Note: PySpark v3.5.0 (https://spark.apache.org/docs/latest/api/python/getting_started/install.html#dependencies) -ARG BASE_CONTAINER=spark:3.5.0-scala2.12-java11-python3-ubuntu +ARG BASE_CONTAINER=spark:3.5.1-scala2.12-java17-python3-ubuntu FROM $BASE_CONTAINER as spark FROM spark as delta # Authors (add your name when updating the Dockerfile) -LABEL authors="Prashanth Babu,Denny Lee,Andrew Bauman, Scott Haines" +LABEL authors="Prashanth Babu, Denny Lee, Andrew Bauman, Scott Haines, Tristen Wentling" # Docker image was created and tested with the versions of following packages. USER root -ARG DELTA_SPARK_VERSION="3.0.0" +ARG DELTA_SPARK_VERSION="3.1.0" # Note: for 3.0.0 https://pypi.org/project/deltalake/ -ARG DELTALAKE_VERSION="0.12.0" +ARG DELTALAKE_VERSION="0.16.4" ARG JUPYTERLAB_VERSION="4.0.7" # requires pandas >1.0.5, py4j>=0.10.9.7, pyarrow>=4.0.0 -ARG PANDAS_VERSION="1.5.3" -ARG ROAPI_VERSION="0.9.0" +ARG PANDAS_VERSION="2.2.2" +ARG ROAPI_VERSION="0.11.1" # We are explicitly pinning the versions of various libraries which this Docker image runs on. 
RUN pip install --quiet --no-cache-dir delta-spark==${DELTA_SPARK_VERSION} \ @@ -53,7 +53,7 @@ ENV DELTA_PACKAGE_VERSION=delta-spark_2.12:${DELTA_SPARK_VERSION} # OS Installations Configurations RUN groupadd -r ${GROUP} && useradd -r -m -g ${GROUP} ${NBuser} RUN apt -qq update -RUN apt -qq -y install vim curl +RUN apt -qq -y install vim curl tree # Configure ownership COPY --chown=${NBuser} startup.sh "${WORKDIR}" diff --git a/startup.sh b/startup.sh index 04025b5..fe262d8 100644 --- a/startup.sh +++ b/startup.sh @@ -4,7 +4,7 @@ source "$HOME/.cargo/env" export PYSPARK_DRIVER_PYTHON=jupyter export PYSPARK_DRIVER_PYTHON_OPTS='lab --ip=0.0.0.0' -export DELTA_SPARK_VERSION='3.0.0' +export DELTA_SPARK_VERSION='3.1.0' export DELTA_PACKAGE_VERSION=delta-spark_2.12:${DELTA_SPARK_VERSION} $SPARK_HOME/bin/pyspark --packages io.delta:${DELTA_PACKAGE_VERSION} \