From 0f6436c6d49a420f52541f2c761ec7834240b746 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:41:09 +0200 Subject: [PATCH 1/2] feat: add Spark 4.0.0 --- spark-k8s/Dockerfile | 79 ++++++++++++++----- .../4.0.0/0001-Update-CycloneDX-plugin.patch | 38 +++++++++ .../stackable/patches/4.0.0/patchable.toml | 2 + spark-k8s/versions.py | 18 +++++ 4 files changed, 116 insertions(+), 21 deletions(-) create mode 100644 spark-k8s/stackable/patches/4.0.0/0001-Update-CycloneDX-plugin.patch create mode 100644 spark-k8s/stackable/patches/4.0.0/patchable.toml diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile index bdd4c7925..113b1d0ae 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile @@ -61,6 +61,17 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} RUN <>> Build spark -# Compiling the tests takes a lot of time, so we skip them -# -Dmaven.test.skip=true skips both the compilation and execution of tests -# -DskipTests skips only the execution RUN <]' '{print $3}') mkdir -p dist/connect cd dist/connect - cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . - cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . - cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . - - # The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix. 
+ case "${PRODUCT}" in + 4*) + cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . + cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . + cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . + ;; + *) + cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . + cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . + cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" . + ;; + esac + + # This link is needed by the operator and is kept for backwards compatibility. + # TODO: remove it at some time in the future. ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar" + # Link to the spark-connect jar without the stackable suffix and scala version. + # This link supersedes the previous link. 
+ ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect-${PRODUCT}.jar" EOF # <<< Build spark diff --git a/spark-k8s/stackable/patches/4.0.0/0001-Update-CycloneDX-plugin.patch b/spark-k8s/stackable/patches/4.0.0/0001-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..db7b12530 --- /dev/null +++ b/spark-k8s/stackable/patches/4.0.0/0001-Update-CycloneDX-plugin.patch @@ -0,0 +1,38 @@ +From 2da5608928018dd017c91b904eb8f84a4f6df78a Mon Sep 17 00:00:00 2001 +From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> +Date: Fri, 4 Jul 2025 15:54:55 +0200 +Subject: Update CycloneDX plugin + +--- + dev/make-distribution.sh | 1 - + pom.xml | 5 +++++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh +index 16607e45ae..44e345a245 100755 +--- a/dev/make-distribution.sh ++++ b/dev/make-distribution.sh +@@ -176,7 +176,6 @@ BUILD_COMMAND=("$MVN" clean package \ + -Dmaven.javadoc.skip=true \ + -Dmaven.scaladoc.skip=true \ + -Dmaven.source.skip \ +- -Dcyclonedx.skip=true \ + $@) + + # Actually build the jar +diff --git a/pom.xml b/pom.xml +index 443d46a430..632920f100 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -3327,6 +3327,11 @@ + <groupId>org.cyclonedx</groupId> + <artifactId>cyclonedx-maven-plugin</artifactId> + <version>2.8.0</version> ++ <configuration> ++ <projectType>application</projectType> ++ <schemaVersion>1.5</schemaVersion> ++ <skipNotDeployed>false</skipNotDeployed> ++ </configuration> + <executions> + <execution> + <phase>package</phase> diff --git a/spark-k8s/stackable/patches/4.0.0/patchable.toml b/spark-k8s/stackable/patches/4.0.0/patchable.toml new file mode 100644 index 000000000..24d7204e9 --- /dev/null +++ b/spark-k8s/stackable/patches/4.0.0/patchable.toml @@ -0,0 +1,2 @@ +base = "fa33ea000a0bda9e5a3fa1af98e8e85b8cc5e4d4" +mirror = "https://github.com/stackabletech/spark.git" diff --git a/spark-k8s/versions.py b/spark-k8s/versions.py index fe6405702..c1ff1eb3d 100644 --- a/spark-k8s/versions.py +++ b/spark-k8s/versions.py @@ -35,4 +35,22 @@ "tini": "0.19.0", "hbase_connector": "1.0.1", }, + { + "product": "4.0.0", + "java-base": "17", + "java-devel": "17", 
+ "python": "3.11", + "hadoop/hadoop": "3.4.1", + "hbase": "2.6.2", + "aws_java_sdk_bundle": "2.24.6", + "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4 + "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 + "jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1 + "stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 + "woodstox_core": "6.5.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 + "vector": "0.47.0", + "jmx_exporter": "1.3.0", + "tini": "0.19.0", + "hbase_connector": "1.0.1", + }, ] From 0d6a4516d6f7a1033f4e2d402514b6e3d310c43b Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 22 Jul 2025 17:23:52 +0200 Subject: [PATCH 2/2] update spark client image version --- spark-connect-client/versions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spark-connect-client/versions.py b/spark-connect-client/versions.py index be7977a85..98188f344 100644 --- a/spark-connect-client/versions.py +++ b/spark-connect-client/versions.py @@ -5,4 +5,10 @@ "java-base": "17", "python": "3.11", }, + { + "product": "4.0.0", + "spark-k8s": "4.0.0", + "java-base": "17", + "python": "3.11", + }, ]