Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file.
- hadoop: Add `3.4.2` ([#1291]).
- zookeeper: Add `3.9.4` ([#1292]).
- nifi: Add `2.6.0` ([#1293]).
- hive: Add `4.1.0` ([#1295]).

### Changed

Expand Down Expand Up @@ -84,6 +85,7 @@ All notable changes to this project will be documented in this file.
[#1291]: https://github.com/stackabletech/docker-images/pull/1291
[#1292]: https://github.com/stackabletech/docker-images/pull/1292
[#1293]: https://github.com/stackabletech/docker-images/pull/1293
[#1295]: https://github.com/stackabletech/docker-images/pull/1295

## [25.7.0] - 2025-07-23

Expand Down
65 changes: 58 additions & 7 deletions hive/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,23 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-li
USER ${STACKABLE_USER_UID}
WORKDIR /stackable

ENV NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"

# Run patchable as a dedicated step: it fetches the Hive source code over the network,
# which takes a while (and is annoying during development)
RUN /stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION} > /tmp/HIVE_SOURCE_DIR

# Make expensive maven build a separate layer for better caching
# Cache mounts are owned by root by default
# We need to explicitly give the uid to use
RUN --mount=type=cache,id=maven-hive-${PRODUCT_VERSION},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository <<EOF
BUILD_SRC_DIR="$(/stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION})"
BUILD_SRC_DIR="$(cat /tmp/HIVE_SOURCE_DIR)" || exit 1
rm /tmp/HIVE_SOURCE_DIR
cd "$BUILD_SRC_DIR"

# Make Maven aware of custom Stackable libraries
cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository

NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"

# generateBackupPoms=false is needed for the Hive 4.0.0 build to succeed, otherwise it fails with the obscure reason: `Too many files with unapproved license`
mvn versions:set -DnewVersion=$NEW_VERSION -DartifactId=* -DgroupId=* -DgenerateBackupPoms=false

Expand All @@ -62,7 +68,7 @@ if [[ "${PRODUCT_VERSION}" == "3.1.3" ]] ; then
--projects standalone-metastore
mv standalone-metastore/target/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}-bin /stackable
mv standalone-metastore/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/apache-hive-metastore-${NEW_VERSION}.cdx.json
else
elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then
(
# https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0
mvn \
Expand All @@ -78,16 +84,34 @@ else
# schemaTool.sh still points to the class location from Hive < 4.0.0; it looks like updating it was forgotten
sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${NEW_VERSION}-bin/bin/ext/schemaTool.sh
)
else
# Starting with 4.1.0 the build process changed again in https://github.com/apache/hive/pull/5936 (HIVE-29062)
mvn \
clean package \
-Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} \
-DskipTests \
-Pdist
# It looks like we cannot restrict the build using "--projects standalone-metastore/metastore-server --also-make",
# as that does not produce a *.tar.gz

# We only seem to get a .tar.gz archive, so let's extract that to the correct location
tar --extract --directory=/stackable -f standalone-metastore/packaging/target/hive-standalone-metastore-${NEW_VERSION}-bin.tar.gz
mv standalone-metastore/metastore-server/target/bom.json /stackable/apache-hive-metastore-${NEW_VERSION}-bin/hive-standalone-metastore-${NEW_VERSION}.cdx.json
fi

# Remove sourcecode
cd /stackable
rm -rf "$BUILD_SRC_DIR"
EOF

RUN << EOF
cd /stackable
mkdir /stackable/jmx
curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar"
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar

# Needed to run housekeeping jobs, see footnote <1> below
cp /stackable/patched-libs/maven/org/apache/hadoop/hadoop-mapreduce-client-core/${HADOOP_VERSION}-stackable${RELEASE_VERSION}/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/

# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards
# This way the build will fail should one of the files not be available anymore in a later Hadoop version!

Expand All @@ -96,8 +120,11 @@ cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/

# According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from
# aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar to bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar. In future, you might need to do:
# cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
if [[ "${PRODUCT_VERSION}" == "3.1.3" || "${PRODUCT_VERSION}" == 4.0.* ]]; then
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
else
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/
fi

# Add Azure ABFS support (support for abfs://)
cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/
Expand All @@ -118,7 +145,6 @@ fi
chmod --recursive g=u /stackable
EOF


FROM local-image/java-base AS final

ARG PRODUCT_VERSION
Expand Down Expand Up @@ -215,3 +241,28 @@ ENV HADOOP_MAPRED_HOME=/stackable/hadoop

WORKDIR /stackable/hive-metastore
# Start command is set by operator to something like "bin/start-metastore --config /stackable/config --db-type postgres --hive-bin-dir bin"



# <1>: org.apache.hadoop.mapred.JobConf is needed, otherwise the following error is logged:
# 2025-10-06T08:42:04,137 ERROR [Metastore threads starter thread] metastore.HiveMetaStore: Failure when starting the leader tasks, Compaction or Housekeeping tasks may not happen
# java.lang.NoClassDefFoundError: org/apache/hadoop/mapred/JobConf
# at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:6601) ~[hive-common-4.1.0.jar:4.1.0]
# at org.apache.hadoop.hive.conf.HiveConf.<init>(HiveConf.java:6569) ~[hive-common-4.1.0.jar:4.1.0]
# at org.apache.hadoop.hive.ql.txn.compactor.CompactorThread.setConf(CompactorThread.java:68) ~[hive-exec-4.1.0-core.jar:4.1.0]
# at org.apache.hadoop.hive.metastore.leader.CompactorTasks.takeLeadership(CompactorTasks.java:139) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.lambda$notifyListener$0(LeaseLeaderElection.java:141) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at java.base/java.util.ArrayList.forEach(Unknown Source) ~[?:?]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.notifyListener(LeaseLeaderElection.java:138) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.doWork(LeaseLeaderElection.java:120) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:181) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaseLeaderElection.tryBeLeader(LeaseLeaderElection.java:63) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.lambda$start$2(LeaderElectionContext.java:125) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at java.base/java.lang.Thread.run(Unknown Source) ~[?:?]
# at org.apache.hadoop.hive.metastore.leader.LeaderElectionContext.start(LeaderElectionContext.java:136) ~[hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# at org.apache.hadoop.hive.metastore.HiveMetaStore$8.run(HiveMetaStore.java:856) [hive-standalone-metastore-server-4.1.0-stackable0.0.0-dev.jar:4.1.0-stackable0.0.0-dev]
# Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapred.JobConf
# at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(Unknown Source) ~[?:?]
# at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(Unknown Source) ~[?:?]
# at java.base/java.lang.ClassLoader.loadClass(Unknown Source) ~[?:?]
# ... 14 more
17 changes: 16 additions & 1 deletion hive/boil-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ azure-storage-version = "7.0.1"
azure-keyvault-core-version = "1.0.0"

[versions."4.0.1".local-images]
# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
# Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
java-base = "11"
java-devel = "8"
"hadoop/hadoop" = "3.3.6"
Expand All @@ -36,3 +36,18 @@ jmx-exporter-version = "1.3.0"
aws-java-sdk-bundle-version = "1.12.367"
azure-storage-version = "7.0.1"
azure-keyvault-core-version = "1.0.0"

[versions."4.1.0".local-images]
# Hive 4.1 requires Java 17 (according to GitHub README)
java-base = "17"
java-devel = "17"
"hadoop/hadoop" = "3.4.2"

[versions."4.1.0".build-arguments]
jmx-exporter-version = "1.3.0"
# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2
aws-java-sdk-bundle-version = "2.29.52"
# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2
azure-storage-version = "7.0.1"
# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
azure-keyvault-core-version = "1.0.0"