-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Unify python examples and update run scripts (#111)
* Unify the PySpark examples (idk why they were separated) and add a script to run them same as the SQL ex. * Try and make run more flex * Change how we trigger OOMing. * Skip doctest of OOM since it puts SparkContext into a bad state. * Add a quote and disable SC2046
- Loading branch information
Showing
8 changed files
with
102 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,4 +77,5 @@ warehouse/ | |
metastore_db/ | ||
|
||
# Misc internal stuff | ||
sql/*.sql.out | ||
sql/*.sql.out | ||
python/examples/*.py.out |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/bin/bash
#
# Fetches a Spark distribution and the Iceberg Spark runtime jar into the
# current directory (when they are not already there), unpacks Spark, copies
# the jar into Spark's jars/ directory and puts the Spark tools on PATH.
#
# Intended to be sourced so that the PATH export reaches the calling shell.
# For that reason it never calls `exit` and does not enable `set -e`; both
# would take effect in the caller.

# Versions and derived file names
SPARK_MAJOR="3.4"
SPARK_VERSION=3.4.1
HADOOP_VERSION="3"
SPARK_PATH="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
# Derived from SPARK_PATH so the archive name always tracks HADOOP_VERSION.
SPARK_FILE="${SPARK_PATH}.tgz"
ICEBERG_VERSION="1.3.1"
# NOTE(review): this pins the Scala 2.13 build of the Iceberg runtime; confirm
# the Spark archive downloaded above is built for the same Scala version.
ICEBERG_FILE="iceberg-spark-runtime-${SPARK_MAJOR}_2.13-${ICEBERG_VERSION}.jar"

# Download Spark if not present. The download goes to a ".part" file and is
# renamed only on success, so an interrupted or failed transfer can never be
# mistaken for a complete archive on the next run.
spark_dl_pid=""
if [ ! -f "${SPARK_FILE}" ]; then
  wget "https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/${SPARK_FILE}" -O "${SPARK_FILE}.part" &
  spark_dl_pid=$!
fi
# Download Iceberg if not present (in parallel with the Spark download)
iceberg_dl_pid=""
if [ ! -f "${ICEBERG_FILE}" ]; then
  wget "https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-${SPARK_MAJOR}_2.13/${ICEBERG_VERSION}/${ICEBERG_FILE}" -O "${ICEBERG_FILE}.part" &
  iceberg_dl_pid=$!
fi

# Wait on each download by PID: a bare `wait` always reports success, whereas
# `wait PID` returns that job's exit status.
if [ -n "${spark_dl_pid}" ]; then
  if wait "${spark_dl_pid}"; then
    mv -- "${SPARK_FILE}.part" "${SPARK_FILE}"
  else
    echo "Failed to download ${SPARK_FILE}" >&2
    rm -f -- "${SPARK_FILE}.part"
  fi
fi
if [ -n "${iceberg_dl_pid}" ]; then
  if wait "${iceberg_dl_pid}"; then
    mv -- "${ICEBERG_FILE}.part" "${ICEBERG_FILE}"
  else
    echo "Failed to download ${ICEBERG_FILE}" >&2
    rm -f -- "${ICEBERG_FILE}.part"
  fi
fi
# Do not leak helper variables into the shell that sourced this file.
unset spark_dl_pid iceberg_dl_pid

# Setup the env
if [ ! -d "${SPARK_PATH}" ] && [ -f "${SPARK_FILE}" ]; then
  tar -xf "${SPARK_FILE}"
fi
if [ -f "${ICEBERG_FILE}" ] && [ ! -f "${SPARK_PATH}/jars/${ICEBERG_FILE}" ]; then
  cp "${ICEBERG_FILE}" "${SPARK_PATH}/jars/${ICEBERG_FILE}"
fi

# Set up for running pyspark and friends. Entries are made absolute so the
# tools stay reachable after the caller changes directory.
export PATH="${PWD}/${SPARK_PATH}:${PWD}/${SPARK_PATH}/python:${PWD}/${SPARK_PATH}/bin:${PWD}/${SPARK_PATH}/sbin:${PATH}"

# Make sure we have a history directory
mkdir -p /tmp/spark-events
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash
#
# Runs every PySpark example under python/examples through spark-submit with
# the Iceberg catalogs configured, appending each run's combined stdout and
# stderr to "<example>.out".

source env_setup.sh

pip install -r ./python/requirements.txt

for ex in python/examples/*.py; do
  # When nothing matches, the glob is left as the literal pattern; skip it.
  [ -e "${ex}" ] || continue

  # Optional extra spark-submit arguments come from "<example>.conf" as
  # whitespace-separated words. Reading them into an array keeps the word
  # splitting but avoids glob expansion of the file's contents, and avoids a
  # "No such file" error from cat for examples that have no .conf file.
  extra_conf=()
  if [ -f "${ex}.conf" ]; then
    # -d '' reads the whole file; read returns non-zero at EOF, hence `|| true`.
    read -r -d '' -a extra_conf < "${ex}.conf" || true
  fi

  # "local[5]" is quoted so the brackets are never treated as a glob pattern.
  spark-submit \
    --master "local[5]" \
    --conf spark.eventLog.enabled=true \
    --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
    --conf spark.sql.catalog.spark_catalog.type=hive \
    --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \
    --conf spark.sql.catalog.local.type=hadoop \
    --conf "spark.sql.catalog.local.warehouse=$PWD/warehouse" \
    "${extra_conf[@]}" \
    --name "${ex}" \
    "${ex}" 2>&1 | tee -a "${ex}.out"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters