Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions connector/kafka-0-10-sql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,26 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.6.1</version>
<executions>
<execution>
<id>write-classpath</id>
<goals>
<goal>build-classpath</goal>
</goals>
<phase>generate-resources</phase>
<configuration>
<includeScope>runtime</includeScope>
<outputFile>${project.build.directory}/classpath.txt</outputFile>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>
5 changes: 5 additions & 0 deletions dev/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ openpyxl
# PySpark test dependencies (optional)
coverage

# Kafka streaming test dependencies (optional)
# Required for running Kafka integration tests with Docker test containers
testcontainers[kafka]>=3.7.0
kafka-python>=2.0.2

# Linter
ruff==0.14.8
mypy==1.8.0
Expand Down
4 changes: 3 additions & 1 deletion dev/spark-test-image/python-311/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ RUN apt-get update && apt-get install -y \
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 pystack psutil"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
# Python deps for Kafka streaming tests
ARG KAFKA_TEST_PKGS="testcontainers[kafka]>=3.7.0 kafka-python>=2.0.2"

# Install Python 3.11 packages
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
RUN python3.11 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this
RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \
RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS $KAFKA_TEST_PKGS && \
python3.11 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
python3.11 -m pip install deepspeed torcheval && \
python3.11 -m pip cache purge
4 changes: 3 additions & 1 deletion dev/spark-test-image/python-312/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ RUN apt-get update && apt-get install -y \
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
# Python deps for Kafka streaming tests
ARG KAFKA_TEST_PKGS="testcontainers[kafka]>=3.7.0 kafka-python>=2.0.2"

# Install Python 3.12 packages
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
RUN python3.12 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this
RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \
RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS $KAFKA_TEST_PKGS lxml && \
python3.12 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
python3.12 -m pip install torcheval && \
python3.12 -m pip cache purge
4 changes: 3 additions & 1 deletion dev/spark-test-image/python-313/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ RUN apt-get update && apt-get install -y \
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
# Python deps for Kafka streaming tests
ARG KAFKA_TEST_PKGS="testcontainers[kafka]>=3.7.0 kafka-python>=2.0.2"

# Install Python 3.13 packages
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13
RUN python3.13 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this
RUN python3.13 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \
RUN python3.13 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS $KAFKA_TEST_PKGS lxml && \
python3.13 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
python3.13 -m pip install torcheval && \
python3.13 -m pip cache purge
4 changes: 3 additions & 1 deletion dev/spark-test-image/python-314/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ RUN apt-get update && apt-get install -y \
ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
# Python deps for Kafka streaming tests
ARG KAFKA_TEST_PKGS="testcontainers[kafka]>=3.7.0 kafka-python>=2.0.2"

# Install Python 3.14 packages
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.14
RUN python3.14 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow needs this
RUN python3.14 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \
RUN python3.14 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS $KAFKA_TEST_PKGS lxml && \
python3.14 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
python3.14 -m pip install torcheval && \
python3.14 -m pip cache purge
3 changes: 2 additions & 1 deletion dev/sparktestsupport/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ def __hash__(self):

pyspark_structured_streaming = Module(
name="pyspark-structured-streaming",
dependencies=[pyspark_core, pyspark_streaming, pyspark_sql],
dependencies=[pyspark_core, pyspark_streaming, pyspark_sql, sql_kafka],
source_file_regexes=[
"python/pyspark/sql/streaming",
"python/pyspark/sql/pandas",
Expand All @@ -662,6 +662,7 @@ def __hash__(self):
"pyspark.sql.tests.streaming.test_streaming",
"pyspark.sql.tests.streaming.test_streaming_foreach",
"pyspark.sql.tests.streaming.test_streaming_foreach_batch",
"pyspark.sql.tests.streaming.test_streaming_kafka_rtm",
"pyspark.sql.tests.streaming.test_streaming_listener",
"pyspark.sql.tests.pandas.test_pandas_grouped_map_with_state",
"pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state",
Expand Down
Loading