Merge remote-tracking branch 'apache/master'
SemyonSinchenko committed Sep 21, 2024
2 parents 71fe59c + 0b05b1a commit 07a7233
Showing 1,106 changed files with 51,782 additions and 29,556 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build_and_test.yml
@@ -304,7 +304,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
path: "**/target/*.log"

infra-image:
name: "Base image build"
@@ -723,7 +723,7 @@ jobs:
# See 'ipython_genutils' in SPARK-38517
# See 'docutils<0.18.0' in SPARK-39421
python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
+ipython ipython_genutils sphinx_plotly_directive 'numpy==1.26.4' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
2 changes: 1 addition & 1 deletion .github/workflows/build_python_connect.yml
@@ -71,7 +71,7 @@ jobs:
python packaging/connect/setup.py sdist
cd dist
pip install pyspark*connect-*.tar.gz
-pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting
+pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8'
- name: Run tests
env:
SPARK_TESTING: 1
2 changes: 1 addition & 1 deletion .github/workflows/build_sparkr_window.yml
@@ -85,7 +85,7 @@ jobs:
shell: cmd
env:
NOT_CRAN: true
-SPARK_TESTING: 1
+SPARKR_SUPPRESS_DEPRECATION_WARNING: 1
# See SPARK-27848. Currently installing some dependent packages causes
# "(converted from warning) unable to identify current timezone 'C':" for an unknown reason.
# This environment variable works around to test SparkR against a higher version.
4 changes: 2 additions & 2 deletions .github/workflows/test_report.yml
@@ -30,14 +30,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download test results to report
-uses: dawidd6/action-download-artifact@09385b76de790122f4da9c82b17bccf858b9557c # pin@v2
+uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # pin @v6
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow: ${{ github.event.workflow_run.workflow_id }}
commit: ${{ github.event.workflow_run.head_commit.id }}
workflow_conclusion: completed
- name: Publish test report
-uses: scacap/action-surefire-report@482f012643ed0560e23ef605a79e8e87ca081648 # pin@v1
+uses: scacap/action-surefire-report@a2911bd1a4412ec18dde2d93b1758b3e56d2a880 # pin @v1.8.0
with:
check_name: Report test results
github_token: ${{ secrets.GITHUB_TOKEN }}
2 changes: 2 additions & 0 deletions .gitignore
@@ -8,6 +8,7 @@
*.swp
*~
.java-version
+.python-version
.DS_Store
.ammonite
.bloop
@@ -26,6 +27,7 @@
.scala_dependencies
.settings
.vscode
+artifacts/
/lib/
R-unit-tests.log
R/unit-tests.out
Empty file added .nojekyll
2 changes: 1 addition & 1 deletion R/pkg/R/sparkR.R
@@ -404,7 +404,7 @@ sparkR.session <- function(
enableHiveSupport = TRUE,
...) {

if (Sys.getenv("SPARK_TESTING") == "") {
if (Sys.getenv("SPARKR_SUPPRESS_DEPRECATION_WARNING") == "") {
warning(
"SparkR is deprecated from Apache Spark 4.0.0 and will be removed in a future version.")
}
4 changes: 2 additions & 2 deletions R/run-tests.sh
@@ -30,9 +30,9 @@ if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then
fi

if [ -z "$SPARK_JARS" ]; then
-SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
else
-SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
fi

FAILED=$((PIPESTATUS[0]||$FAILED))
12 changes: 10 additions & 2 deletions assembly/pom.xml
@@ -123,7 +123,7 @@
<!--
Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
that the libraries Spark depend on have it available. We'll package the version that Spark
-    uses (14.0.1) which is not the same as Hadoop dependencies, but works.
+    uses which is not the same as Hadoop dependencies, but works.
-->
<dependency>
<groupId>com.google.guava</groupId>
@@ -200,7 +200,7 @@
<configuration>
<executable>cp</executable>
<arguments>
<argument>${basedir}/../connector/connect/client/jvm/target/spark-connect-client-jvm_${scala.binary.version}-${version}.jar</argument>
<argument>${basedir}/../connector/connect/client/jvm/target/spark-connect-client-jvm_${scala.binary.version}-${project.version}.jar</argument>
<argument>${basedir}/target/scala-${scala.binary.version}/jars/connect-repl</argument>
</arguments>
</configuration>
@@ -339,6 +339,14 @@
</properties>
</profile>

<!-- Pull in jjwt-impl and jjwt-jackson jars -->
<profile>
<id>jjwt</id>
<properties>
<jjwt.deps.scope>compile</jjwt.deps.scope>
</properties>
</profile>

<!--
Pull in spark-hadoop-cloud and its associated JARs,
-->
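For context on the new jjwt profile: jjwt-api only defines interfaces, while jjwt-impl and jjwt-jackson supply the runtime implementation and JSON codec that jjwt discovers reflectively, so promoting jjwt.deps.scope to compile is what makes JWT handling work in the packaged assembly. A minimal sketch of the kind of call that fails without those jars on the classpath, assuming the jjwt 0.12.x fluent API — illustrative only, not code from this commit:

import javax.crypto.SecretKey;
import io.jsonwebtoken.Jwts;

public class JwtRoundTrip {
  public static void main(String[] args) {
    // Jwts.builder() and Jwts.parser() resolve their implementation classes
    // from jjwt-impl at runtime; with only jjwt-api present they throw.
    SecretKey key = Jwts.SIG.HS256.key().build();   // random HMAC-SHA256 key
    String token = Jwts.builder().subject("spark").signWith(key).compact();
    String subject = Jwts.parser().verifyWith(key).build()
        .parseSignedClaims(token).getPayload().getSubject();
    System.out.println(subject);                    // prints "spark"
  }
}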
2 changes: 1 addition & 1 deletion build/mvn
@@ -58,7 +58,7 @@ install_app() {
local local_checksum="${local_tarball}.${checksum_suffix}"
local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}"

local curl_opts="--retry 3 --retry-all-errors --silent --show-error -L"
local curl_opts="--retry 3 --silent --show-error -L"
local wget_opts="--no-verbose"

if [ ! -f "$binary" ]; then
@@ -19,10 +19,7 @@

import java.io.*;
import java.util.concurrent.TimeUnit;
-import java.util.zip.Adler32;
-import java.util.zip.CRC32;
-import java.util.zip.CheckedInputStream;
-import java.util.zip.Checksum;
+import java.util.zip.*;

import com.google.common.io.ByteStreams;

@@ -66,6 +63,13 @@ private static Checksum[] getChecksumsByAlgorithm(int num, String algorithm) {
}
}

case "CRC32C" -> {
checksums = new CRC32C[num];
for (int i = 0; i < num; i++) {
checksums[i] = new CRC32C();
}
}

default -> throw new UnsupportedOperationException(
"Unsupported shuffle checksum algorithm: " + algorithm);
}
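The CRC32C arm added above mirrors the existing Adler32/CRC32 arms using java.util.zip.CRC32C, which has shipped with the JDK since Java 9. A self-contained sketch of how such a checksum is typically driven, both directly and through the CheckedInputStream pattern the surrounding imports support — illustrative only, not Spark code (draining via transferTo needs Java 11+):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32C;
import java.util.zip.CheckedInputStream;

public class Crc32cDemo {
  public static void main(String[] args) throws IOException {
    byte[] data = "shuffle block bytes".getBytes(StandardCharsets.UTF_8);

    // Direct update: feed the whole buffer to the checksum at once.
    CRC32C direct = new CRC32C();
    direct.update(data, 0, data.length);

    // Streaming update: CheckedInputStream accumulates the checksum as the
    // bytes are read, so the consumer never touches the checksum itself.
    CRC32C streamed = new CRC32C();
    try (CheckedInputStream in =
        new CheckedInputStream(new ByteArrayInputStream(data), streamed)) {
      in.transferTo(OutputStream.nullOutputStream()); // drain the stream
    }

    // Both paths yield the same 32-bit value.
    System.out.println(direct.getValue() == streamed.getValue()); // true
  }
}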
