From 0d4d6bcd3e00d0589e2ec40f80de7c33c0908181 Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Fri, 20 Sep 2024 11:27:09 -0700 Subject: [PATCH 1/3] add config --- docs/configuration.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 6833d4e54fd03..63f8c8fb71cfb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1010,6 +1010,19 @@ Apart from these, the following properties are also available, and may be useful 2.2.1 + + spark.shuffle.accurateBlockSkewedFactor + -1.0 + + A shuffle block is considered skewed and will be accurately recorded in + HighlyCompressedMapStatus if its size is larger than this factor multiplied by + the median shuffle block size or spark.shuffle.accurateBlockThreshold. It is + recommended to set this parameter to be the same as + spark.sql.adaptive.skewJoin.skewedPartitionFactor. The default value of -1.0 disables + this feature. + + 3.3.0 + spark.shuffle.compress true From 8c59fc63f62d95a575b25bf13328fcd6fc5d6e4d Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Fri, 20 Sep 2024 12:07:09 -0700 Subject: [PATCH 2/3] move config location --- docs/configuration.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 63f8c8fb71cfb..f0372ec9233a9 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1010,19 +1010,6 @@ Apart from these, the following properties are also available, and may be useful 2.2.1 - - spark.shuffle.accurateBlockSkewedFactor - -1.0 - - A shuffle block is considered skewed and will be accurately recorded in - HighlyCompressedMapStatus if its size is larger than this factor multiplied by - the median shuffle block size or spark.shuffle.accurateBlockThreshold. It is - recommended to set this parameter to be the same as - spark.sql.adaptive.skewJoin.skewedPartitionFactor. The default value of -1.0 disables - this feature. 
- - 3.3.0 - spark.shuffle.compress true @@ -1235,6 +1222,19 @@ Apart from these, the following properties are also available, and may be useful 2.2.1 + + spark.shuffle.accurateBlockSkewedFactor + -1.0 + + A shuffle block is considered skewed and will be accurately recorded in + HighlyCompressedMapStatus if its size is larger than this factor multiplied by + the median shuffle block size or spark.shuffle.accurateBlockThreshold. It is + recommended to set this parameter to be the same as + spark.sql.adaptive.skewJoin.skewedPartitionFactor. The default value of -1.0 disables + this feature. + + 3.3.0 + spark.shuffle.registration.timeout 5000 From 0e9ac0262824068f8743b3d5eb21d48ed1183392 Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Fri, 20 Sep 2024 21:03:13 -0700 Subject: [PATCH 3/3] remove internal --- .../main/scala/org/apache/spark/internal/config/package.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 9fcd9ba529c16..c0ea430ef0ef6 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1357,7 +1357,6 @@ package object config { private[spark] val SHUFFLE_ACCURATE_BLOCK_SKEWED_FACTOR = ConfigBuilder("spark.shuffle.accurateBlockSkewedFactor") - .internal() .doc("A shuffle block is considered as skewed and will be accurately recorded in " + "HighlyCompressedMapStatus if its size is larger than this factor multiplying " + "the median shuffle block size or SHUFFLE_ACCURATE_BLOCK_THRESHOLD. It is " +