From c58148da5496245403b55c3fc423d35f3a669c79 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Sat, 31 Aug 2024 21:35:57 -0700
Subject: [PATCH] [SPARK-49015][CORE] Connect Server should respect
 `spark.log.structuredLogging.enabled`

### What changes were proposed in this pull request?

Currently, structured logging is always activated for the Connect Server, regardless of the value of `spark.log.structuredLogging.enabled`.

### Why are the changes needed?

Bug fix, to make `spark.log.structuredLogging.enabled` work with the Connect Server.

### Does this PR introduce _any_ user-facing change?

No, structured logging is an unreleased feature.

### How was this patch tested?

```
sbin/start-connect-server.sh --conf spark.log.structuredLogging.enabled=true
```

```
starting org.apache.spark.sql.connect.service.SparkConnectServer, logging to /home/chengpan/Projects/apache-spark/logs/spark-chengpan-org.apache.spark.sql.connect.service.SparkConnectServer-1-HIH-D-25944Z.out
Spark Command: /home/chengpan/.sdkman/candidates/java/current/bin/java -cp hive-jackson/*:/home/chengpan/Projects/apache-spark/conf/:/home/chengpan/Projects/apache-spark/assembly/target/scala-2.13/jars/slf4j-api-2.0.13.jar:/home/chengpan/Projects/apache-spark/assembly/target/scala-2.13/jars/* -Xmx1g -XX:+IgnoreUnrecognizedVMOptions --add-modules=jdk.incubator.vector --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false -Dio.netty.tryReflectionSetAccessible=true -Dderby.connection.requireAuthentication=false org.apache.spark.deploy.SparkSubmit --conf spark.log.structuredLogging.enabled=true --class org.apache.spark.sql.connect.service.SparkConnectServer --name Spark Connect server spark-internal
========================================
WARNING: Using incubator modules: jdk.incubator.vector
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
{"ts":"2024-07-26T11:40:40.779Z","level":"WARN","msg":"Your hostname, HIH-D-25944Z, resolves to a loopback address: 127.0.1.1; using 10.219.36.108 instead (on interface enp2s0)","context":{"host":"HIH-D-25944Z","host_port":"127.0.1.1","host_port2":"10.219.36.108","network_if":"enp2s0"},"logger":"Utils"}
{"ts":"2024-07-26T11:40:40.782Z","level":"WARN","msg":"Set SPARK_LOCAL_IP if you need to bind to another address","logger":"Utils"}
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
{"ts":"2024-07-26T11:40:41.044Z","level":"INFO","msg":"Starting Spark session.","logger":"SparkConnectServer"}
{"ts":"2024-07-26T11:40:41.067Z","level":"INFO","msg":"Running Spark version 4.0.0-SNAPSHOT","context":{"spark_version":"4.0.0-SNAPSHOT"},"logger":"SparkContext"}
{"ts":"2024-07-26T11:40:41.068Z","level":"INFO","msg":"OS info Linux, 6.5.0-44-generic, amd64","context":{"os_arch":"amd64","os_name":"Linux","os_version":"6.5.0-44-generic"},"logger":"SparkContext"}
{"ts":"2024-07-26T11:40:41.069Z","level":"INFO","msg":"Java version 17.0.8","context":{"java_version":"17.0.8"},"logger":"SparkContext"} ... ``` ``` sbin/start-connect-server.sh --conf spark.log.structuredLogging.enabled=false ``` ``` starting org.apache.spark.sql.connect.service.SparkConnectServer, logging to /home/chengpan/Projects/apache-spark/logs/spark-chengpan-org.apache.spark.sql.connect.service.SparkConnectServer-1-HIH-D-25944Z.out Spark Command: /home/chengpan/.sdkman/candidates/java/current/bin/java -cp hive-jackson/*:/home/chengpan/Projects/apache-spark/conf/:/home/chengpan/Projects/apache-spark/assembly/target/scala-2.13/jars/slf4j-api-2.0.13.jar:/home/chengpan/Projects/apache-spark/assembly/target/scala-2.13/jars/* -Xmx1g -XX:+IgnoreUnrecognizedVMOptions --add-modules=jdk.incubator.vector --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false -Dio.netty.tryReflectionSetAccessible=true -Dderby.connection.requireAuthentication=false org.apache.spark.deploy.SparkSubmit --conf spark.log.structuredLogging.enabled=false --class org.apache.spark.sql.connect.service.SparkConnectServer --name Spark Connect server spark-internal ======================================== WARNING: Using incubator modules: jdk.incubator.vector Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties {"ts":"2024-07-26T11:41:23.459Z","level":"WARN","msg":"Your hostname, HIH-D-25944Z, resolves to a loopback address: 127.0.1.1; using 10.219.36.108 instead (on interface enp2s0)","context":{"host":"HIH-D-25944Z","host_port":"127.0.1.1","host_port2":"10.219.36.108","network_if":"enp2s0"},"logger":"Utils"} {"ts":"2024-07-26T11:41:23.463Z","level":"WARN","msg":"Set SPARK_LOCAL_IP if you need to bind to another address","logger":"Utils"} Using Spark's default log4j profile: org/apache/spark/log4j2-pattern-layout-defaults.properties 24/07/26 19:41:23 INFO SparkConnectServer: Starting Spark session. 24/07/26 19:41:23 INFO SparkContext: Running Spark version 4.0.0-SNAPSHOT 24/07/26 19:41:23 INFO SparkContext: OS info Linux, 6.5.0-44-generic, amd64 24/07/26 19:41:23 INFO SparkContext: Java version 17.0.8 ... ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47500 from pan3793/SPARK-49015. 
Authored-by: Cheng Pan
Signed-off-by: Gengliang Wang
---
 .../src/main/scala/org/apache/spark/internal/Logging.scala    | 3 +--
 core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
index 7ffaef0855fd1..7471b764bd2b3 100644
--- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -334,7 +334,7 @@ trait Logging {
       // If Log4j 2 is used but is initialized by default configuration,
       // load a default properties file
       // scalastyle:off println
-      if (Logging.islog4j2DefaultConfigured()) {
+      if (Logging.defaultSparkLog4jConfig || Logging.islog4j2DefaultConfigured()) {
         Logging.defaultSparkLog4jConfig = true
         val defaultLogProps = if (Logging.isStructuredLoggingEnabled) {
           "org/apache/spark/log4j2-defaults.properties"
         } else {
@@ -424,7 +424,6 @@ private[spark] object Logging {
   def uninitialize(): Unit = initLock.synchronized {
     if (isLog4j2()) {
       if (defaultSparkLog4jConfig) {
-        defaultSparkLog4jConfig = false
         val context = LogManager.getContext(false).asInstanceOf[LoggerContext]
         context.reconfigure()
       } else {
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 1745589ca39b3..f3833e85a482e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -85,6 +85,10 @@ private[spark] class SparkSubmit extends Logging {
         Logging.disableStructuredLogging()
       }
     }
+
+    // We should initialize logging again after `spark.log.structuredLogging.enabled` takes effect
+    Logging.uninitialize()
+
     // Initialize logging if it hasn't been done yet. Keep track of whether logging needs to
     // be reset before the application starts.
     val uninitLog = initializeLogIfNecessary(true, silent = true)
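
For context, here is a minimal, self-contained Scala sketch of the interaction this patch fixes. It is a model, not Spark's actual implementation: `LoggingModel`, `configLocation`, and `structuredLoggingEnabled` are illustrative stand-ins for Spark's internals. The point it demonstrates: once the first initialization pass installs one of Spark's bundled log4j2 properties files, Log4j no longer looks "default configured", so without the persisted `defaultSparkLog4jConfig` flag a second pass would be skipped even after SparkSubmit flips the structured-logging setting.

```
// Minimal model of the initialize/uninitialize flow (NOT Spark's real
// Logging object; names are illustrative stand-ins).
object LoggingModel {
  // Mirrors Logging.defaultSparkLog4jConfig: "Spark installed the default config".
  var defaultSparkLog4jConfig = false
  // Mirrors the structured-logging switch toggled by SparkSubmit.
  var structuredLoggingEnabled = true
  // Stand-in for the Log4j context's configuration source: once a properties
  // file is installed, the context no longer looks "default configured".
  var configLocation: Option[String] = None

  // Stand-in for Logging.islog4j2DefaultConfigured().
  def isLog4j2DefaultConfigured: Boolean = configLocation.isEmpty

  def initialize(): Unit = {
    // The fixed check: reload the default properties file either when Log4j
    // is still untouched OR when Spark itself installed the config earlier.
    if (defaultSparkLog4jConfig || isLog4j2DefaultConfigured) {
      defaultSparkLog4jConfig = true
      configLocation = Some(
        if (structuredLoggingEnabled) "org/apache/spark/log4j2-defaults.properties"
        else "org/apache/spark/log4j2-pattern-layout-defaults.properties")
    }
  }

  def uninitialize(): Unit = {
    // The patch removes `defaultSparkLog4jConfig = false` from here: with the
    // flag reset (old behavior), the next initialize() saw a non-default
    // Log4j config, skipped reloading, and kept the first layout.
  }
}

object Demo extends App {
  LoggingModel.initialize()
  println(LoggingModel.configLocation) // JSON layout installed on the first pass

  // SparkSubmit then applies --conf spark.log.structuredLogging.enabled=false:
  LoggingModel.structuredLoggingEnabled = false
  LoggingModel.uninitialize()
  LoggingModel.initialize() // re-runs because defaultSparkLog4jConfig is still true
  println(LoggingModel.configLocation) // now the pattern-layout file
}
```

Under this model, reverting either half of the patch (resetting the flag in `uninitialize()`, or checking only `isLog4j2DefaultConfigured`) makes the second `initialize()` a no-op, which matches the reported bug: the JSON layout stays active regardless of the configured value.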