-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-51436][CORE][SQL][K8s][SS] Fix bug that cancel Future specified mayInterruptIfRunning with true #50209
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -250,7 +250,7 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) | |
|
||
override def onStop(): Unit = { | ||
if (timeoutCheckingTask != null) { | ||
timeoutCheckingTask.cancel(true) | ||
timeoutCheckingTask.cancel(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There is no method called in the task that will throw InterruptedException. |
||
} | ||
eventLoopThread.shutdownNow() | ||
killExecutorThread.shutdownNow() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -277,7 +277,7 @@ private[spark] class StandaloneAppClient( | |
|
||
override def onStop(): Unit = { | ||
if (registrationRetryTimer.get != null) { | ||
registrationRetryTimer.get.cancel(true) | ||
registrationRetryTimer.get.cancel(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There is no method called in the task that will throw InterruptedException. |
||
} | ||
registrationRetryThread.shutdownNow() | ||
registerMasterFutures.get.foreach(_.cancel(true)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -214,10 +214,10 @@ private[deploy] class Master( | |
applicationMetricsSystem.report() | ||
// prevent the CompleteRecovery message sending to restarted master | ||
if (recoveryCompletionTask != null) { | ||
recoveryCompletionTask.cancel(true) | ||
recoveryCompletionTask.cancel(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There is no method called in the task that will throw InterruptedException. |
||
} | ||
if (checkForWorkerTimeOutTask != null) { | ||
checkForWorkerTimeOutTask.cancel(true) | ||
checkForWorkerTimeOutTask.cancel(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There is no method called in the task that will throw InterruptedException. |
||
} | ||
forwardMessageThread.shutdownNow() | ||
webUi.stop() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -403,7 +403,7 @@ private[deploy] class Worker( | |
// We have exceeded the initial registration retry threshold | ||
// All retries from now on should use a higher interval | ||
if (connectionAttemptCount == INITIAL_REGISTRATION_RETRIES) { | ||
registrationRetryTimer.foreach(_.cancel(true)) | ||
registrationRetryTimer.foreach(_.cancel(false)) | ||
registrationRetryTimer = Some( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
ditto |
||
forwardMessageScheduler.scheduleAtFixedRate( | ||
() => Utils.tryLogNonFatalError { self.send(ReregisterWithMaster) }, | ||
|
@@ -426,7 +426,7 @@ private[deploy] class Worker( | |
registerMasterFutures.foreach(_.cancel(true)) | ||
registerMasterFutures = null | ||
} | ||
registrationRetryTimer.foreach(_.cancel(true)) | ||
registrationRetryTimer.foreach(_.cancel(false)) | ||
registrationRetryTimer = None | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,7 +63,7 @@ class ExecutorPodsPollingSnapshotSource( | |
@Since("3.1.3") | ||
def stop(): Unit = { | ||
if (pollingFuture != null) { | ||
pollingFuture.cancel(true) | ||
pollingFuture.cancel(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
pollingFuture = null | ||
} | ||
ThreadUtils.shutdown(pollingExecutor) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,7 +108,7 @@ trait BroadcastExchangeLike extends Exchange { | |
case Some(r) => sparkContext.cancelJobsWithTag(this.jobTag, r) | ||
case None => sparkContext.cancelJobsWithTag(this.jobTag) | ||
} | ||
this.relationFuture.cancel(true) | ||
this.relationFuture.cancel(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
} | ||
} | ||
|
||
|
@@ -257,7 +257,7 @@ case class BroadcastExchangeExec( | |
logError(log"Could not execute broadcast in ${MDC(TIMEOUT, timeout)} secs.", ex) | ||
if (!relationFuture.isDone) { | ||
sparkContext.cancelJobsWithTag(jobTag, "The corresponding broadcast query has failed.") | ||
relationFuture.cancel(true) | ||
relationFuture.cancel(false) | ||
} | ||
throw QueryExecutionErrors.executeBroadcastTimeoutError(timeout, Some(ex)) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no method called in the task that will throw
InterruptedException.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why change this, though? If it were interruptible, we'd want to interrupt for sure.
What is the issue if you try to interrupt something and it doesn't do anything?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These tasks cannot respond to an interrupt even if we want to interrupt them.
cancel(true)
interrupts the thread, but that has no effect on tasks that cannot respond to interrupts. In addition, interrupting operations have extra overhead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If true, then this change doesn't do anything, right? I wonder what problem this change is solving.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR wants to avoid the overhead of an ineffective thread interruption.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there overhead? It just sets the interrupted status of the thread.
I'm worried that, if the code changed in some way that would make it interruptible, then we lose the ability to interrupt on cancel.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the one hand, I want to avoid the overhead of the ineffective thread interruption, even if that overhead is not large.
On the other hand, I suggest using interrupts judiciously: use the interrupt mechanism only when it is needed, to avoid excessive dependence on it.