Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/StackExchange.Redis/PhysicalBridge.cs
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ internal void OnHeartbeat(bool ifConnectedOnly)
Interlocked.Exchange(ref connectTimeoutRetryCount, 0);
tmp.BridgeCouldBeNull?.ServerEndPoint?.ClearUnselectable(UnselectableFlags.DidNotRespond);
}
int timedOutThisHeartbeat = tmp.OnBridgeHeartbeat();
tmp.OnBridgeHeartbeat(out int asyncTimeoutThisHeartbeat, out int syncTimeoutThisHeartbeat);
int writeEverySeconds = ServerEndPoint.WriteEverySeconds;
bool configCheckDue = ServerEndPoint.ConfigCheckSeconds > 0 && ServerEndPoint.LastInfoReplicationCheckSecondsAgo >= ServerEndPoint.ConfigCheckSeconds;

Expand Down Expand Up @@ -664,14 +664,15 @@ internal void OnHeartbeat(bool ifConnectedOnly)
}

// This is an "always" check - we always want to evaluate a dead connection from a non-responsive sever regardless of the need to heartbeat above
if (timedOutThisHeartbeat > 0
var totalTimeoutThisHeartbeat = asyncTimeoutThisHeartbeat + syncTimeoutThisHeartbeat;
if ((totalTimeoutThisHeartbeat > 0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: this is comparing sync timeouts but using async thresholds in the line below, so we might be introducing a case in which we've violating async timeout * 4, but only had 1 sync timeout that's not severe, creating more disconnects.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, fixed in 0ec7c4a

&& tmp.LastReadSecondsAgo * 1_000 > (tmp.BridgeCouldBeNull?.Multiplexer.AsyncTimeoutMilliseconds * 4))
{
// If we've received *NOTHING* on the pipe in 4 timeouts worth of time and we're timing out commands, issue a connection failure so that we reconnect
// This is meant to address the scenario we see often in Linux configs where TCP retries will happen for 15 minutes.
// To us as a client, we'll see the socket as green/open/fine when writing but we'll bet getting nothing back.
// Since we can't depend on the pipe to fail in that case, we want to error here based on the criteria above so we reconnect broken clients much faster.
tmp.BridgeCouldBeNull?.Multiplexer.Logger?.LogWarningDeadSocketDetected(tmp.LastReadSecondsAgo, timedOutThisHeartbeat);
tmp.BridgeCouldBeNull?.Multiplexer.Logger?.LogWarningDeadSocketDetected(tmp.LastReadSecondsAgo, totalTimeoutThisHeartbeat);
OnDisconnected(ConnectionFailureType.SocketFailure, tmp, out _, out State oldState);
tmp.Dispose(); // Cleanup the existing connection/socket if any, otherwise it will wait reading indefinitely
}
Expand Down
17 changes: 12 additions & 5 deletions src/StackExchange.Redis/PhysicalConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -746,10 +746,12 @@ internal void GetStormLog(StringBuilder sb)
/// <summary>
/// Runs on every heartbeat for a bridge, timing out any commands that are overdue and returning an integer of how many we timed out.
/// </summary>
/// <returns>How many commands were overdue and threw timeout exceptions.</returns>
internal int OnBridgeHeartbeat()
/// <param name="asyncTimeoutDetected">How many async commands were overdue and threw timeout exceptions.</param>
/// <param name="syncTimeoutDetected">How many sync commands were overdue. No exception are thrown for these commands here.</param>
internal void OnBridgeHeartbeat(out int asyncTimeoutDetected, out int syncTimeoutDetected)
{
var result = 0;
asyncTimeoutDetected = 0;
syncTimeoutDetected = 0;
var now = Environment.TickCount;
Interlocked.Exchange(ref lastBeatTickCount, now);

Expand All @@ -776,7 +778,13 @@ internal int OnBridgeHeartbeat()
multiplexer.OnMessageFaulted(msg, timeoutEx);
msg.SetExceptionAndComplete(timeoutEx, bridge); // tell the message that it is doomed
multiplexer.OnAsyncTimeout();
result++;
asyncTimeoutDetected++;
}
else
{
// Only count how many sync timeouts we detect here.
// The actual timeout is handled in ConnectionMultiplexer.ExecuteSyncImpl().
syncTimeoutDetected++;
}
}
else
Expand All @@ -791,7 +799,6 @@ internal int OnBridgeHeartbeat()
}
}
}
return result;
}

internal void OnInternalError(Exception exception, [CallerMemberName] string? origin = null)
Expand Down
Loading