Skip to content

Commit 527fa9f

Browse files
committed
redis: Use local shared_ptr copies to prevent race conditions
The previous fix with null checks still had a race condition window between checking the pointer and using it. Even with the null check, the shared_ptr could be reset to null by another thread between the check and use. Solution: Make local copies of shared_ptr before use. This ensures the pointer remains valid throughout its usage in the current scope. Changes: 1. startResolveRedis(): Copy info_ to local variable before use 2. updateDnsStats(): Use local copy of info_ 3. DNS callbacks: Use local copy for stats updates 4. onResponse(), onUnexpectedResponse(), onFailure(): Use local copies 5. client_factory_.create(): Check and use local copy of info_ The pattern applied: auto info = parent_.info_; // Make local copy (ref count++) if (!info) { // Check if null return; } info->method(); // Safe to use - won't become null This prevents the crash at line 376 where info_ was becoming null between the check and the access, even with memory_order_acquire.
1 parent ab759f8 commit 527fa9f

File tree

1 file changed

+29
-18
lines changed

1 file changed

+29
-18
lines changed

source/extensions/clusters/redis/redis_cluster.cc

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -257,11 +257,12 @@ void RedisCluster::DnsDiscoveryResolveTarget::startResolveDns() {
257257
active_query_ = nullptr;
258258
ENVOY_LOG(trace, "async DNS resolution complete for {}", dns_address_);
259259
if (status == Network::DnsResolver::ResolutionStatus::Failure || response.empty()) {
260-
if (parent_.info_) {
260+
auto info = parent_.info_;
261+
if (info) {
261262
if (status == Network::DnsResolver::ResolutionStatus::Failure) {
262-
parent_.info_->configUpdateStats().update_failure_.inc();
263+
info->configUpdateStats().update_failure_.inc();
263264
} else {
264-
parent_.info_->configUpdateStats().update_empty_.inc();
265+
info->configUpdateStats().update_empty_.inc();
265266
}
266267
}
267268

@@ -368,16 +369,17 @@ void RedisCluster::RedisDiscoverySession::startResolveRedis() {
368369
return;
369370
}
370371

371-
// Also check if info_ is still valid
372-
if (!parent_.info_) {
372+
// Make a local copy of the shared_ptr to prevent it from becoming null between check and use
373+
auto info = parent_.info_;
374+
if (!info) {
373375
return;
374376
}
375377

376-
parent_.info_->configUpdateStats().update_attempt_.inc();
378+
info->configUpdateStats().update_attempt_.inc();
377379
// If a resolution is currently in progress, skip it.
378380
if (current_request_) {
379381
ENVOY_LOG(debug, "redis cluster slot request is already in progress for '{}'",
380-
parent_.info_ ? parent_.info_->name() : "unknown");
382+
info ? info->name() : "unknown");
381383
return;
382384
}
383385

@@ -400,25 +402,31 @@ void RedisCluster::RedisDiscoverySession::startResolveRedis() {
400402
if (!client) {
401403
client = std::make_unique<RedisDiscoveryClient>(*this);
402404
client->host_ = current_host_address_;
405+
auto parent_info = parent_.info_;
406+
if (!parent_info) {
407+
return;
408+
}
403409
client->client_ = client_factory_.create(host, dispatcher_, shared_from_this(),
404-
redis_command_stats_, parent_.info()->statsScope(),
410+
redis_command_stats_, parent_info->statsScope(),
405411
parent_.auth_username_, parent_.auth_password_, false);
406412
client->client_->addConnectionCallbacks(*client);
407413
}
414+
auto info = parent_.info_;
408415
ENVOY_LOG(debug, "executing redis cluster slot request for '{}'",
409-
parent_.info_ ? parent_.info_->name() : "unknown");
416+
info ? info->name() : "unknown");
410417
current_request_ = client->client_->makeRequest(ClusterSlotsRequest::instance_, *this);
411418
}
412419

413420
void RedisCluster::RedisDiscoverySession::updateDnsStats(
414421
Network::DnsResolver::ResolutionStatus status, bool empty_response) {
415-
if (!parent_.info_) {
422+
auto info = parent_.info_;
423+
if (!info) {
416424
return;
417425
}
418426
if (status == Network::DnsResolver::ResolutionStatus::Failure) {
419-
parent_.info_->configUpdateStats().update_failure_.inc();
427+
info->configUpdateStats().update_failure_.inc();
420428
} else if (empty_response) {
421-
parent_.info_->configUpdateStats().update_empty_.inc();
429+
info->configUpdateStats().update_empty_.inc();
422430
}
423431
}
424432

@@ -579,8 +587,9 @@ void RedisCluster::RedisDiscoverySession::onResponse(
579587
return;
580588
}
581589

590+
auto info = parent_.info_;
582591
ENVOY_LOG(debug, "redis cluster slot request for '{}' succeeded",
583-
parent_.info_ ? parent_.info_->name() : "unknown");
592+
info ? info->name() : "unknown");
584593
current_request_ = nullptr;
585594

586595
const uint32_t SlotRangeStart = 0;
@@ -714,8 +723,9 @@ void RedisCluster::RedisDiscoverySession::onUnexpectedResponse(
714723
}
715724

716725
ENVOY_LOG(warn, "Unexpected response to cluster slot command: {}", value->toString());
717-
if (this->parent_.info_) {
718-
this->parent_.info_->configUpdateStats().update_failure_.inc();
726+
auto info = this->parent_.info_;
727+
if (info) {
728+
info->configUpdateStats().update_failure_.inc();
719729
}
720730
if (resolve_timer_) {
721731
resolve_timer_->enableTimer(parent_.cluster_refresh_rate_);
@@ -730,14 +740,15 @@ void RedisCluster::RedisDiscoverySession::onFailure() {
730740
return;
731741
}
732742

743+
auto info = parent_.info_;
733744
ENVOY_LOG(debug, "redis cluster slot request for '{}' failed",
734-
parent_.info_ ? parent_.info_->name() : "unknown");
745+
info ? info->name() : "unknown");
735746
if (!current_host_address_.empty()) {
736747
auto client_to_delete = client_map_.find(current_host_address_);
737748
client_to_delete->second->client_->close();
738749
}
739-
if (parent_.info_) {
740-
parent_.info_->configUpdateStats().update_failure_.inc();
750+
if (info) {
751+
info->configUpdateStats().update_failure_.inc();
741752
}
742753
if (resolve_timer_) {
743754
resolve_timer_->enableTimer(parent_.cluster_refresh_rate_);

0 commit comments

Comments
 (0)