diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index c11c6abd7f..0fc27cf8f2 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -38,7 +38,10 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private bool isMultiMasterWriteRequest; private Uri locationEndpoint; private RetryContext retryContext; - private DocumentServiceRequest documentServiceRequest; + private DocumentServiceRequest documentServiceRequest; +#if !INTERNAL + private volatile bool addHubRegionProcessingOnlyHeader; +#endif public ClientRetryPolicy( GlobalEndpointManager globalEndpointManager, @@ -222,8 +225,14 @@ public void OnBeforeSendRequest(DocumentServiceRequest request) // set location-based routing directive based on request retry context request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); } - } - + } +#if !INTERNAL + // If previous attempt failed with 404/1002, add the hub-region-processing-only header to all subsequent retry attempts + if (this.addHubRegionProcessingOnlyHeader) + { + request.Headers[HttpConstants.HttpHeaders.ShouldProcessOnlyInHubRegion] = bool.TrueString; + } +#endif // Resolve the endpoint for the request and pin the resolution to the resolved endpoint // This enables marking the endpoint unavailability on endpoint failover/unreachability this.locationEndpoint = this.isThinClientEnabled @@ -322,7 +331,10 @@ private async Task ShouldRetryInternalAsync( if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { + { +#if !INTERNAL + this.addHubRegionProcessingOnlyHeader = true; +#endif return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest); } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs index 6882b42e8d..afd5b89baf 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs @@ -20,6 +20,7 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests using System.Threading.Tasks; using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Handlers; using Microsoft.Azure.Cosmos.Json; using Microsoft.Azure.Cosmos.Query.Core.ExecutionContext; using Microsoft.Azure.Cosmos.Query.Core.QueryClient; @@ -39,7 +40,8 @@ public class CosmosItemTests : BaseCosmosClientHelper { private Container Container = null; private ContainerProperties containerSettings = null; - + + private const string HubRegionHeader = "x-ms-cosmos-hub-region-processing-only"; private static readonly string nonPartitionItemId = "fixed-Container-Item"; private static readonly string undefinedPartitionItemId = "undefined-partition-Item"; @@ -4315,7 +4317,115 @@ private static async Task GivenItemAsyncWhenMissingMemberHandlingIsErrorThenExpe JsonConvert.DefaultSettings = () => default; } - } + } + + [TestMethod] + [Owner("aavasthy")] + [Description("Forces a single 404/1002 from the gateway and verifies ClientRetryPolicy adds x-ms-cosmos-hub-region-processing-only on the retry request.")] + public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002() + { + bool headerObservedOnRetry = false; + int requestCount = 0; + bool shouldReturn404 = true; + + // Created HTTP handler to intercept requests + HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper + { + RequestCallBack = (request, cancellationToken) => + { + // Track all document read requests + if (request.Method == HttpMethod.Get && + request.RequestUri != null && + request.RequestUri.AbsolutePath.Contains("/docs/")) + { + requestCount++; + + // Check for hub header on retry (2nd+ request) + if (requestCount > 1 && + request.Headers.TryGetValues(HubRegionHeader, out IEnumerable values) && + values.Any(v => v.Equals(bool.TrueString, StringComparison.OrdinalIgnoreCase))) + { + headerObservedOnRetry = true; + } + } + + return Task.FromResult(null); + }, + + ResponseIntercepter = (response, request) => + { + if (shouldReturn404 && + request.Method == HttpMethod.Get && + request.RequestUri != null && + request.RequestUri.AbsolutePath.Contains("/docs/")) + { + shouldReturn404 = false; // Only return 404 once + + var errorResponse = new + { + code = "NotFound", + message = "Message: {\"Errors\":[\"Resource Not Found. Learn more: https://aka.ms/cosmosdb-tsg-not-found\"]}\r\nActivityId: " + Guid.NewGuid() + ", Request URI: " + request.RequestUri, + additionalErrorInfo = "" + }; + + HttpResponseMessage notFoundResponse = new HttpResponseMessage(HttpStatusCode.NotFound) + { + Content = new StringContent( + JsonConvert.SerializeObject(errorResponse), + Encoding.UTF8, + "application/json" + ) + }; + + // Add the substatus header for ReadSessionNotAvailable + notFoundResponse.Headers.Add("x-ms-substatus", "1002"); + notFoundResponse.Headers.Add("x-ms-activity-id", Guid.NewGuid().ToString()); + notFoundResponse.Headers.Add("x-ms-request-charge", "1.0"); + + return Task.FromResult(notFoundResponse); + } + + return Task.FromResult(response); + } + }; + + CosmosClientOptions clientOptions = new CosmosClientOptions + { + ConnectionMode = ConnectionMode.Gateway, + ConsistencyLevel = Cosmos.ConsistencyLevel.Session, + HttpClientFactory = () => new HttpClient(httpHandler), + MaxRetryAttemptsOnRateLimitedRequests = 9, + MaxRetryWaitTimeOnRateLimitedRequests = TimeSpan.FromSeconds(30) + }; + + using CosmosClient customClient = TestCommon.CreateCosmosClient(clientOptions); + + Container customContainer = customClient.GetContainer(this.database.Id, this.Container.Id); + + // Create a test item first + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + try + { + // This should trigger 404/1002 on first attempt, then retry with hub header + ItemResponse response = await customContainer.ReadItemAsync( + testItem.id, + new Cosmos.PartitionKey(testItem.pk)); + + Assert.IsNotNull(response); + Assert.IsNotNull(response.Resource); + } + catch (CosmosException) + { + // It's possible the retry also fails, but should still have seen the retry attempt + } + + // Verifying retry happened + Assert.IsTrue(requestCount >= 2, $"Expected at least 2 requests (original + retry), but got {requestCount}"); + Assert.IsTrue(headerObservedOnRetry, $"Expected retry request to include '{HubRegionHeader}: true'"); + } + private async Task AutoGenerateIdPatternTest(Cosmos.PartitionKey pk, T itemWithoutId) { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 26ad1e3b88..927220f3c8 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -27,7 +27,8 @@ public sealed class ClientRetryPolicyTests { private static Uri Location1Endpoint = new Uri("https://location1.documents.azure.com"); private static Uri Location2Endpoint = new Uri("https://location2.documents.azure.com"); - + + private const string HubRegionHeader = "x-ms-cosmos-hub-region-processing-only"; private ReadOnlyCollection preferredLocations; private AccountProperties databaseAccount; private GlobalPartitionEndpointManager partitionKeyRangeLocationCache; @@ -400,6 +401,68 @@ public async Task ClientRetryPolicy_NoRetry_MultiMaster_Write_NoPreferredLocatio { await this.ValidateConnectTimeoutTriggersClientRetryPolicyAsync(isReadRequest: false, useMultipleWriteLocations: true, usesPreferredLocations: false, true); } + + [TestMethod] + [DataRow(true, DisplayName = "Read request - Hub region header persists across retries after 404/1002")] + [DataRow(false, DisplayName = "Write request - Hub region header persists across retries after 404/1002")] + public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_AndPersistsAcrossRetries(bool isReadRequest) + { + // Arrange + const bool enableEndpointDiscovery = true; + + using GlobalEndpointManager endpointManager = this.Initialize( + useMultipleWriteLocations: true, + enableEndpointDiscovery: enableEndpointDiscovery, + isPreferredLocationsListEmpty: false); + + ClientRetryPolicy retryPolicy = new ClientRetryPolicy( + endpointManager, + this.partitionKeyRangeLocationCache, + new RetryOptions(), + enableEndpointDiscovery, + isThinClientEnabled: false); + + DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false); + + // First attempt - header should not exist + retryPolicy.OnBeforeSendRequest(request); + Assert.IsNull(request.Headers.GetValues(HubRegionHeader), "Header should not exist on initial request before any 404/1002 error."); + + // Simulate 404/1002 error + DocumentClientException sessionNotAvailableException = new DocumentClientException( + message: "Simulated 404/1002 ReadSessionNotAvailable", + innerException: null, + statusCode: HttpStatusCode.NotFound, + substatusCode: SubStatusCodes.ReadSessionNotAvailable, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(sessionNotAvailableException, CancellationToken.None); + Assert.IsTrue(shouldRetry.ShouldRetry, "Should retry on 404/1002."); + + // Verify header is added and persists across multiple retry attempts + for (int retryAttempt = 1; retryAttempt <= 3; retryAttempt++) + { + retryPolicy.OnBeforeSendRequest(request); + string[] headerValues = request.Headers.GetValues(HubRegionHeader); + Assert.IsNotNull(headerValues, $"Header should be present on retry attempt {retryAttempt}."); + Assert.AreEqual(1, headerValues.Length, $"Header should have exactly one value on retry attempt {retryAttempt}."); + Assert.AreEqual(bool.TrueString, headerValues[0], $"Header value should be 'True' on retry attempt {retryAttempt}."); + + if (retryAttempt < 3) + { + DocumentClientException serviceUnavailableException = new DocumentClientException( + message: "Simulated 503 ServiceUnavailable", + innerException: null, + statusCode: HttpStatusCode.ServiceUnavailable, + substatusCode: SubStatusCodes.Unknown, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None); + } + } + } private async Task ValidateConnectTimeoutTriggersClientRetryPolicyAsync( bool isReadRequest,