Skip to content

Commit 7b8b3e4

Browse files
HADOOP-17377: ABFS: MsiTokenProvider doesn't retry HTTP 429/410 from the Instance Metadata Service (#5273)
Contributed by Anmol Asrani
1 parent 675b7ef commit 7b8b3e4

File tree

5 files changed

+116
-4
lines changed

5 files changed

+116
-4
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,12 @@ public final class AbfsHttpConstants {
166166
// The HTTP 100 Continue informational status response code indicates that everything so far
167167
// is OK and that the client should continue with the request or ignore it if it is already finished.
168168
public static final String HUNDRED_CONTINUE = "100-continue";
169+
/**
170+
* HTTP status code indicating that the server has received too many requests. A request that
171+
* fails with this status qualifies for a retry, as described in the Microsoft Azure documentation:
172+
* <a href="https://learn.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/how-to-use-vm-token#error-handling">error handling for managed identity token requests</a>.
173+
*/
174+
public static final int HTTP_TOO_MANY_REQUESTS = 429;
169175

170176
public static final char CHAR_FORWARD_SLASH = '/';
171177
public static final char CHAR_EXCLAMATION_POINT = '!';

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ public final class FileSystemConfigurations {
8686
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS = 5;
8787
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MIN_BACKOFF_INTERVAL = 0;
8888
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL = SIXTY_SECONDS;
89-
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF = 2;
89+
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF = 2_000;
9090

9191
public static final int ONE_KB = 1024;
9292
public static final int ONE_MB = ONE_KB * ONE_KB;

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.Hashtable;
3030
import java.util.Map;
3131

32+
import org.apache.hadoop.classification.VisibleForTesting;
3233
import org.apache.hadoop.util.Preconditions;
3334

3435
import com.fasterxml.jackson.core.JsonFactory;
@@ -73,6 +74,11 @@ public static void init(AbfsConfiguration abfsConfiguration) {
7374
tokenFetchRetryPolicy = abfsConfiguration.getOauthTokenFetchRetryPolicy();
7475
}
7576

77+
@VisibleForTesting
78+
public static void setTokenFetchRetryPolicy(ExponentialRetryPolicy retryPolicy) {
79+
tokenFetchRetryPolicy = retryPolicy;
80+
}
81+
7682
/**
7783
* gets Azure Active Directory token using the user ID and password of
7884
* a service principal (that is, Web App in Azure Active Directory).
@@ -255,7 +261,19 @@ public String getRequestId() {
255261
return this.requestId;
256262
}
257263

258-
protected HttpException(
264+
/**
265+
Constructs an instance of HttpException with detailed information about an HTTP error response.
266+
This exception is designed to encapsulate details of an HTTP error response, providing context about the error
267+
encountered during an HTTP operation. It includes the HTTP error code, the associated request ID, an error message,
268+
the URL that triggered the error, the content type of the response, and the response body.
269+
@param httpErrorCode The HTTP error code indicating the nature of the encountered error.
270+
@param requestId The unique identifier associated with the corresponding HTTP request.
271+
@param message A descriptive error message providing additional information about the encountered error.
272+
@param url The URL that resulted in the HTTP error response.
273+
@param contentType The content type of the HTTP response.
274+
@param body The body of the HTTP response, containing more details about the error.
275+
*/
276+
public HttpException(
259277
final int httpErrorCode,
260278
final String requestId,
261279
final String message,
@@ -383,7 +401,20 @@ private static boolean isRecoverableFailure(IOException e) {
383401
|| e instanceof FileNotFoundException);
384402
}
385403

386-
private static AzureADToken getTokenSingleCall(String authEndpoint,
404+
/**
405+
Retrieves an Azure OAuth token for authentication through a single API call.
406+
This method facilitates the acquisition of an OAuth token from Azure Active Directory
407+
to enable secure authentication for various services. It supports both Managed Service Identity (MSI)
408+
tokens and non-MSI tokens based on the provided parameters.
409+
@param authEndpoint The URL endpoint for OAuth token retrieval.
410+
@param payload The payload to be included in the token request. This typically contains grant type and
411+
any required parameters for token acquisition.
412+
@param headers A Hashtable containing additional HTTP headers to be included in the token request.
413+
@param httpMethod The HTTP method to be used for the token request (e.g., GET, POST).
414+
@param isMsi A boolean flag indicating whether to request a Managed Service Identity (MSI) token or not.
415+
@return An AzureADToken object containing the acquired OAuth token and associated metadata.
416+
*/
417+
public static AzureADToken getTokenSingleCall(String authEndpoint,
387418
String payload, Hashtable<String, String> headers, String httpMethod,
388419
boolean isMsi)
389420
throws IOException {

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRetryPolicy.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020

2121
import java.net.HttpURLConnection;
2222

23+
import org.apache.hadoop.classification.VisibleForTesting;
24+
2325
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE;
26+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_TOO_MANY_REQUESTS;
2427

2528
/**
2629
* Abstract Class for Retry policy to be used by {@link AbfsClient}
@@ -57,6 +60,8 @@ public boolean shouldRetry(final int retryCount, final int statusCode) {
5760
return retryCount < maxRetryCount
5861
&& (statusCode < HTTP_CONTINUE
5962
|| statusCode == HttpURLConnection.HTTP_CLIENT_TIMEOUT
63+
|| statusCode == HttpURLConnection.HTTP_GONE
64+
|| statusCode == HTTP_TOO_MANY_REQUESTS
6065
|| (statusCode >= HttpURLConnection.HTTP_INTERNAL_ERROR
6166
&& statusCode != HttpURLConnection.HTTP_NOT_IMPLEMENTED
6267
&& statusCode != HttpURLConnection.HTTP_VERSION));
@@ -84,7 +89,8 @@ public String getAbbreviation() {
8489
* Returns maximum number of retries allowed in this retry policy
8590
* @return max retry count
8691
*/
87-
protected int getMaxRetryCount() {
92+
@VisibleForTesting
93+
public int getMaxRetryCount() {
8894
return maxRetryCount;
8995
}
9096

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsMsiTokenProvider.java

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,27 @@
2020

2121
import java.io.IOException;
2222
import java.util.Date;
23+
import java.util.concurrent.TimeUnit;
24+
import java.util.concurrent.atomic.AtomicInteger;
2325

26+
import org.assertj.core.api.Assertions;
2427
import org.junit.jupiter.api.Test;
2528

2629
import org.apache.commons.lang3.StringUtils;
2730
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
31+
import org.apache.hadoop.fs.azurebfs.oauth2.AzureADAuthenticator;
2832
import org.apache.hadoop.fs.azurebfs.oauth2.AzureADToken;
2933
import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
34+
import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy;
3035

36+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_TOO_MANY_REQUESTS;
3137
import static org.apache.hadoop.fs.azurebfs.constants.AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY;
3238
import static org.apache.hadoop.fs.azurebfs.constants.AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_ENDPOINT;
3339
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID;
3440
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY;
3541
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_MSI_ENDPOINT;
3642
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT;
43+
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS;
3744
import static org.assertj.core.api.Assertions.assertThat;
3845
import static org.assertj.core.api.Assumptions.assumeThat;
3946

@@ -86,4 +93,66 @@ private String getTrimmedPasswordString(AbfsConfiguration conf, String key,
8693
return value.trim();
8794
}
8895

96+
/**
97+
* Verifies, through a stubbed MsiTokenProvider, that the token-fetch retry policy treats HTTP 429 as retriable.
98+
* Ensures shouldRetry returns true for 429 until the maximum retries are reached.
99+
*/
100+
@Test
101+
public void testShouldRetryFor429() throws Exception {
102+
ExponentialRetryPolicy retryPolicy = new ExponentialRetryPolicy(
103+
DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS);
104+
AzureADAuthenticator.setTokenFetchRetryPolicy(retryPolicy);
105+
AtomicInteger attemptCounter = new AtomicInteger(0);
106+
107+
// Inner class to simulate MsiTokenProvider retry logic
108+
class TestMsiTokenProvider extends MsiTokenProvider {
109+
TestMsiTokenProvider(String endpoint, String tenant, String clientId, String authority) {
110+
super(endpoint, tenant, clientId, authority);
111+
}
112+
113+
@Override
114+
public AzureADToken getToken() throws IOException {
115+
int attempt = 0;
116+
while (true) {
117+
attempt++;
118+
attemptCounter.incrementAndGet();
119+
120+
boolean retry = retryPolicy.shouldRetry(attempt - 1,
121+
HTTP_TOO_MANY_REQUESTS);
122+
123+
// Validate shouldRetry returns true until the final attempt
124+
if (attempt < retryPolicy.getMaxRetryCount()) {
125+
Assertions.assertThat(retry)
126+
.describedAs("Attempt %d: shouldRetry must be true for 429", attempt)
127+
.isTrue();
128+
// Simulate retry by continuing
129+
} else {
130+
// Final attempt: the retry count passed in (attempt - 1) is still below the max retry count, so shouldRetry must still be true
131+
Assertions.assertThat(retry)
132+
.describedAs("Final attempt %d: shouldRetry can be false after max retries", attempt)
133+
.isTrue(); // Still true because maxRetries not exceeded yet
134+
135+
// Return a valid fake token
136+
AzureADToken token = new AzureADToken();
137+
token.setAccessToken("fake-token");
138+
token.setExpiry(new Date(System.currentTimeMillis() + TimeUnit.HOURS.toMillis(1)));
139+
return token;
140+
}
141+
}
142+
}
143+
}
144+
AccessTokenProvider tokenProvider = new TestMsiTokenProvider(
145+
"https://fake-endpoint", "tenant", "clientId", "authority"
146+
);
147+
// Trigger token acquisition
148+
AzureADToken token = tokenProvider.getToken();
149+
// Assertions
150+
assertThat(token.getAccessToken()).isEqualTo("fake-token");
151+
// If 429 did not qualify for retry, shouldRetry would return false and an assertion inside getToken() would fail.
152+
// The counter reaching the maximum attempt count verifies that shouldRetry returned true for the throttling status code 429 on every attempt.
153+
Assertions.assertThat(attemptCounter.get())
154+
.describedAs("Number of retries should be equal to "
155+
+ "max attempts for token fetch.")
156+
.isEqualTo(DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_ATTEMPTS);
157+
}
89158
}

0 commit comments

Comments
 (0)