Skip to content

Commit f913eb3

Browse files
Harish Senthilkumar (harishxr)
authored and committed
Add Exponential/Backoff retry to GetResourceTags
1 parent 02c2db1 commit f913eb3

File tree

4 files changed

+375
-20
lines changed

4 files changed

+375
-20
lines changed

agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/client/ecs_client.go

Lines changed: 73 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ecs-agent/api/ecs/client/ecs_client.go

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/aws/amazon-ecs-agent/ecs-agent/metrics"
3535
"github.com/aws/amazon-ecs-agent/ecs-agent/utils"
3636
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry"
37+
"github.com/aws/aws-sdk-go-v2/aws/ratelimit"
3738

3839
"github.com/aws/aws-sdk-go-v2/aws"
3940
awsconfig "github.com/aws/aws-sdk-go-v2/config"
@@ -73,6 +74,14 @@ const (
7374
rciMaxBackoff = 192 * time.Second
7475
rciRetryJitter = 0.2
7576
rciRetryMultiple = 2.0
77+
78+
// The constants below configure the exponential-backoff retry that GetResourceTags performs when it receives transient (non-terminal) errors.
79+
getResourceTagsTimeout = 30 * time.Second
80+
getResourceTagsBackoffMin = 500 * time.Millisecond
81+
getResourceTagsBackoffMax = 10 * time.Second
82+
getResourceTagsJitter = 0.1
83+
getResourceTagsMultiplier = 1.5
84+
getResourceTagsCooldown = 1 * time.Second
7685
)
7786

7887
var nonRetriableErrors = []smithy.APIError{
@@ -889,13 +898,61 @@ func (client *ecsClient) discoverPollEndpoint(containerInstanceArn string,
889898
}
890899

891900
func (client *ecsClient) GetResourceTags(resourceArn string) ([]types.Tag, error) {
892-
output, err := client.standardClient.ListTagsForResource(context.TODO(), &ecsservice.ListTagsForResourceInput{
893-
ResourceArn: &resourceArn,
901+
backoff := retry.NewExponentialBackoff(
902+
getResourceTagsBackoffMin,
903+
getResourceTagsBackoffMax,
904+
getResourceTagsJitter,
905+
getResourceTagsMultiplier,
906+
)
907+
ctx, cancel := context.WithTimeout(context.Background(), getResourceTagsTimeout)
908+
defer cancel()
909+
910+
var (
911+
tags []types.Tag
912+
lastErr error
913+
retryCount int
914+
)
915+
916+
err := retry.RetryWithBackoffCtx(ctx, backoff, func() error {
917+
retryCount++
918+
919+
output, err := client.standardClient.ListTagsForResource(ctx, &ecsservice.ListTagsForResourceInput{
920+
ResourceArn: &resourceArn,
921+
})
922+
if err == nil {
923+
tags = output.Tags
924+
return nil
925+
}
926+
927+
lastErr = err
928+
929+
if isTransientError(err) {
930+
logger.Warn("ListTagsForResource throttled or rate limited", logger.Fields{
931+
field.Error: err,
932+
"resource": resourceArn,
933+
"attempt": retryCount,
934+
})
935+
return apierrors.NewRetriableError(apierrors.NewRetriable(true), err)
936+
}
937+
938+
logger.Error("ListTagsForResource failed", logger.Fields{
939+
field.Error: err,
940+
"resource": resourceArn,
941+
"attempt": retryCount,
942+
})
943+
return apierrors.NewRetriableError(apierrors.NewRetriable(false), err)
894944
})
945+
895946
if err != nil {
896-
return nil, err
947+
logger.Error("GetResourceTags exhausted retries", logger.Fields{
948+
field.Error: lastErr,
949+
"resource": resourceArn,
950+
"attempts": retryCount,
951+
})
952+
return nil, fmt.Errorf("GetResourceTags failed for %s after %d attempts: %v", resourceArn, retryCount, lastErr)
897953
}
898-
return output.Tags, nil
954+
955+
return tags, nil
899956
}
900957

901958
func (client *ecsClient) UpdateContainerInstancesState(instanceARN string, status types.ContainerInstanceStatus) error {
@@ -995,13 +1052,21 @@ func trimString(inputString string, maxLen int) string {
9951052
}
9961053

9971054
func isTransientError(err error) bool {
998-
var apiErr smithy.APIError
999-
// Using errors.As to unwrap as opposed to errors.Is.
1000-
if errors.As(err, &apiErr) {
1055+
var (
1056+
apiErr smithy.APIError
1057+
quotaErr ratelimit.QuotaExceededError
1058+
)
1059+
1060+
switch {
1061+
case errors.As(err, &apiErr):
10011062
switch apiErr.ErrorCode() {
1002-
case apierrors.ErrCodeServerException, apierrors.ErrCodeLimitExceededException:
1063+
case "ThrottlingException",
1064+
apierrors.ErrCodeServerException,
1065+
apierrors.ErrCodeLimitExceededException:
10031066
return true
10041067
}
1068+
case errors.As(err, &quotaErr):
1069+
return true
10051070
}
10061071
return false
10071072
}

0 commit comments

Comments
 (0)