Skip to content

Commit 5bf6a31

Browse files
authored
handle service conflict exceptions with RetryError (#959)
1 parent cb95fae commit 5bf6a31

2 files changed

Lines changed: 172 additions & 0 deletions

File tree

pkg/deploy/lattice/service_manager.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,13 @@ func (m *defaultServiceManager) deleteAssociation(ctx context.Context, assocArn
384384
delReq := &DelSnSvcAssocReq{ServiceNetworkServiceAssociationIdentifier: assocArn}
385385
_, err := m.cloud.Lattice().DeleteServiceNetworkServiceAssociation(ctx, delReq)
386386
if err != nil {
387+
// Any ConflictException on delete is transient — it means the association is in a
388+
// mutating state (CREATE_IN_PROGRESS, UPDATE_IN_PROGRESS). Retry is always correct.
389+
var ce *types.ConflictException
390+
if errors.As(err, &ce) {
391+
return fmt.Errorf("%w: failed DeleteServiceNetworkServiceAssociation %s due to %s",
392+
lattice_runtime.NewRetryError(), aws.ToString(assocArn), err)
393+
}
387394
return fmt.Errorf("failed DeleteServiceNetworkServiceAssociation %s due to %s",
388395
aws.ToString(assocArn), err)
389396
}
@@ -398,6 +405,12 @@ func (m *defaultServiceManager) deleteService(ctx context.Context, svc *SvcSumma
398405
}
399406
_, err := m.cloud.Lattice().DeleteService(ctx, &delInput)
400407
if err != nil {
408+
// Any ConflictException on delete is transient — it means associations are still
409+
// being removed or the service is in a mutating state. Retry is always correct.
410+
var ce *types.ConflictException
411+
if errors.As(err, &ce) {
412+
return fmt.Errorf("%w: failed DeleteService %s due to %s", lattice_runtime.NewRetryError(), aws.ToString(svc.Id), err)
413+
}
401414
return fmt.Errorf("failed DeleteService %s due to %s", aws.ToString(svc.Id), err)
402415
}
403416

pkg/deploy/lattice/service_manager_test.go

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,3 +975,162 @@ func Test_ServiceManager_ServiceTakeover_NotAllowed(t *testing.T) {
975975
assert.Contains(t, err.Error(), "svc-arn")
976976
assert.Equal(t, "", status.Arn)
977977
}
978+
979+
func TestCreateAssociation_ConflictCreateInProgress_ReturnsRetryError(t *testing.T) {
980+
c := gomock.NewController(t)
981+
defer c.Finish()
982+
983+
mockLattice := mocks.NewMockLattice(c)
984+
mockTagging := mocks.NewMockTagging(c)
985+
cfg := pkg_aws.CloudConfig{VpcId: "vpc-id", AccountId: "account-id"}
986+
cl := pkg_aws.NewDefaultCloudWithTagging(mockLattice, mockTagging, cfg)
987+
ctx := context.Background()
988+
m := NewServiceManager(gwlog.FallbackLogger, cl)
989+
990+
svc := &Service{
991+
Spec: model.ServiceSpec{
992+
ServiceTagFields: model.ServiceTagFields{RouteName: "rt", RouteNamespace: "ns", RouteType: core.HttpRouteType},
993+
ServiceNetworkNames: []string{"sn"},
994+
},
995+
}
996+
997+
mockLattice.EXPECT().FindService(gomock.Any(), gomock.Any()).
998+
Return(nil, mocks.NewNotFoundError("", ""))
999+
mockLattice.EXPECT().CreateService(gomock.Any(), gomock.Any(), gomock.Any()).
1000+
Return(&vpclattice.CreateServiceOutput{
1001+
Id: aws.String("svc-id"), Arn: aws.String("svc-arn"), Name: aws.String("svc"),
1002+
}, nil)
1003+
mockLattice.EXPECT().FindServiceNetwork(gomock.Any(), "sn").
1004+
Return(&mocks.ServiceNetworkInfo{SvcNetwork: types.ServiceNetworkSummary{Id: aws.String("sn-id")}}, nil)
1005+
mockLattice.EXPECT().CreateServiceNetworkServiceAssociation(gomock.Any(), gomock.Any(), gomock.Any()).
1006+
Return(nil, &types.ConflictException{
1007+
Message: aws.String("Invalid resource status for this operation, resource id svc-id, status: CREATE_IN_PROGRESS."),
1008+
})
1009+
1010+
_, err := m.Upsert(ctx, svc)
1011+
assert.NotNil(t, err)
1012+
var retryErr *lattice_runtime.RequeueNeededAfter
1013+
assert.True(t, errors.As(err, &retryErr))
1014+
}
1015+
1016+
func TestCreateAssociation_ConflictNonTransient_ReturnsRegularError(t *testing.T) {
1017+
c := gomock.NewController(t)
1018+
defer c.Finish()
1019+
1020+
mockLattice := mocks.NewMockLattice(c)
1021+
mockTagging := mocks.NewMockTagging(c)
1022+
cfg := pkg_aws.CloudConfig{VpcId: "vpc-id", AccountId: "account-id"}
1023+
cl := pkg_aws.NewDefaultCloudWithTagging(mockLattice, mockTagging, cfg)
1024+
ctx := context.Background()
1025+
m := NewServiceManager(gwlog.FallbackLogger, cl)
1026+
1027+
svc := &Service{
1028+
Spec: model.ServiceSpec{
1029+
ServiceTagFields: model.ServiceTagFields{RouteName: "rt", RouteNamespace: "ns", RouteType: core.HttpRouteType},
1030+
ServiceNetworkNames: []string{"sn"},
1031+
},
1032+
}
1033+
1034+
mockLattice.EXPECT().FindService(gomock.Any(), gomock.Any()).
1035+
Return(nil, mocks.NewNotFoundError("", ""))
1036+
mockLattice.EXPECT().CreateService(gomock.Any(), gomock.Any(), gomock.Any()).
1037+
Return(&vpclattice.CreateServiceOutput{
1038+
Id: aws.String("svc-id"), Arn: aws.String("svc-arn"), Name: aws.String("svc"),
1039+
}, nil)
1040+
mockLattice.EXPECT().FindServiceNetwork(gomock.Any(), "sn").
1041+
Return(&mocks.ServiceNetworkInfo{SvcNetwork: types.ServiceNetworkSummary{Id: aws.String("sn-id")}}, nil)
1042+
mockLattice.EXPECT().CreateServiceNetworkServiceAssociation(gomock.Any(), gomock.Any(), gomock.Any()).
1043+
Return(nil, &types.ConflictException{
1044+
Message: aws.String("Association already exists."),
1045+
})
1046+
1047+
_, err := m.Upsert(ctx, svc)
1048+
assert.NotNil(t, err)
1049+
var retryErr *lattice_runtime.RequeueNeededAfter
1050+
assert.False(t, errors.As(err, &retryErr))
1051+
assert.Contains(t, err.Error(), "failed CreateServiceNetworkServiceAssociation")
1052+
}
1053+
1054+
func TestDeleteService_ConflictException_ReturnsRetryError(t *testing.T) {
1055+
c := gomock.NewController(t)
1056+
defer c.Finish()
1057+
1058+
mockLattice := mocks.NewMockLattice(c)
1059+
mockTagging := mocks.NewMockTagging(c)
1060+
cfg := pkg_aws.CloudConfig{VpcId: "vpc-id", AccountId: "account-id", ClusterName: "cluster"}
1061+
cl := pkg_aws.NewDefaultCloudWithTagging(mockLattice, mockTagging, cfg)
1062+
ctx := context.Background()
1063+
m := NewServiceManager(gwlog.FallbackLogger, cl)
1064+
1065+
svc := &Service{
1066+
Spec: model.ServiceSpec{
1067+
ServiceTagFields: model.ServiceTagFields{RouteName: "rt", RouteNamespace: "ns", RouteType: core.HttpRouteType},
1068+
ServiceNetworkNames: []string{},
1069+
},
1070+
IsDeleted: true,
1071+
}
1072+
1073+
mockLattice.EXPECT().FindService(gomock.Any(), gomock.Any()).
1074+
Return(&types.ServiceSummary{
1075+
Id: aws.String("svc-id"), Arn: aws.String("svc-arn"), Name: aws.String("svc"),
1076+
}, nil)
1077+
mockLattice.EXPECT().ListTagsForResource(gomock.Any(), gomock.Any(), gomock.Any()).
1078+
Return(&vpclattice.ListTagsForResourceOutput{
1079+
Tags: cl.DefaultTagsMergedWith(svc.Spec.ToTags()),
1080+
}, nil)
1081+
mockLattice.EXPECT().ListServiceNetworkServiceAssociationsAsList(gomock.Any(), gomock.Any()).
1082+
Return([]types.ServiceNetworkServiceAssociationSummary{}, nil)
1083+
mockLattice.EXPECT().ListListenersAsList(gomock.Any(), gomock.Any()).
1084+
Return([]types.ListenerSummary{}, nil)
1085+
mockLattice.EXPECT().DeleteService(gomock.Any(), gomock.Any(), gomock.Any()).
1086+
Return(nil, &types.ConflictException{
1087+
Message: aws.String("Service svc-id has one or more associated service networks."),
1088+
})
1089+
1090+
err := m.Delete(ctx, svc)
1091+
assert.NotNil(t, err)
1092+
var retryErr *lattice_runtime.RequeueNeededAfter
1093+
assert.True(t, errors.As(err, &retryErr))
1094+
}
1095+
1096+
func TestDeleteAssociation_ConflictException_ReturnsRetryError(t *testing.T) {
1097+
c := gomock.NewController(t)
1098+
defer c.Finish()
1099+
1100+
mockLattice := mocks.NewMockLattice(c)
1101+
mockTagging := mocks.NewMockTagging(c)
1102+
cfg := pkg_aws.CloudConfig{VpcId: "vpc-id", AccountId: "account-id", ClusterName: "cluster"}
1103+
cl := pkg_aws.NewDefaultCloudWithTagging(mockLattice, mockTagging, cfg)
1104+
ctx := context.Background()
1105+
m := NewServiceManager(gwlog.FallbackLogger, cl)
1106+
1107+
svc := &Service{
1108+
Spec: model.ServiceSpec{
1109+
ServiceTagFields: model.ServiceTagFields{RouteName: "rt", RouteNamespace: "ns", RouteType: core.HttpRouteType},
1110+
ServiceNetworkNames: []string{},
1111+
},
1112+
IsDeleted: true,
1113+
}
1114+
1115+
mockLattice.EXPECT().FindService(gomock.Any(), gomock.Any()).
1116+
Return(&types.ServiceSummary{
1117+
Id: aws.String("svc-id"), Arn: aws.String("svc-arn"), Name: aws.String("svc"),
1118+
}, nil)
1119+
mockLattice.EXPECT().ListTagsForResource(gomock.Any(), gomock.Any(), gomock.Any()).
1120+
Return(&vpclattice.ListTagsForResourceOutput{
1121+
Tags: cl.DefaultTagsMergedWith(svc.Spec.ToTags()),
1122+
}, nil)
1123+
mockLattice.EXPECT().ListServiceNetworkServiceAssociationsAsList(gomock.Any(), gomock.Any()).
1124+
Return([]types.ServiceNetworkServiceAssociationSummary{
1125+
{Arn: aws.String("assoc-arn"), Id: aws.String("snsa-123")},
1126+
}, nil)
1127+
mockLattice.EXPECT().DeleteServiceNetworkServiceAssociation(gomock.Any(), gomock.Any(), gomock.Any()).
1128+
Return(nil, &types.ConflictException{
1129+
Message: aws.String("Creation is in progress."),
1130+
})
1131+
1132+
err := m.Delete(ctx, svc)
1133+
assert.NotNil(t, err)
1134+
var retryErr *lattice_runtime.RequeueNeededAfter
1135+
assert.True(t, errors.As(err, &retryErr))
1136+
}

0 commit comments

Comments
 (0)