Skip to content

Commit ec27bfa

Browse files
committed
Fix flakiness in parallel integration tests
The main problem was flakes where testenv did not delete rabbitmqcluster objects, even after issuing a delete command and waiting for 5 seconds. The "solution" is to not delete the rabbitmqcluster objects. Instead, create objects with unique names. I also updated the default k8s version for testenv from 1.26 to 1.29. The current stable version of upstream Kubernetes is 1.34. Testing on N-5 is reasonable for maintaining the backwards compatibility promise. Earlier Kubernetes versions are EOL'ed.
1 parent af16449 commit ec27bfa

11 files changed

+47
-193
lines changed

Makefile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ __check_defined = \
2727
###
2828

2929
# Kubernetes version used by envtest (override by setting ENVTEST_K8S_VERSION)
30-
ENVTEST_K8S_VERSION ?= 1.26.1
30+
ENVTEST_K8S_VERSION ?= 1.29.5
3131
LOCAL_TESTBIN = $(CURDIR)/testbin
3232
$(LOCAL_TESTBIN):
3333
mkdir -p $@
@@ -66,10 +66,11 @@ unit-tests::manifests
6666
unit-tests::just-unit-tests ## Run unit tests
6767

6868
GINKGO ?= go run github.com/onsi/ginkgo/v2/ginkgo
69+
GINKGO_PROCS ?= 4
6970

7071
.PHONY: just-unit-tests
7172
just-unit-tests:
72-
$(GINKGO) -r -p --randomize-all --fail-on-pending api/ internal/ pkg/
73+
$(GINKGO) -r -p --randomize-all --fail-on-pending --procs=$(GINKGO_PROCS) $(GINKGO_EXTRA) api/ internal/ pkg/
7374

7475
.PHONY: integration-tests
7576
integration-tests::install-tools
@@ -83,7 +84,7 @@ integration-tests::just-integration-tests ## Run integration tests
8384

8485
.PHONY: just-integration-tests
8586
just-integration-tests:
86-
$(GINKGO) -r -p --fail-on-pending --randomize-all controllers/
87+
$(GINKGO) -r -p --fail-on-pending --randomize-all --procs=$(GINKGO_PROCS) $(GINKGO_EXTRA) controllers/
8788

8889
manifests: install-tools ## Generate manifests e.g. CRD, RBAC etc.
8990
controller-gen crd rbac:roleName=operator-role paths="./api/...;./controllers/..." output:crd:artifacts:config=config/crd/bases
@@ -236,7 +237,7 @@ cert-manager-rm:
236237
kubectl delete -f https://github.com/cert-manager/cert-manager/releases/download/v$(CERT_MANAGER_VERSION)/cert-manager.yaml --ignore-not-found
237238

238239
system-tests: install-tools ## Run end-to-end tests against Kubernetes cluster defined in ~/.kube/config
239-
NAMESPACE="$(SYSTEM_TEST_NAMESPACE)" K8S_OPERATOR_NAMESPACE="$(K8S_OPERATOR_NAMESPACE)" $(GINKGO) -nodes=3 --randomize-all -r system_tests/
240+
NAMESPACE="$(SYSTEM_TEST_NAMESPACE)" K8S_OPERATOR_NAMESPACE="$(K8S_OPERATOR_NAMESPACE)" $(GINKGO) -nodes=3 --randomize-all -r $(GINKGO_EXTRA) system_tests/
240241

241242
kubectl-plugin-tests: ## Run kubectl-rabbitmq tests
242243
@echo "running kubectl plugin tests"

controllers/rabbitmqcluster_controller_test.go

Lines changed: 10 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
"k8s.io/utils/ptr"
1919

20+
"k8s.io/apimachinery/pkg/types"
2021
"k8s.io/apimachinery/pkg/util/intstr"
2122

2223
. "github.com/onsi/ginkgo/v2"
@@ -30,7 +31,6 @@ import (
3031
apierrors "k8s.io/apimachinery/pkg/api/errors"
3132
k8sresource "k8s.io/apimachinery/pkg/api/resource"
3233
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
33-
"k8s.io/apimachinery/pkg/types"
3434
runtimeClient "sigs.k8s.io/controller-runtime/pkg/client"
3535
)
3636

@@ -60,18 +60,10 @@ var _ = Describe("RabbitmqClusterController", func() {
6060
waitForClusterCreation(ctx, cluster, client)
6161
})
6262

63-
AfterEach(func() {
64-
Expect(client.Delete(ctx, cluster)).To(Succeed())
65-
Eventually(func() bool {
66-
err := client.Get(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, cluster)
67-
return apierrors.IsNotFound(err)
68-
}, 5).Should(BeTrue(), "expected to delete cluster '%s' but it still exists", cluster.Name)
69-
})
70-
7163
It("works", func() {
7264
By("populating the image spec with the default image", func() {
7365
fetchedCluster := &rabbitmqv1beta1.RabbitmqCluster{}
74-
Expect(client.Get(ctx, types.NamespacedName{Name: "rabbitmq-one", Namespace: defaultNamespace}, fetchedCluster)).To(Succeed())
66+
Expect(client.Get(ctx, runtimeClient.ObjectKeyFromObject(cluster), fetchedCluster)).To(Succeed())
7567
Expect(fetchedCluster.Spec.Image).To(Equal(defaultRabbitmqImage))
7668
})
7769

@@ -199,10 +191,6 @@ var _ = Describe("RabbitmqClusterController", func() {
199191
waitForClusterCreation(ctx, cluster, client)
200192
})
201193

202-
AfterEach(func() {
203-
Expect(client.Delete(ctx, cluster)).To(Succeed())
204-
})
205-
206194
It("adds annotations to child resources", func() {
207195
headlessSvc := service(ctx, cluster, "nodes")
208196
Expect(headlessSvc.Annotations).Should(HaveKeyWithValue("my-annotation", "this-annotation"))
@@ -234,10 +222,6 @@ var _ = Describe("RabbitmqClusterController", func() {
234222
waitForClusterCreation(ctx, cluster, client)
235223
})
236224

237-
AfterEach(func() {
238-
Expect(client.Delete(ctx, cluster)).To(Succeed())
239-
})
240-
241225
It("applies the Vault configuration", func() {
242226
By("not exposing DefaultUser or its Binding as status")
243227
Expect(cluster).NotTo(BeNil())
@@ -246,7 +230,7 @@ var _ = Describe("RabbitmqClusterController", func() {
246230
Expect(cluster.Status.Binding).To(BeNil())
247231
By("setting the default user updater image to the controller default")
248232
fetchedCluster := &rabbitmqv1beta1.RabbitmqCluster{}
249-
Expect(client.Get(ctx, types.NamespacedName{Name: "rabbitmq-vault", Namespace: defaultNamespace}, fetchedCluster)).To(Succeed())
233+
Expect(client.Get(ctx, runtimeClient.ObjectKeyFromObject(cluster), fetchedCluster)).To(Succeed())
250234
Expect(fetchedCluster.Spec.SecretBackend.Vault.DefaultUserUpdaterImage).To(PointTo(Equal(defaultUserUpdaterImage)))
251235
})
252236
})
@@ -267,10 +251,6 @@ var _ = Describe("RabbitmqClusterController", func() {
267251
waitForClusterCreation(ctx, cluster, client)
268252
})
269253

270-
AfterEach(func() {
271-
Expect(client.Delete(ctx, cluster)).To(Succeed())
272-
})
273-
274254
It("configures the imagePullSecret on sts correctly", func() {
275255
By("using the instance spec secret", func() {
276256
sts := statefulSet(ctx, cluster)
@@ -307,10 +287,6 @@ var _ = Describe("RabbitmqClusterController", func() {
307287
Expect(client.Create(ctx, cluster)).To(Succeed())
308288
})
309289

310-
AfterEach(func() {
311-
Expect(client.Delete(ctx, cluster)).To(Succeed())
312-
})
313-
314290
It("adds the affinity rules to pod spec", func() {
315291
sts := statefulSet(ctx, cluster)
316292
podSpecAffinity := sts.Spec.Template.Spec.Affinity
@@ -319,11 +295,6 @@ var _ = Describe("RabbitmqClusterController", func() {
319295
})
320296

321297
Context("Service configurations", func() {
322-
AfterEach(func() {
323-
Expect(client.Delete(ctx, cluster)).To(Succeed())
324-
Expect(clientSet.CoreV1().Services(cluster.Namespace).Delete(ctx, cluster.ChildResourceName(""), metav1.DeleteOptions{}))
325-
})
326-
327298
It("creates the service type and annotations as configured in instance spec", func() {
328299
cluster = &rabbitmqv1beta1.RabbitmqCluster{
329300
ObjectMeta: metav1.ObjectMeta{
@@ -360,10 +331,6 @@ var _ = Describe("RabbitmqClusterController", func() {
360331
})
361332

362333
Context("Resource requirements configurations", func() {
363-
AfterEach(func() {
364-
Expect(client.Delete(ctx, cluster)).To(Succeed())
365-
})
366-
367334
It("uses resource requirements from instance spec when provided", func() {
368335
cluster = &rabbitmqv1beta1.RabbitmqCluster{
369336
ObjectMeta: metav1.ObjectMeta{
@@ -400,10 +367,6 @@ var _ = Describe("RabbitmqClusterController", func() {
400367
})
401368

402369
Context("Persistence configurations", func() {
403-
AfterEach(func() {
404-
Expect(client.Delete(ctx, cluster)).To(Succeed())
405-
})
406-
407370
It("creates the RabbitmqCluster with the specified storage from instance spec", func() {
408371
cluster = &rabbitmqv1beta1.RabbitmqCluster{
409372
ObjectMeta: metav1.ObjectMeta{
@@ -426,11 +389,11 @@ var _ = Describe("RabbitmqClusterController", func() {
426389
})
427390
})
428391

429-
Context("Custom Resource updates", func() {
392+
Context("Custom Resource updates", FlakeAttempts(3), func() {
430393
BeforeEach(func() {
431394
cluster = &rabbitmqv1beta1.RabbitmqCluster{
432395
ObjectMeta: metav1.ObjectMeta{
433-
Name: "rabbitmq-cr-update",
396+
Name: fmt.Sprintf("cr-update-%d-%d", GinkgoParallelProcess(), time.Now().Unix()),
434397
Namespace: defaultNamespace,
435398
},
436399
}
@@ -439,11 +402,6 @@ var _ = Describe("RabbitmqClusterController", func() {
439402
waitForClusterCreation(ctx, cluster, client)
440403
})
441404

442-
AfterEach(func() {
443-
Expect(client.Delete(ctx, cluster)).To(Succeed())
444-
waitForClusterDeletion(ctx, cluster, client)
445-
})
446-
447405
It("the service annotations are updated", func() {
448406
Expect(updateWithRetry(cluster, func(r *rabbitmqv1beta1.RabbitmqCluster) {
449407
r.Spec.Service.Annotations = map[string]string{"test-key": "test-value"}
@@ -867,11 +825,6 @@ var _ = Describe("RabbitmqClusterController", func() {
867825
waitForClusterCreation(ctx, cluster, client)
868826
})
869827

870-
AfterEach(func() {
871-
Expect(client.Delete(ctx, cluster)).To(Succeed())
872-
waitForClusterDeletion(ctx, cluster, client)
873-
})
874-
875828
It("creates a StatefulSet with the override applied", func() {
876829
sts := statefulSet(ctx, cluster)
877830
myStorage := k8sresource.MustParse("100Gi")
@@ -1081,11 +1034,13 @@ var _ = Describe("RabbitmqClusterController", func() {
10811034
})
10821035

10831036
Context("Service Override", func() {
1084-
1037+
var clusterName string
10851038
BeforeEach(func() {
1039+
suffix = fmt.Sprintf("-%d", time.Now().UnixNano())
1040+
clusterName = "svc-override" + suffix
10861041
cluster = &rabbitmqv1beta1.RabbitmqCluster{
10871042
ObjectMeta: metav1.ObjectMeta{
1088-
Name: "svc-override",
1043+
Name: clusterName,
10891044
Namespace: defaultNamespace,
10901045
},
10911046
Spec: rabbitmqv1beta1.RabbitmqClusterSpec{
@@ -1118,11 +1073,6 @@ var _ = Describe("RabbitmqClusterController", func() {
11181073
waitForClusterCreation(ctx, cluster, client)
11191074
})
11201075

1121-
AfterEach(func() {
1122-
Expect(client.Delete(ctx, cluster)).To(Succeed())
1123-
waitForClusterDeletion(ctx, cluster, client)
1124-
})
1125-
11261076
It("creates a Service with the override applied", func() {
11271077
amqpTargetPort := intstr.IntOrString{IntVal: int32(5672)}
11281078
managementTargetPort := intstr.IntOrString{IntVal: int32(15672)}
@@ -1159,7 +1109,7 @@ var _ = Describe("RabbitmqClusterController", func() {
11591109
TargetPort: additionalTargetPort,
11601110
},
11611111
))
1162-
Expect(svc.Spec.Selector).To(Equal(map[string]string{"a-selector": "a-label", "app.kubernetes.io/name": "svc-override"}))
1112+
Expect(svc.Spec.Selector).To(Equal(map[string]string{"a-selector": "a-label", "app.kubernetes.io/name": clusterName}))
11631113
Expect(svc.Spec.SessionAffinity).To(Equal(corev1.ServiceAffinityClientIP))
11641114
Expect(svc.Spec.PublishNotReadyAddresses).To(BeFalse())
11651115
})
@@ -1188,10 +1138,6 @@ var _ = Describe("RabbitmqClusterController", func() {
11881138
waitForClusterCreation(ctx, cluster, client)
11891139
})
11901140

1191-
AfterEach(func() {
1192-
Expect(client.Delete(ctx, cluster)).To(Succeed())
1193-
})
1194-
11951141
It("works", func() {
11961142
By("skipping reconciling if label is set to true", func() {
11971143
Expect(updateWithRetry(cluster, func(r *rabbitmqv1beta1.RabbitmqCluster) {

controllers/reconcile_cli_test.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package controllers_test
22

33
import (
4+
"fmt"
45
"time"
56

67
"sigs.k8s.io/controller-runtime/pkg/envtest/komega"
@@ -22,11 +23,6 @@ var _ = Describe("Reconcile CLI", func() {
2223
defaultNamespace = "default"
2324
)
2425

25-
AfterEach(func() {
26-
Expect(client.Delete(ctx, cluster)).To(Succeed())
27-
waitForClusterDeletion(ctx, cluster, client)
28-
})
29-
3026
When("cluster is created", func() {
3127
var sts *appsv1.StatefulSet
3228
BeforeEach(func() {
@@ -63,10 +59,19 @@ var _ = Describe("Reconcile CLI", func() {
6359
})
6460

6561
When("the cluster is configured to run post-deploy steps", func() {
62+
const (
63+
rmqNamePrefix = "rabbitmq-three"
64+
)
65+
66+
var (
67+
rmqName string
68+
)
69+
6670
BeforeEach(func() {
71+
rmqName = fmt.Sprintf("%s-%d", rmqNamePrefix, time.Now().Unix())
6772
cluster = &rabbitmqv1beta1.RabbitmqCluster{
6873
ObjectMeta: metav1.ObjectMeta{
69-
Name: "rabbitmq-three",
74+
Name: rmqName,
7075
Namespace: defaultNamespace,
7176
},
7277
Spec: rabbitmqv1beta1.RabbitmqClusterSpec{
@@ -83,6 +88,8 @@ var _ = Describe("Reconcile CLI", func() {
8388
BeforeEach(func() {
8489
sts = statefulSet(ctx, cluster)
8590
sts.Status.Replicas = 3
91+
sts.Status.AvailableReplicas = 2
92+
sts.Status.ReadyReplicas = 2
8693
sts.Status.CurrentReplicas = 2
8794
sts.Status.CurrentRevision = "some-old-revision"
8895
sts.Status.UpdatedReplicas = 1
@@ -96,7 +103,7 @@ var _ = Describe("Reconcile CLI", func() {
96103

97104
By("setting an annotation on the CR", func() {
98105
rmq := &rabbitmqv1beta1.RabbitmqCluster{}
99-
rmq.Name = "rabbitmq-three"
106+
rmq.Name = rmqName
100107
rmq.Namespace = defaultNamespace
101108
Eventually(k.Object(rmq)).Within(time.Second * 5).WithPolling(time.Second).Should(HaveField("ObjectMeta.Annotations", HaveKey("rabbitmq.com/queueRebalanceNeededAt")))
102109

@@ -112,11 +119,12 @@ var _ = Describe("Reconcile CLI", func() {
112119
sts.Status.UpdatedReplicas = 3
113120
sts.Status.UpdateRevision = "some-new-revision"
114121
sts.Status.ReadyReplicas = 2
122+
sts.Status.AvailableReplicas = 2
115123
})).Should(Succeed())
116124

117125
// by not removing the annotation
118126
rmq := &rabbitmqv1beta1.RabbitmqCluster{}
119-
rmq.Name = "rabbitmq-three"
127+
rmq.Name = rmqName
120128
rmq.Namespace = defaultNamespace
121129
Eventually(k.Object(rmq)).Within(time.Second * 5).WithPolling(time.Second).Should(HaveField("ObjectMeta.Annotations", HaveKey("rabbitmq.com/queueRebalanceNeededAt")))
122130

@@ -130,11 +138,12 @@ var _ = Describe("Reconcile CLI", func() {
130138
By("removing the annotation once all Pods are up, and triggering the queue rebalance", func() {
131139
// setup transition to all pods ready
132140
sts.Status.ReadyReplicas = 3
141+
sts.Status.AvailableReplicas = 3
133142
Expect(client.Status().Update(ctx, sts)).To(Succeed())
134143

135144
// by not having the annotation
136145
rmq := &rabbitmqv1beta1.RabbitmqCluster{}
137-
rmq.Name = "rabbitmq-three"
146+
rmq.Name = rmqName
138147
rmq.Namespace = defaultNamespace
139148
Eventually(k.Object(rmq)).Within(time.Second * 5).WithPolling(time.Second).ShouldNot(HaveField("ObjectMeta.Annotations", HaveKey("rabbitmq.com/queueRebalanceNeededAt")))
140149

controllers/reconcile_finalizer_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
. "github.com/onsi/gomega"
88

99
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10-
"k8s.io/apimachinery/pkg/types"
10+
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
1111
)
1212

1313
var _ = Describe("Reconcile finalizer", func() {
@@ -31,7 +31,7 @@ var _ = Describe("Reconcile finalizer", func() {
3131
It("adds the deletion finalizer", func() {
3232
rmq := &rabbitmqv1beta1.RabbitmqCluster{}
3333
Eventually(func() string {
34-
err := client.Get(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, rmq)
34+
err := client.Get(ctx, k8sclient.ObjectKeyFromObject(cluster), rmq)
3535
if err != nil {
3636
return ""
3737
}

controllers/reconcile_no_persistence_test.go

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ import (
99
rabbitmqv1beta1 "github.com/rabbitmq/cluster-operator/v2/api/v1beta1"
1010
"github.com/rabbitmq/cluster-operator/v2/internal/status"
1111
v1 "k8s.io/api/core/v1"
12-
apierrors "k8s.io/apimachinery/pkg/api/errors"
1312
k8sresource "k8s.io/apimachinery/pkg/api/resource"
1413
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15-
"k8s.io/apimachinery/pkg/types"
1614
"k8s.io/utils/ptr"
1715
runtimeClient "sigs.k8s.io/controller-runtime/pkg/client"
1816
)
@@ -42,15 +40,6 @@ var _ = Describe("Persistence", func() {
4240
waitForClusterCreation(ctx, cluster, client)
4341
})
4442

45-
AfterEach(func() {
46-
Expect(client.Delete(ctx, cluster)).To(Succeed())
47-
Eventually(func() bool {
48-
rmq := &rabbitmqv1beta1.RabbitmqCluster{}
49-
err := client.Get(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, rmq)
50-
return apierrors.IsNotFound(err)
51-
}, 5).Should(BeTrue())
52-
})
53-
5443
It("does not allow changing the capcity from zero (no persistence)", func() {
5544
By("failing a statefulSet update", func() {
5645
Expect(updateWithRetry(cluster, func(r *rabbitmqv1beta1.RabbitmqCluster) {
@@ -72,10 +61,7 @@ var _ = Describe("Persistence", func() {
7261
By("setting ReconcileSuccess to 'false' with failed reason and message", func() {
7362
Eventually(func() string {
7463
rabbit := &rabbitmqv1beta1.RabbitmqCluster{}
75-
Expect(client.Get(ctx, runtimeClient.ObjectKey{
76-
Name: cluster.Name,
77-
Namespace: defaultNamespace,
78-
}, rabbit)).To(Succeed())
64+
Expect(client.Get(ctx, runtimeClient.ObjectKeyFromObject(cluster), rabbit)).To(Succeed())
7965

8066
for i := range rabbit.Status.Conditions {
8167
if rabbit.Status.Conditions[i].Type == status.ReconcileSuccess {

0 commit comments

Comments
 (0)