diff --git a/Makefile b/Makefile
index 2732500a..2bcc05a1 100644
--- a/Makefile
+++ b/Makefile
@@ -111,8 +111,8 @@ vet: ## Run go vet against code.
 	go vet ./...
 
 .PHONY: test
-test: manifests generate fmt vet envtest ## Run tests.
-	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out
+test: manifests generate fmt vet envtest ginkgo ## Run tests.
+	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" $(GINKGO) run --succinct -p ./... -coverprofile cover.out
 
 ##@ Build
 
@@ -202,6 +202,7 @@ $(LOCALBIN):
 KUSTOMIZE ?= $(LOCALBIN)/kustomize
 CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
 ENVTEST ?= $(LOCALBIN)/setup-envtest
+GINKGO ?= $(LOCALBIN)/ginkgo
 
 ## Tool Versions
 KUSTOMIZE_VERSION ?= v3.8.7
@@ -228,6 +229,12 @@ envtest: $(ENVTEST) ## Download envtest-setup locally if necessary.
$(ENVTEST): $(LOCALBIN)
 	test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest
 
+.PHONY: ginkgo
+ginkgo: $(GINKGO) ## Download ginkgo locally if necessary.
+$(GINKGO): $(LOCALBIN)
+# No @version suffix: this keeps the ginkgo CLI in sync with the ginkgo/v2 version pinned in go.mod.
+	test -s $(LOCALBIN)/ginkgo || GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo
+
 .PHONY: operator-sdk
 OPERATOR_SDK ?= $(LOCALBIN)/operator-sdk
 operator-sdk: ## Download operator-sdk locally if necessary.
diff --git a/controllers/onload_controller.go b/controllers/onload_controller.go
index 935258fa..b540cc53 100644
--- a/controllers/onload_controller.go
+++ b/controllers/onload_controller.go
@@ -323,7 +323,8 @@ func (r *OnloadReconciler) addOnloadLabelsToNodes(ctx context.Context, onload *o
 		if node.Labels[kmmLabel] != onload.Spec.Onload.Version {
 			continue
 		}
-		if _, found := node.Labels[labelKey]; !found {
+		value, found := node.Labels[labelKey]
+		if !found {
 			nodeCopy := node.DeepCopy()
 			node.Labels[labelKey] = onload.Spec.Onload.Version
 			err := r.Patch(ctx, &node, client.MergeFrom(nodeCopy))
@@ -333,6 +334,18 @@ func (r *OnloadReconciler) addOnloadLabelsToNodes(ctx context.Context, onload *o
 				return nil, err
 			}
 			changesMade = true
+		} else if value != onload.Spec.Onload.Version {
+			// The label is present but holds a stale version: remove it here
+			// so the branch above can re-add it with the current version.
+			nodeCopy := node.DeepCopy()
+			delete(node.Labels, labelKey)
+			err := r.Patch(ctx, &node, client.MergeFrom(nodeCopy))
+			if err != nil {
+				log.Error(err, "Failed to remove old label from Node",
+					"Node", node.Name)
+				return nil, err
+			}
+			changesMade = true
 		}
 	}
diff --git a/controllers/onload_controller_test.go b/controllers/onload_controller_test.go
index 3d0726fc..34ee87d6 100644
--- a/controllers/onload_controller_test.go
+++ b/controllers/onload_controller_test.go
@@ -3,6 +3,7 @@
 package controllers
 
 import (
+	"context"
 	"errors"
 	"strconv"
 	"time"
@@ -22,6 +23,8 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
 
 	onloadv1alpha1 "github.com/Xilinx-CNS/kubernetes-onload/api/v1alpha1"
 	mock_client "github.com/Xilinx-CNS/kubernetes-onload/mocks/client"
@@ -838,3 +841,285 @@ var _ = Describe("onload controller", func() {
 		})
 	})
 })
+
+// Please note that each of these tests spins up a new envtest cluster before
+// it runs and destroys it afterwards. This has quite a heavy time cost, so
+// if possible please try to test functionality in a "lighter" way.
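+// (For example, the specs earlier in this file exercise the reconciler
+// against a mocked client instead of a real API server.)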
+// Running tests on an Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz showed that
+// adding an empty test would increase the time taken to run `make test` by ~5s.
+var _ = Describe("Recovery from a non-clean state", func() {
+
+	// These shadow the suite-level variables that relate to the envtest cluster.
+	var (
+		k8sManager manager.Manager
+		k8sClient  client.Client
+		testEnv    *envtest.Environment
+		ctx        context.Context
+		cancel     context.CancelFunc
+	)
+
+	// General definitions for tests
+	var (
+		onload           onloadv1alpha1.Onload
+		devicePlugin     appsv1.DaemonSet
+		devicePluginName types.NamespacedName
+		module           kmm.Module
+	)
+
+	BeforeEach(func() {
+
+		ctx, cancel, testEnv, k8sClient, k8sManager = createEnvTestCluster()
+
+		onload = onloadv1alpha1.Onload{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "onload-test",
+				Namespace: "default",
+			},
+			Spec: onloadv1alpha1.Spec{
+				Selector: map[string]string{
+					"key": "value",
+				},
+				Onload: onloadv1alpha1.OnloadSpec{
+					KernelMappings: []onloadv1alpha1.OnloadKernelMapping{
+						{
+							KernelModuleImage: "",
+							Regexp:            "",
+						},
+					},
+					UserImage: "image:tag",
+					Version:   "old",
+				},
+				DevicePlugin: onloadv1alpha1.DevicePluginSpec{
+					DevicePluginImage: "image:tag",
+				},
+				ServiceAccountName: "",
+			},
+		}
+
+		devicePluginName = types.NamespacedName{
+			Name:      onload.Name + "-onload-device-plugin-ds",
+			Namespace: onload.Namespace,
+		}
+
+		dsLabels := map[string]string{"onload.amd.com/name": devicePluginName.Name}
+		devicePlugin = appsv1.DaemonSet{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      devicePluginName.Name,
+				Namespace: devicePluginName.Namespace,
+				Labels:    map[string]string{onloadVersionLabel: onload.Spec.Onload.Version},
+			},
+			Spec: appsv1.DaemonSetSpec{
+				Selector: &metav1.LabelSelector{MatchLabels: dsLabels},
+				Template: corev1.PodTemplateSpec{
+					ObjectMeta: metav1.ObjectMeta{
+						Labels: dsLabels,
+					},
+					Spec: corev1.PodSpec{
+						Containers: []corev1.Container{
+							{Name: "foo", Image: "bar"},
+						},
+						InitContainers: []corev1.Container{
+							{
+								Name: "init", Image: onload.Spec.Onload.UserImage,
+							},
+						},
+					},
+				},
+			},
+		}
+
+		module = kmm.Module{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      onload.Name + onloadModuleNameSuffix,
+				Namespace: onload.Namespace,
+			},
+			Spec: kmm.ModuleSpec{
+				Selector: onload.Spec.Selector,
+				ModuleLoader: kmm.ModuleLoaderSpec{
+					Container: kmm.ModuleLoaderContainerSpec{
+						KernelMappings: []kmm.KernelMapping{{}},
+					},
+				},
+			},
+		}
+	})
+
+	AfterEach(func() {
+		cancel()
+		err := testEnv.Stop()
+		Expect(err).NotTo(HaveOccurred())
+	})
+
+	It("shouldn't remove existing operands", func() {
+		Expect(k8sClient.Create(ctx, &onload)).Should(Succeed())
+
+		Expect(ctrl.SetControllerReference(&onload, &devicePlugin, testEnv.Scheme)).Should(Succeed())
+		Expect(k8sClient.Create(ctx, &devicePlugin)).Should(Succeed())
+
+		startReconciler(k8sManager, ctx)
+
+		// Use a fresh object so that the old and new versions of the device
+		// plugin can be compared.
+		newDevicePlugin := appsv1.DaemonSet{}
+
+		Eventually(func() bool {
+			err := k8sClient.Get(ctx, devicePluginName, &newDevicePlugin)
+			return err == nil
+		}, timeout, pollingInterval).Should(BeTrue())
+
+		Expect(newDevicePlugin.UID).To(Equal(devicePlugin.UID))
+	})
+
+	// These tests are restricted to the upgrading of the operands. Tests
+	// that use Nodes are in a subsequent section.
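+	// Both tests seed the cluster with an operand whose version predates
+	// onload.Spec.Onload.Version and expect the reconciler to update that
+	// operand in place (the UID must not change) rather than recreate it.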
+	It("should continue upgrade process (device plugin)", func() {
+
+		// Fake an in-progress upgrade: the device plugin still uses the old values.
+		onload.Spec.Onload.Version = "new"
+		onload.Spec.Onload.UserImage = "image:tag-newer"
+
+		Expect(onload.Spec.Onload.Version).ShouldNot(Equal(devicePlugin.Labels[onloadVersionLabel]))
+
+		Expect(k8sClient.Create(ctx, &onload)).Should(Succeed())
+
+		Expect(ctrl.SetControllerReference(&onload, &devicePlugin, testEnv.Scheme)).Should(Succeed())
+		Expect(k8sClient.Create(ctx, &devicePlugin)).Should(Succeed())
+
+		startReconciler(k8sManager, ctx)
+
+		// Use a fresh object so that the old and new versions of the object
+		// can be compared.
+		newDevicePlugin := appsv1.DaemonSet{}
+
+		Eventually(func(g Gomega) {
+			g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(&devicePlugin), &newDevicePlugin)).Should(Succeed())
+			g.Expect(newDevicePlugin.UID).To(Equal(devicePlugin.UID))
+			g.Expect(newDevicePlugin.Labels[onloadVersionLabel]).ShouldNot(Equal(devicePlugin.Labels[onloadVersionLabel]))
+			g.Expect(newDevicePlugin.Labels[onloadVersionLabel]).Should(Equal(onload.Spec.Onload.Version))
+		}, timeout, pollingInterval).Should(Succeed())
+
+	})
+
+	It("should continue upgrade process (module)", func() {
+		// Fake an in-progress upgrade: the operand still uses the old values.
+		onload.Spec.Onload.Version = "new"
+		onload.Spec.Onload.UserImage = "image:tag-newer"
+
+		Expect(onload.Spec.Onload.Version).ShouldNot(Equal(module.Spec.ModuleLoader.Container.Version))
+
+		Expect(k8sClient.Create(ctx, &onload)).Should(Succeed())
+
+		Expect(ctrl.SetControllerReference(&onload, &module, testEnv.Scheme)).Should(Succeed())
+		Expect(k8sClient.Create(ctx, &module)).Should(Succeed())
+
+		startReconciler(k8sManager, ctx)
+
+		// Use a fresh object so that the old and new versions of the object
+		// can be compared.
+		newModule := kmm.Module{}
+
+		Eventually(func(g Gomega) {
+			g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(&module), &newModule)).Should(Succeed())
+			g.Expect(newModule.UID).To(Equal(module.UID))
+			g.Expect(newModule.Spec.ModuleLoader.Container.Version).ShouldNot(Equal(module.Spec.ModuleLoader.Container.Version))
+			g.Expect(newModule.Spec.ModuleLoader.Container.Version).Should(Equal(onload.Spec.Onload.Version))
+		}, timeout, pollingInterval).Should(Succeed())
+
+	})
+
+	Describe("Node-based tests", func() {
+
+		var (
+			node           corev1.Node
+			kmmOnloadLabel string
+			dpOnloadLabel  string
+		)
+
+		BeforeEach(func() {
+			node = corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "test-node",
+					Labels: map[string]string{
+						"key": "value",
+					},
+				},
+			}
+
+			kmmOnloadLabel = kmmOnloadLabelName(onload.Name, onload.Namespace)
+			dpOnloadLabel = onloadLabelName(onload.Name, onload.Namespace)
+		})
+
+		It("should add the initial labels", func() {
+
+			Expect(k8sClient.Create(ctx, &node)).Should(Succeed())
+
+			Expect(k8sClient.Create(ctx, &onload)).Should(Succeed())
+
+			startReconciler(k8sManager, ctx)
+
+			newNode := corev1.Node{}
+			Eventually(func(g Gomega) {
+				g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(&node), &newNode)).Should(Succeed())
+				g.Expect(newNode.Labels[kmmOnloadLabel]).Should(Equal(onload.Spec.Onload.Version))
+				g.Expect(newNode.Labels[dpOnloadLabel]).Should(Equal(onload.Spec.Onload.Version))
+			}, timeout, pollingInterval).Should(Succeed())
+		})
+
+		// We want to test 3 different values for each label/version: unset, old and new.
+		DescribeTable("controller starting at different points during upgrade",
+			func(label1, label2 string) {
+				if label1 != "" {
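+					// A non-empty value simulates a label left on the node from before the controller (re)started.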
+					node.Labels[kmmOnloadLabel] = label1
+				}
+
+				if label2 != "" {
+					node.Labels[dpOnloadLabel] = label2
+				}
+
+				Expect(k8sClient.Create(ctx, &node)).Should(Succeed())
+
+				onload.Spec.Onload.Version = "new"
+				onload.Spec.Onload.UserImage = "image:tag-newer"
+				Expect(k8sClient.Create(ctx, &onload)).Should(Succeed())
+
+				if label1 != "" {
+					module.Spec.ModuleLoader.Container.Version = label1
+				}
+				Expect(ctrl.SetControllerReference(&onload, &module, testEnv.Scheme)).Should(Succeed())
+
+				if label2 != "" {
+					devicePlugin.Labels[onloadVersionLabel] = label2
+				}
+				Expect(ctrl.SetControllerReference(&onload, &devicePlugin, testEnv.Scheme)).Should(Succeed())
+
+				Expect(k8sClient.Create(ctx, &module)).Should(Succeed())
+				Expect(k8sClient.Create(ctx, &devicePlugin)).Should(Succeed())
+
+				startReconciler(k8sManager, ctx)
+
+				newNode := corev1.Node{}
+				Eventually(func(g Gomega) {
+					g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(&node), &newNode)).Should(Succeed())
+					g.Expect(newNode.Labels[kmmOnloadLabel]).Should(Equal(onload.Spec.Onload.Version))
+					g.Expect(newNode.Labels[dpOnloadLabel]).Should(Equal(onload.Spec.Onload.Version))
+				}, timeout, pollingInterval).Should(Succeed())
+			},
+			Entry("00", "", ""),
+			Entry("01", "", "old"), // I don't think this should be able to happen
+			Entry("02", "", "new"),
+			Entry("10", "old", ""),
+			Entry("11", "old", "old"),
+			Entry("12", "old", "new"),
+			Entry("20", "new", ""),
+			Entry("21", "new", "old"), // I don't think this should be able to happen
+			Entry("22", "new", "new"),
+		)
+	})
+
+})
diff --git a/controllers/suite_test.go b/controllers/suite_test.go
index 0cc3e8c7..25dcc406 100644
--- a/controllers/suite_test.go
+++ b/controllers/suite_test.go
@@ -15,11 +15,11 @@ import (
 	"k8s.io/apimachinery/pkg/util/rand"
 	"k8s.io/client-go/kubernetes/scheme"
-	"k8s.io/client-go/rest"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/envtest"
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+	"sigs.k8s.io/controller-runtime/pkg/metrics/server"
 
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 
@@ -32,7 +32,6 @@ import (
 // These tests use Ginkgo (BDD-style Go testing framework). Refer to
 // http://onsi.github.io/ginkgo/ to learn more about Ginkgo.
 
-var cfg *rest.Config
 var k8sClient client.Client
 var testEnv *envtest.Environment
 
@@ -47,9 +46,9 @@ var (
 	cancel context.CancelFunc
 )
 
-var _ = BeforeSuite(func() {
-	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
-	ctx, cancel = context.WithCancel(context.TODO())
+func createEnvTestCluster() (context.Context, context.CancelFunc,
+	*envtest.Environment, client.Client, manager.Manager) {
+	ctx, cancel := context.WithCancel(context.TODO())
 
 	By("bootstrapping test environment")
 	testEnv = &envtest.Environment{
@@ -67,7 +66,7 @@
 	var err error
 
-	// cfg is defined in this file globally.
-	cfg, err = testEnv.Start()
+	// cfg is now local to this helper; each test creates its own cluster.
+	cfg, err := testEnv.Start()
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
@@ -79,15 +78,24 @@
 
 	//+kubebuilder:scaffold:scheme
 
-	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
+	k8sClient, err := client.New(cfg, client.Options{Scheme: scheme.Scheme})
 	Expect(err).NotTo(HaveOccurred())
 	Expect(k8sClient).NotTo(BeNil())
 
-	k8sManager, err := manager.New(cfg, manager.Options{Scheme: scheme.Scheme})
+	k8sManager, err := manager.New(cfg, manager.Options{
+		Scheme: scheme.Scheme,
+		// A bind address of "0" disables the metrics server, so the managers
+		// started by concurrent test clusters don't contend for one port.
+		Metrics: server.Options{BindAddress: "0"},
+	})
 	Expect(err).NotTo(HaveOccurred())
 	Expect(k8sManager).NotTo(BeNil())
 
-	err = (&OnloadReconciler{
+	return ctx, cancel, testEnv, k8sClient, k8sManager
+}
+
+func startReconciler(k8sManager manager.Manager, ctx context.Context) {
+	err := (&OnloadReconciler{
 		Client: k8sManager.GetClient(),
 		Scheme: k8sManager.GetScheme(),
 	}).SetupWithManager(k8sManager)
@@ -95,9 +101,18 @@
 	go func() {
 		defer GinkgoRecover()
-		err = k8sManager.Start(ctx)
+		err := k8sManager.Start(ctx)
 		Expect(err).ToNot(HaveOccurred(), "failed to run manager")
 	}()
+}
+
+var _ = BeforeSuite(func() {
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
+
+	var k8sManager manager.Manager
+	ctx, cancel, testEnv, k8sClient, k8sManager = createEnvTestCluster()
+
+	startReconciler(k8sManager, ctx)
 
 	// k8s.io/apimachinery/pkg/util/rand is only used to generate unique
 	// namespaces that tests will run in. A fixed seed is used to make this more