diff --git a/Makefile b/Makefile index 8e1e6ba3f19f..9c5fd12a5574 100644 --- a/Makefile +++ b/Makefile @@ -615,6 +615,7 @@ generate-e2e-templates-main: $(KUSTOMIZE) $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-ipv6 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-ipv6.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-dualstack-ipv6-primary --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-dualstack-ipv6-primary.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-dualstack-ipv4-primary --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-dualstack-ipv4-primary.yaml + $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-in-place --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-in-place.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-no-workers --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-no-workers.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-runtimesdk-v1beta1 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-runtimesdk-v1beta1.yaml $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-topology-kcp-only --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-topology-kcp-only.yaml diff --git a/_manual-testing/cluster-quick-start.yaml b/_manual-testing/cluster-quick-start.yaml new file mode 100644 index 000000000000..c86b14ee53eb --- /dev/null +++ b/_manual-testing/cluster-quick-start.yaml @@ -0,0 +1,37 @@ +apiVersion: cluster.x-k8s.io/v1beta2 +kind: Cluster +metadata: + name: quick-start + namespace: default +spec: + clusterNetwork: + services: + cidrBlocks: ["10.128.0.0/12"] + pods: + cidrBlocks: ["192.168.0.0/16"] + serviceDomain: "cluster.local" + topology: + classRef: + name: quick-start + controlPlane: + replicas: 3 + variables: + - name: imageRepository + value: "" + - name: etcdImageTag + value: "3.6.4-0" + - name: coreDNSImageTag + value: "" + - name: podSecurityStandard + value: + enabled: true + enforce: "baseline" + audit: "restricted" + warn: "restricted" + version: v1.33.4 + workers: + machineDeployments: + - class: default-worker + name: md-0 + replicas: 1 +--- diff --git a/_manual-testing/clusterclass-quick-start.yaml b/_manual-testing/clusterclass-quick-start.yaml new file mode 100644 index 000000000000..600439c9c9b0 --- /dev/null +++ b/_manual-testing/clusterclass-quick-start.yaml @@ -0,0 +1,323 @@ +apiVersion: cluster.x-k8s.io/v1beta2 +kind: ClusterClass +metadata: + name: quick-start +spec: + controlPlane: + templateRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: KubeadmControlPlaneTemplate + name: quick-start-control-plane + machineInfrastructure: + templateRef: + kind: DockerMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + name: quick-start-control-plane + healthCheck: + checks: + unhealthyNodeConditions: + - type: Ready + status: Unknown + timeoutSeconds: 300 + - type: Ready + status: "False" + timeoutSeconds: 300 + infrastructure: + templateRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: DockerClusterTemplate + name: quick-start-cluster + workers: + machineDeployments: + - class: default-worker + bootstrap: + templateRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 + kind: KubeadmConfigTemplate 
+ name: quick-start-default-worker-bootstraptemplate + infrastructure: + templateRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: DockerMachineTemplate + name: quick-start-default-worker-machinetemplate + healthCheck: + checks: + unhealthyNodeConditions: + - type: Ready + status: Unknown + timeoutSeconds: 300 + - type: Ready + status: "False" + timeoutSeconds: 300 + machinePools: + - class: default-worker + bootstrap: + templateRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 + kind: KubeadmConfigTemplate + name: quick-start-default-worker-bootstraptemplate + infrastructure: + templateRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: DockerMachinePoolTemplate + name: quick-start-default-worker-machinepooltemplate + variables: + - name: imageRepository + required: true + schema: + openAPIV3Schema: + type: string + default: "" + example: "registry.k8s.io" + description: "imageRepository sets the container registry to pull images from. If empty, nothing will be set and the from of kubeadm will be used." + - name: etcdImageTag + required: true + schema: + openAPIV3Schema: + type: string + default: "" + example: "3.5.3-0" + description: "etcdImageTag sets the tag for the etcd image." + - name: coreDNSImageTag + required: true + schema: + openAPIV3Schema: + type: string + default: "" + example: "v1.8.5" + description: "coreDNSImageTag sets the tag for the coreDNS image." + - name: podSecurityStandard + required: false + schema: + openAPIV3Schema: + type: object + properties: + enabled: + type: boolean + default: true + description: "enabled enables the patches to enable Pod Security Standard via AdmissionConfiguration." + enforce: + type: string + default: "baseline" + description: "enforce sets the level for the enforce PodSecurityConfiguration mode. One of privileged, baseline, restricted." + audit: + type: string + default: "restricted" + description: "audit sets the level for the audit PodSecurityConfiguration mode. One of privileged, baseline, restricted." + warn: + type: string + default: "restricted" + description: "warn sets the level for the warn PodSecurityConfiguration mode. One of privileged, baseline, restricted." + patches: + - name: imageRepository + description: "Sets the imageRepository used for the KubeadmControlPlane." + enabledIf: '{{ ne .imageRepository "" }}' + definitions: + - selector: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: KubeadmControlPlaneTemplate + matchResources: + controlPlane: true + jsonPatches: + - op: add + path: "/spec/template/spec/kubeadmConfigSpec/clusterConfiguration/imageRepository" + valueFrom: + variable: imageRepository + - name: etcdImageTag + enabledIf: '{{ ne .etcdImageTag "" }}' + description: "Sets tag to use for the etcd image in the KubeadmControlPlane." + definitions: + - selector: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: KubeadmControlPlaneTemplate + matchResources: + controlPlane: true + jsonPatches: + - op: add + path: "/spec/template/spec/kubeadmConfigSpec/clusterConfiguration/etcd" + valueFrom: + template: | + local: + imageTag: {{ .etcdImageTag }} + - name: coreDNSImageTag + enabledIf: '{{ ne .coreDNSImageTag "" }}' + description: "Sets tag to use for the etcd image in the KubeadmControlPlane." 
+ definitions: + - selector: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: KubeadmControlPlaneTemplate + matchResources: + controlPlane: true + jsonPatches: + - op: add + path: "/spec/template/spec/kubeadmConfigSpec/clusterConfiguration/dns" + valueFrom: + template: | + imageTag: {{ .coreDNSImageTag }} + - name: customImage + description: "Sets the container image that is used for running dockerMachines for the controlPlane and default-worker machineDeployments." + definitions: + - selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: DockerMachineTemplate + matchResources: + machineDeploymentClass: + names: + - default-worker + jsonPatches: + - op: add + path: "/spec/template/spec/customImage" + valueFrom: + template: | + kindest/node:{{ .builtin.machineDeployment.version | replace "+" "_" }} + - selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: DockerMachinePoolTemplate + matchResources: + machinePoolClass: + names: + - default-worker + jsonPatches: + - op: add + path: "/spec/template/spec/template/customImage" + valueFrom: + template: | + kindest/node:{{ .builtin.machinePool.version | replace "+" "_" }} + - selector: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: DockerMachineTemplate + matchResources: + controlPlane: true + jsonPatches: + - op: add + path: "/spec/template/spec/customImage" + valueFrom: + template: | + kindest/node:{{ .builtin.controlPlane.version | replace "+" "_" }} + - name: podSecurityStandard + description: "Adds an admission configuration for PodSecurity to the kube-apiserver." + definitions: + - selector: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: KubeadmControlPlaneTemplate + matchResources: + controlPlane: true + jsonPatches: + - op: add + path: "/spec/template/spec/kubeadmConfigSpec/clusterConfiguration/apiServer/extraArgs" + value: + - name: admission-control-config-file + value: "/etc/kubernetes/kube-apiserver-admission-pss.yaml" + - op: add + path: "/spec/template/spec/kubeadmConfigSpec/clusterConfiguration/apiServer/extraVolumes" + value: + - name: admission-pss + hostPath: /etc/kubernetes/kube-apiserver-admission-pss.yaml + mountPath: /etc/kubernetes/kube-apiserver-admission-pss.yaml + readOnly: true + pathType: "File" + - op: add + path: "/spec/template/spec/kubeadmConfigSpec/files" + valueFrom: + template: | + - content: | + apiVersion: apiserver.config.k8s.io/v1 + kind: AdmissionConfiguration + plugins: + - name: PodSecurity + configuration: + apiVersion: pod-security.admission.config.k8s.io/v1{{ if semverCompare "< v1.25-0" .builtin.controlPlane.version }}beta1{{ end }} + kind: PodSecurityConfiguration + defaults: + enforce: "{{ .podSecurityStandard.enforce }}" + enforce-version: "latest" + audit: "{{ .podSecurityStandard.audit }}" + audit-version: "latest" + warn: "{{ .podSecurityStandard.warn }}" + warn-version: "latest" + exemptions: + usernames: [] + runtimeClasses: [] + namespaces: [kube-system] + path: /etc/kubernetes/kube-apiserver-admission-pss.yaml + enabledIf: "{{ .podSecurityStandard.enabled }}" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: DockerClusterTemplate +metadata: + name: quick-start-cluster +spec: + template: + spec: {} +--- +kind: KubeadmControlPlaneTemplate +apiVersion: controlplane.cluster.x-k8s.io/v1beta2 +metadata: + name: quick-start-control-plane +spec: + template: + spec: + rollout: + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + kubeadmConfigSpec: + clusterConfiguration: + apiServer: + # 
host.docker.internal is required by kubetest when running on MacOS because of the way ports are proxied. + certSANs: [localhost, 127.0.0.1, 0.0.0.0, host.docker.internal] + initConfiguration: + nodeRegistration: # node registration parameters are automatically injected by CAPD according to the kindest/node image in use. + kubeletExtraArgs: # having a not empty kubeletExtraArgs is required for the externalCloudProvider patch to work + - name: eviction-hard + value: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%' + joinConfiguration: + nodeRegistration: # node registration parameters are automatically injected by CAPD according to the kindest/node image in use. + kubeletExtraArgs: # having a not empty kubeletExtraArgs is required for the externalCloudProvider patch to work + - name: eviction-hard + value: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%' +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: DockerMachineTemplate +metadata: + name: quick-start-control-plane +spec: + template: + spec: + extraMounts: + - containerPath: "/var/run/docker.sock" + hostPath: "/var/run/docker.sock" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: DockerMachineTemplate +metadata: + name: quick-start-default-worker-machinetemplate +spec: + template: + spec: + extraMounts: + - containerPath: "/var/run/docker.sock" + hostPath: "/var/run/docker.sock" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: DockerMachinePoolTemplate +metadata: + name: quick-start-default-worker-machinepooltemplate +spec: + template: + spec: + template: {} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 +kind: KubeadmConfigTemplate +metadata: + name: quick-start-default-worker-bootstraptemplate +spec: + template: + spec: + joinConfiguration: + nodeRegistration: # node registration parameters are automatically injected by CAPD according to the kindest/node image in use. + kubeletExtraArgs: # having a not empty kubeletExtraArgs is required for the externalCloudProvider to work + - name: eviction-hard + value: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%' diff --git a/api/core/v1beta2/machine_phase_types.go b/api/core/v1beta2/machine_phase_types.go index 1ca955156444..b849ea61d654 100644 --- a/api/core/v1beta2/machine_phase_types.go +++ b/api/core/v1beta2/machine_phase_types.go @@ -45,6 +45,10 @@ const ( // become a Kubernetes Node in a Ready state. MachinePhaseRunning = MachinePhase("Running") + // MachinePhaseUpdating is the Machine state when the Machine + // is updating. + MachinePhaseUpdating = MachinePhase("Updating") + // MachinePhaseDeleting is the Machine state when a delete // request has been sent to the API Server, // but its infrastructure has not yet been fully deleted. diff --git a/api/core/v1beta2/machine_types.go b/api/core/v1beta2/machine_types.go index 18d97b42a911..f8b7b3d56544 100644 --- a/api/core/v1beta2/machine_types.go +++ b/api/core/v1beta2/machine_types.go @@ -162,6 +162,28 @@ const ( // MachineNotUpToDateReason surface when a Machine spec does not match the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment. MachineNotUpToDateReason = "NotUpToDate" + + // MachineUpToDateUpdatingReason surface when a Machine spec matches the spec of the Machine's owner resource, + // but the Machine is still updating. + MachineUpToDateUpdatingReason = "Updating" +) + +// Machine's Updating condition and corresponding reasons. 
+// Note: Updating condition is set by the Machine controller during in-place updates. +const ( + // MachineUpdatingCondition is true while an in-place update is in progress on the Machine. + // The condition is owned by the Machine controller and is used to track the progress of in-place updates. + // This condition is considered when computing the UpToDate condition. + MachineUpdatingCondition = "Updating" + + // MachineNotUpdatingReason surfaces when the Machine is not performing an in-place update. + MachineNotUpdatingReason = "NotUpdating" + + // MachineInPlaceUpdatingReason surfaces when the Machine is waiting for in-place update to complete. + MachineInPlaceUpdatingReason = "InPlaceUpdating" + + // MachineInPlaceUpdateFailedReason surfaces when the in-place update has failed. + MachineInPlaceUpdateFailedReason = "InPlaceUpdateFailed" ) // Machine's BootstrapConfigReady condition and corresponding reasons. @@ -552,7 +574,7 @@ type MachineStatus struct { // phase represents the current phase of machine actuation. // +optional - // +kubebuilder:validation:Enum=Pending;Provisioning;Provisioned;Running;Deleting;Deleted;Failed;Unknown + // +kubebuilder:validation:Enum=Pending;Provisioning;Provisioned;Running;Updating;Deleting;Deleted;Failed;Unknown Phase string `json:"phase,omitempty"` // certificatesExpiryDate is the expiry date of the machine certificates. @@ -710,6 +732,7 @@ func (m *MachineStatus) GetTypedPhase() MachinePhase { MachinePhaseProvisioning, MachinePhaseProvisioned, MachinePhaseRunning, + MachinePhaseUpdating, MachinePhaseDeleting, MachinePhaseDeleted, MachinePhaseFailed: diff --git a/config/crd/bases/cluster.x-k8s.io_machines.yaml b/config/crd/bases/cluster.x-k8s.io_machines.yaml index 12f910515b07..bc29bfcfe003 100644 --- a/config/crd/bases/cluster.x-k8s.io_machines.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machines.yaml @@ -2122,6 +2122,7 @@ spec: - Provisioning - Provisioned - Running + - Updating - Deleting - Deleted - Failed diff --git a/controllers/alias.go b/controllers/alias.go index 657b1c03477d..0bf00eb64897 100644 --- a/controllers/alias.go +++ b/controllers/alias.go @@ -72,9 +72,10 @@ func (r *ClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag // MachineReconciler reconciles a Machine object. type MachineReconciler struct { - Client client.Client - APIReader client.Reader - ClusterCache clustercache.ClusterCache + Client client.Client + APIReader client.Reader + ClusterCache clustercache.ClusterCache + RuntimeClient runtimeclient.Client // WatchFilterValue is the label value used to filter events prior to reconciliation. 
WatchFilterValue string @@ -90,6 +91,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag Client: r.Client, APIReader: r.APIReader, ClusterCache: r.ClusterCache, + RuntimeClient: r.RuntimeClient, WatchFilterValue: r.WatchFilterValue, RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod, AdditionalSyncMachineLabels: r.AdditionalSyncMachineLabels, diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index f48e64045c67..7a258b9c6cf5 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -1045,6 +1045,15 @@ func reconcileMachineUpToDateCondition(_ context.Context, controlPlane *internal }) continue } + if _, ok := machine.Annotations[clusterv1.UpdateInProgressAnnotation]; ok { // Note: This should probably *also* use the Updating condition (but also already set condition to false if only this annotation is set) + conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineUpToDateCondition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineUpToDateUpdatingReason, + }) + continue + } + conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineUpToDateCondition, Status: metav1.ConditionTrue, diff --git a/exp/topology/desiredstate/lifecycle_hooks.go b/exp/topology/desiredstate/lifecycle_hooks.go index a116f8188a61..8eeb6bc9001b 100644 --- a/exp/topology/desiredstate/lifecycle_hooks.go +++ b/exp/topology/desiredstate/lifecycle_hooks.go @@ -174,7 +174,7 @@ func (g *generator) callAfterControlPlaneUpgradeHook(ctx context.Context, s *sco s.HookResponseTracker.Add(runtimehooksv1.AfterControlPlaneUpgrade, hookResponse) if hookResponse.RetryAfterSeconds != 0 { - log.Info(fmt.Sprintf("Cluster Upgrade is blocked after control plane upgrade to version %s by %s hook", *currentVersion, runtimecatalog.HookName(runtimehooksv1.AfterControlPlaneUpgrade)), + log.Info(fmt.Sprintf("Cluster upgrade is blocked after control plane upgrade to version %s by %s hook", *currentVersion, runtimecatalog.HookName(runtimehooksv1.AfterControlPlaneUpgrade)), "ControlPlaneUpgrades", hookRequest.ControlPlaneUpgrades, "WorkersUpgrades", hookRequest.WorkersUpgrades, ) diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 428ed8195fad..8cf90e54a063 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -52,6 +52,7 @@ import ( "sigs.k8s.io/cluster-api/controllers/clustercache" "sigs.k8s.io/cluster-api/controllers/external" "sigs.k8s.io/cluster-api/controllers/noderefutil" + runtimeclient "sigs.k8s.io/cluster-api/exp/runtime/client" "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/internal/controllers/machine/drain" @@ -93,9 +94,10 @@ var ( // Reconciler reconciles a Machine object. type Reconciler struct { - Client client.Client - APIReader client.Reader - ClusterCache clustercache.ClusterCache + Client client.Client + APIReader client.Reader + ClusterCache clustercache.ClusterCache + RuntimeClient runtimeclient.Client // WatchFilterValue is the label value used to filter events prior to reconciliation. WatchFilterValue string @@ -129,6 +131,9 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt // to have some buffer. 
return errors.New("Client, APIReader and ClusterCache must not be nil and RemoteConditionsGracePeriod must not be < 2m") } + if feature.Gates.Enabled(feature.InPlaceUpdates) && r.RuntimeClient == nil { + return errors.New("RuntimeClient must not be nil when InPlaceUpdates feature gate is enabled") + } r.predicateLog = ptr.To(ctrl.LoggerFrom(ctx).WithValues("controller", "machine")) clusterToMachines, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme()) @@ -282,7 +287,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } // Handle normal reconciliation loop. - return doReconcile(ctx, alwaysReconcile, s) + reconcileNormal := append( + alwaysReconcile, + r.reconcileInPlaceUpdate, + ) + + return doReconcile(ctx, reconcileNormal, s) } func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error { @@ -326,6 +336,7 @@ func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clust clusterv1.MachineNodeReadyCondition, clusterv1.MachineNodeHealthyCondition, clusterv1.MachineDeletingCondition, + clusterv1.MachineUpdatingCondition, }}, ) @@ -397,6 +408,12 @@ type scope struct { // deletingMessage is the message that should be used when setting the Deleting condition. deletingMessage string + + // updatingReason is the reason that should be used when setting the Updating condition. + updatingReason string + + // updatingMessage is the message that should be used when setting the Updating condition. + updatingMessage string } func (r *Reconciler) reconcileMachineOwnerAndLabels(_ context.Context, s *scope) (ctrl.Result, error) { diff --git a/internal/controllers/machine/machine_controller_inplace_update.go b/internal/controllers/machine/machine_controller_inplace_update.go new file mode 100644 index 000000000000..ea1733ccd359 --- /dev/null +++ b/internal/controllers/machine/machine_controller_inplace_update.go @@ -0,0 +1,267 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "context" + "fmt" + "time" + + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" + "sigs.k8s.io/cluster-api/feature" + "sigs.k8s.io/cluster-api/internal/hooks" +) + +// reconcileInPlaceUpdate handles the in-place update workflow for a Machine. 
+func (r *Reconciler) reconcileInPlaceUpdate(ctx context.Context, s *scope) (ctrl.Result, error) { + if !feature.Gates.Enabled(feature.InPlaceUpdates) { + return ctrl.Result{}, nil + } + + log := ctrl.LoggerFrom(ctx) + + machineAnnotations := s.machine.GetAnnotations() + _, inPlaceUpdateInProgress := machineAnnotations[clusterv1.UpdateInProgressAnnotation] + hasUpdateMachinePending := hooks.IsPending(runtimehooksv1.UpdateMachine, s.machine) + + if !inPlaceUpdateInProgress { + // Clean up any orphaned pending hooks and annotations before exiting. + // This can happen if the in-place update annotation was removed from Machine + // but the UpdateMachine hook is still pending or annotations are still on InfraMachine/BootstrapConfig. + if hasUpdateMachinePending { + log.Info("In-place update annotation removed but UpdateMachine hook still pending, cleaning up orphaned hook and annotations") + if err := r.completeInPlaceUpdate(ctx, s); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to clean up orphaned UpdateMachine hook and annotations") + } + } + + return ctrl.Result{}, nil + } + + // If hook is not pending, we're waiting for the owner controller to mark it as pending. + if !hasUpdateMachinePending { + log.Info("In-place update annotation is set, waiting for UpdateMachine hook to be marked as pending") + return ctrl.Result{}, nil + } + + if !ptr.Deref(s.machine.Status.Initialization.InfrastructureProvisioned, false) { + log.V(5).Info("Infrastructure not yet provisioned, skipping in-place update") + return ctrl.Result{}, nil + } + if !ptr.Deref(s.machine.Status.Initialization.BootstrapDataSecretCreated, false) { + log.V(5).Info("Bootstrap data secret not yet created, skipping in-place update") + return ctrl.Result{}, nil + } + + if s.infraMachine == nil { + s.updatingReason = clusterv1.MachineInPlaceUpdateFailedReason + s.updatingMessage = "In-place update not possible: InfraMachine not found" + return ctrl.Result{}, errors.New("in-place update failed: InfraMachine not found") + } + + infraReady := r.isInfraMachineReadyForUpdate(s) + bootstrapReady := r.isBootstrapConfigReadyForUpdate(s) + + if !infraReady || !bootstrapReady { + log.Info("Waiting for InfraMachine and BootstrapConfig to be marked for in-place update") + return ctrl.Result{}, nil + } + + result, message, err := r.callUpdateMachineHook(ctx, s) + if err != nil { + s.updatingReason = clusterv1.MachineInPlaceUpdateFailedReason + s.updatingMessage = "UpdateMachine hook failed: please check controller logs for errors" + return ctrl.Result{}, errors.Wrap(err, "in-place update failed") + } + + if result.RequeueAfter > 0 { + s.updatingReason = clusterv1.MachineInPlaceUpdatingReason + if message != "" { + s.updatingMessage = fmt.Sprintf("In-place update in progress: %s", message) + } else { + s.updatingMessage = "In-place update in progress" + } + return result, nil + } + + log.Info("In-place update completed successfully") + if err := r.completeInPlaceUpdate(ctx, s); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to complete in-place update") + } + + return ctrl.Result{}, nil +} + +// isInfraMachineReadyForUpdate checks if the InfraMachine has the in-place update annotation. +func (r *Reconciler) isInfraMachineReadyForUpdate(s *scope) bool { + _, hasAnnotation := s.infraMachine.GetAnnotations()[clusterv1.UpdateInProgressAnnotation] + return hasAnnotation +} + +// isBootstrapConfigReadyForUpdate checks if the BootstrapConfig has the in-place update annotation. 
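+// Machines that do not have a BootstrapConfig are considered ready for update.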
+func (r *Reconciler) isBootstrapConfigReadyForUpdate(s *scope) bool { + if s.bootstrapConfig == nil { + return true + } + _, hasAnnotation := s.bootstrapConfig.GetAnnotations()[clusterv1.UpdateInProgressAnnotation] + return hasAnnotation +} + +// callUpdateMachineHook calls the UpdateMachine runtime hook for the machine. +func (r *Reconciler) callUpdateMachineHook(ctx context.Context, s *scope) (ctrl.Result, string, error) { + log := ctrl.LoggerFrom(ctx) + + // Validate that exactly one extension is registered for the UpdateMachine hook. + // For the current iteration, we only support a single extension to ensure safe behavior. + // Support for multiple extensions will be introduced in a future iteration. + extensions, err := r.RuntimeClient.GetAllExtensions(ctx, runtimehooksv1.UpdateMachine, s.machine) + if err != nil { + return ctrl.Result{}, "", err + } + + if len(extensions) == 0 { + return ctrl.Result{}, "", errors.New("no extensions registered for UpdateMachine hook") + } + + if len(extensions) > 1 { + return ctrl.Result{}, "", errors.Errorf("multiple extensions registered for UpdateMachine hook: only one extension is supported, found %d extensions: %v", len(extensions), extensions) + } + + // Note: When building request message, dropping status; Runtime extension should treat UpdateMachine + // requests as desired state; it is up to them to compare with current state and perform necessary actions. + request := &runtimehooksv1.UpdateMachineRequest{ + Desired: runtimehooksv1.UpdateMachineRequestObjects{ + Machine: *cleanupMachine(s.machine), + InfrastructureMachine: runtime.RawExtension{Object: cleanupUnstructured(s.infraMachine)}, + }, + } + + if s.bootstrapConfig != nil { + request.Desired.BootstrapConfig = runtime.RawExtension{Object: cleanupUnstructured(s.bootstrapConfig)} + } + + response := &runtimehooksv1.UpdateMachineResponse{} + + if err := r.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.UpdateMachine, s.machine, request, response); err != nil { + return ctrl.Result{}, "", err + } + + if response.GetRetryAfterSeconds() != 0 { + log.Info(fmt.Sprintf("UpdateMachine hook requested retry after %d seconds", response.GetRetryAfterSeconds())) + return ctrl.Result{RequeueAfter: time.Duration(response.GetRetryAfterSeconds()) * time.Second}, response.GetMessage(), nil + } + + log.Info("UpdateMachine hook completed successfully") + return ctrl.Result{}, response.GetMessage(), nil +} + +// completeInPlaceUpdate removes in-place update annotations from InfraMachine, BootstrapConfig, Machine, +// and then marks the UpdateMachine hook as done (removes it from pending-hooks annotation). +func (r *Reconciler) completeInPlaceUpdate(ctx context.Context, s *scope) error { + log := ctrl.LoggerFrom(ctx) + + if err := r.removeInPlaceUpdateAnnotation(ctx, s.machine); err != nil { + return err + } + + if s.infraMachine == nil { + log.Info("InfraMachine not found during in-place update completion, skipping annotation removal") + } else { + if err := r.removeInPlaceUpdateAnnotation(ctx, s.infraMachine); err != nil { + return err + } + } + + if s.bootstrapConfig != nil { + if err := r.removeInPlaceUpdateAnnotation(ctx, s.bootstrapConfig); err != nil { + return err + } + } + + if err := hooks.MarkAsDone(ctx, r.Client, s.machine, runtimehooksv1.UpdateMachine); err != nil { + return err + } + + log.Info("In place upgrade completed!") + return nil +} + +// removeInPlaceUpdateAnnotation removes the in-place update annotation from an object and patches it immediately. 
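+// It is a no-op if the annotation is not present on the object.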
+func (r *Reconciler) removeInPlaceUpdateAnnotation(ctx context.Context, obj client.Object) error { + annotations := obj.GetAnnotations() + if _, exists := annotations[clusterv1.UpdateInProgressAnnotation]; !exists { + return nil + } + + gvk, err := apiutil.GVKForObject(obj, r.Client.Scheme()) + if err != nil { + return errors.Wrapf(err, "failed to remove %s annotation from object %s", clusterv1.UpdateInProgressAnnotation, klog.KObj(obj)) + } + + orig := obj.DeepCopyObject().(client.Object) + delete(annotations, clusterv1.UpdateInProgressAnnotation) + obj.SetAnnotations(annotations) + + if err := r.Client.Patch(ctx, obj, client.MergeFrom(orig)); err != nil { + return errors.Wrapf(err, "failed to remove %s annotation from %s %s", clusterv1.UpdateInProgressAnnotation, gvk.Kind, klog.KObj(obj)) + } + + return nil +} + +func cleanupMachine(machine *clusterv1.Machine) *clusterv1.Machine { + return &clusterv1.Machine{ + // Set GVK because object is later marshalled with json.Marshal when the hook request is sent. + TypeMeta: metav1.TypeMeta{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: machine.Name, + Namespace: machine.Namespace, + Labels: machine.Labels, + Annotations: machine.Annotations, + }, + Spec: *machine.Spec.DeepCopy(), + } +} + +func cleanupUnstructured(u *unstructured.Unstructured) *unstructured.Unstructured { + cleanedUpU := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": u.GetAPIVersion(), + "kind": u.GetKind(), + "spec": u.Object["spec"], + }, + } + cleanedUpU.SetName(u.GetName()) + cleanedUpU.SetNamespace(u.GetNamespace()) + cleanedUpU.SetLabels(u.GetLabels()) + cleanedUpU.SetAnnotations(u.GetAnnotations()) + return cleanedUpU +} diff --git a/internal/controllers/machine/machine_controller_inplace_update_test.go b/internal/controllers/machine/machine_controller_inplace_update_test.go new file mode 100644 index 000000000000..05e2a55d9898 --- /dev/null +++ b/internal/controllers/machine/machine_controller_inplace_update_test.go @@ -0,0 +1,636 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "context" + "testing" + "time" + + . 
"github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + utilfeature "k8s.io/component-base/featuregate/testing" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" + runtimev1 "sigs.k8s.io/cluster-api/api/runtime/v1beta2" + runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog" + "sigs.k8s.io/cluster-api/feature" + fakeruntimeclient "sigs.k8s.io/cluster-api/internal/runtime/client/fake" +) + +func TestReconcileInPlaceUpdate(t *testing.T) { + tests := []struct { + name string + featureEnabled bool + setup func(*testing.T) (*Reconciler, *scope) + wantResult ctrl.Result + wantErr bool + wantErrContains string + wantReason string + wantMessage string + verify func(*testing.T, *WithT, context.Context, *Reconciler, *scope) + }{ + { + name: "feature gate disabled returns immediately", + featureEnabled: false, + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + return &Reconciler{}, &scope{machine: newTestMachine()} + }, + wantResult: ctrl.Result{}, + }, + { + name: "cleans up orphaned hook and annotations", + featureEnabled: true, + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + + scheme := runtime.NewScheme() + if err := clusterv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add clusterv1 to scheme: %v", err) + } + + machine := newTestMachine() + machine.Annotations[runtimev1.PendingHooksAnnotation] = runtimecatalog.HookName(runtimehooksv1.UpdateMachine) + + infra := newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra") + infra.SetAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""}) + + bootstrap := newTestUnstructured("GenericBootstrapConfig", "bootstrap.cluster.x-k8s.io/v1beta2", "bootstrap") + bootstrap.SetAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""}) + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(machine, infra, bootstrap).Build() + + return &Reconciler{Client: client}, &scope{ + machine: machine, + infraMachine: infra, + bootstrapConfig: bootstrap, + } + }, + wantResult: ctrl.Result{}, + verify: func(t *testing.T, g *WithT, ctx context.Context, r *Reconciler, s *scope) { + t.Helper() + + updatedMachine := &clusterv1.Machine{} + g.Expect(r.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(s.machine), updatedMachine)).To(Succeed()) + g.Expect(updatedMachine.Annotations).ToNot(HaveKey(runtimev1.PendingHooksAnnotation)) + + updatedInfra := &unstructured.Unstructured{} + updatedInfra.SetGroupVersionKind(s.infraMachine.GroupVersionKind()) + g.Expect(r.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(s.infraMachine), updatedInfra)).To(Succeed()) + g.Expect(updatedInfra.GetAnnotations()).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + + if s.bootstrapConfig != nil { + updatedBootstrap := &unstructured.Unstructured{} + updatedBootstrap.SetGroupVersionKind(s.bootstrapConfig.GroupVersionKind()) + g.Expect(r.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(s.bootstrapConfig), updatedBootstrap)).To(Succeed()) + g.Expect(updatedBootstrap.GetAnnotations()).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + } + }, + }, + { + name: "waits for pending hook to be marked", + featureEnabled: 
true, + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + machine := newTestMachine() + machine.Annotations[clusterv1.UpdateInProgressAnnotation] = "" + return &Reconciler{}, &scope{machine: machine} + }, + wantResult: ctrl.Result{}, + }, + { + name: "fails when infra machine is missing", + featureEnabled: true, + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + machine := newTestMachine() + machine.Annotations[clusterv1.UpdateInProgressAnnotation] = "" + machine.Annotations[runtimev1.PendingHooksAnnotation] = runtimecatalog.HookName(runtimehooksv1.UpdateMachine) + machine.Status.Initialization.InfrastructureProvisioned = ptr.To(true) + machine.Status.Initialization.BootstrapDataSecretCreated = ptr.To(true) + return &Reconciler{}, &scope{machine: machine} + }, + wantResult: ctrl.Result{}, + wantErr: true, + wantErrContains: "InfraMachine not found", + wantReason: clusterv1.MachineInPlaceUpdateFailedReason, + wantMessage: "In-place update not possible: InfraMachine not found", + }, + { + name: "requeues while UpdateMachine hook is in progress", + featureEnabled: true, + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + + catalog := runtimecatalog.New() + if err := runtimehooksv1.AddToCatalog(catalog); err != nil { + t.Fatalf("failed to add hooks to catalog: %v", err) + } + updateGVH, err := catalog.GroupVersionHook(runtimehooksv1.UpdateMachine) + if err != nil { + t.Fatalf("failed to look up UpdateMachine hook: %v", err) + } + + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{ + updateGVH: {"test-extension"}, + }). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + updateGVH: &runtimehooksv1.UpdateMachineResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + Message: "processing", + }, + RetryAfterSeconds: 30, + }, + }, + }). 
+ Build() + + scheme := runtime.NewScheme() + if err := clusterv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add clusterv1 to scheme: %v", err) + } + + machine := newTestMachine() + machine.Annotations[clusterv1.UpdateInProgressAnnotation] = "" + machine.Annotations[runtimev1.PendingHooksAnnotation] = runtimecatalog.HookName(runtimehooksv1.UpdateMachine) + machine.Status.Initialization.InfrastructureProvisioned = ptr.To(true) + machine.Status.Initialization.BootstrapDataSecretCreated = ptr.To(true) + + infra := newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra") + infra.SetAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""}) + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(machine, infra).Build() + + return &Reconciler{ + Client: client, + RuntimeClient: runtimeClient, + }, &scope{ + machine: machine, + infraMachine: infra, + } + }, + wantResult: ctrl.Result{RequeueAfter: 30 * time.Second}, + wantReason: clusterv1.MachineInPlaceUpdatingReason, + wantMessage: "In-place update in progress: processing", + }, + { + name: "completes successfully and cleans annotations", + featureEnabled: true, + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + + catalog := runtimecatalog.New() + if err := runtimehooksv1.AddToCatalog(catalog); err != nil { + t.Fatalf("failed to add hooks to catalog: %v", err) + } + updateGVH, err := catalog.GroupVersionHook(runtimehooksv1.UpdateMachine) + if err != nil { + t.Fatalf("failed to look up UpdateMachine hook: %v", err) + } + + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{ + updateGVH: {"test-extension"}, + }). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + updateGVH: &runtimehooksv1.UpdateMachineResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + Message: "done", + }, + RetryAfterSeconds: 0, + }, + }, + }). 
+ Build() + + scheme := runtime.NewScheme() + if err := clusterv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add clusterv1 to scheme: %v", err) + } + + machine := newTestMachine() + machine.Annotations[clusterv1.UpdateInProgressAnnotation] = "" + machine.Annotations[runtimev1.PendingHooksAnnotation] = runtimecatalog.HookName(runtimehooksv1.UpdateMachine) + machine.Status.Initialization.InfrastructureProvisioned = ptr.To(true) + machine.Status.Initialization.BootstrapDataSecretCreated = ptr.To(true) + + infra := newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra") + infra.SetAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""}) + + bootstrap := newTestUnstructured("GenericBootstrapConfig", "bootstrap.cluster.x-k8s.io/v1beta2", "bootstrap") + bootstrap.SetAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""}) + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(machine, infra, bootstrap).Build() + + return &Reconciler{ + Client: client, + RuntimeClient: runtimeClient, + }, &scope{ + machine: machine, + infraMachine: infra, + bootstrapConfig: bootstrap, + } + }, + wantResult: ctrl.Result{}, + verify: func(t *testing.T, g *WithT, ctx context.Context, r *Reconciler, s *scope) { + t.Helper() + + updatedMachine := &clusterv1.Machine{} + g.Expect(r.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(s.machine), updatedMachine)).To(Succeed()) + g.Expect(updatedMachine.Annotations).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + g.Expect(updatedMachine.Annotations).ToNot(HaveKey(runtimev1.PendingHooksAnnotation)) + + updatedInfra := &unstructured.Unstructured{} + updatedInfra.SetGroupVersionKind(s.infraMachine.GroupVersionKind()) + g.Expect(r.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(s.infraMachine), updatedInfra)).To(Succeed()) + g.Expect(updatedInfra.GetAnnotations()).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + + if s.bootstrapConfig != nil { + updatedBootstrap := &unstructured.Unstructured{} + updatedBootstrap.SetGroupVersionKind(s.bootstrapConfig.GroupVersionKind()) + g.Expect(r.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(s.bootstrapConfig), updatedBootstrap)).To(Succeed()) + g.Expect(updatedBootstrap.GetAnnotations()).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.InPlaceUpdates, tt.featureEnabled) + + r, scope := tt.setup(t) + ctx := context.Background() + + result, err := r.reconcileInPlaceUpdate(ctx, scope) + + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + if tt.wantErrContains != "" { + g.Expect(err.Error()).To(ContainSubstring(tt.wantErrContains)) + } + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + + g.Expect(result).To(Equal(tt.wantResult)) + + if tt.wantReason != "" { + g.Expect(scope.updatingReason).To(Equal(tt.wantReason)) + } else { + g.Expect(scope.updatingReason).To(BeEmpty()) + } + + if tt.wantMessage != "" { + g.Expect(scope.updatingMessage).To(Equal(tt.wantMessage)) + } else { + g.Expect(scope.updatingMessage).To(BeEmpty()) + } + + if tt.verify != nil { + tt.verify(t, g, ctx, r, scope) + } + }) + } +} + +func TestCallUpdateMachineHook(t *testing.T) { + catalog := runtimecatalog.New() + if err := runtimehooksv1.AddToCatalog(catalog); err != nil { + t.Fatalf("failed to add hooks to catalog: %v", err) + } + updateGVH, err := 
catalog.GroupVersionHook(runtimehooksv1.UpdateMachine) + if err != nil { + t.Fatalf("failed to determine UpdateMachine hook: %v", err) + } + + tests := []struct { + name string + setup func(*testing.T) (*Reconciler, *scope) + wantResult ctrl.Result + wantMessage string + wantErr bool + wantErrSubstrings []string + }{ + { + name: "fails if no extensions registered", + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{}). + Build() + return &Reconciler{RuntimeClient: runtimeClient}, &scope{machine: newTestMachine(), infraMachine: newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra")} + }, + wantErr: true, + wantErrSubstrings: []string{"no extensions registered for UpdateMachine hook"}, + }, + { + name: "fails if multiple extensions registered", + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{ + updateGVH: {"ext-a", "ext-b"}, + }). + Build() + return &Reconciler{RuntimeClient: runtimeClient}, &scope{machine: newTestMachine(), infraMachine: newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra")} + }, + wantErr: true, + wantErrSubstrings: []string{ + "multiple extensions registered for UpdateMachine hook", + "only one extension is supported", + "ext-a", + "ext-b", + }, + }, + { + name: "fails when hook invocation returns error", + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{ + updateGVH: {"ext"}, + }). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + updateGVH: &runtimehooksv1.UpdateMachineResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{Status: runtimehooksv1.ResponseStatusFailure}, + }, + }, + }). + Build() + return &Reconciler{RuntimeClient: runtimeClient}, &scope{machine: newTestMachine(), infraMachine: newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra")} + }, + wantErr: true, + wantErrSubstrings: []string{"runtime hook", "UpdateMachine", "failed"}, + }, + { + name: "returns requeue when hook succeeds with retry", + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{ + updateGVH: {"ext"}, + }). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + updateGVH: &runtimehooksv1.UpdateMachineResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + Message: "processing", + }, + RetryAfterSeconds: 45, + }, + }, + }). 
+ Build() + return &Reconciler{RuntimeClient: runtimeClient}, &scope{machine: newTestMachine(), infraMachine: newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra")} + }, + wantResult: ctrl.Result{RequeueAfter: 45 * time.Second}, + wantMessage: "processing", + }, + { + name: "returns message when hook succeeds", + setup: func(t *testing.T) (*Reconciler, *scope) { + t.Helper() + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithGetAllExtensionResponses(map[runtimecatalog.GroupVersionHook][]string{ + updateGVH: {"ext"}, + }). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + updateGVH: &runtimehooksv1.UpdateMachineResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + Message: "done", + }, + }, + }, + }). + Build() + return &Reconciler{RuntimeClient: runtimeClient}, &scope{machine: newTestMachine(), infraMachine: newTestUnstructured("GenericInfrastructureMachine", "infrastructure.cluster.x-k8s.io/v1beta2", "infra")} + }, + wantResult: ctrl.Result{}, + wantMessage: "done", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + r, scope := tt.setup(t) + result, message, err := r.callUpdateMachineHook(context.Background(), scope) + + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + for _, substr := range tt.wantErrSubstrings { + g.Expect(err.Error()).To(ContainSubstring(substr)) + } + return + } + + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result).To(Equal(tt.wantResult)) + g.Expect(message).To(Equal(tt.wantMessage)) + }) + } +} + +func TestRemoveInPlaceUpdateAnnotation(t *testing.T) { + tests := []struct { + name string + setup func(*testing.T) (*Reconciler, ctrlclient.Client, *clusterv1.Machine) + verify func(*WithT, context.Context, ctrlclient.Client, *clusterv1.Machine) + }{ + { + name: "removes annotation when present", + setup: func(t *testing.T) (*Reconciler, ctrlclient.Client, *clusterv1.Machine) { + t.Helper() + scheme := runtime.NewScheme() + if err := clusterv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add clusterv1 to scheme: %v", err) + } + + machine := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine", Namespace: "default", Annotations: map[string]string{clusterv1.UpdateInProgressAnnotation: ""}}} + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(machine).Build() + return &Reconciler{Client: client}, client, machine + }, + verify: func(g *WithT, ctx context.Context, c ctrlclient.Client, machine *clusterv1.Machine) { + updated := &clusterv1.Machine{} + g.Expect(c.Get(ctx, ctrlclient.ObjectKeyFromObject(machine), updated)).To(Succeed()) + g.Expect(updated.Annotations).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + }, + }, + { + name: "no-op when annotation missing", + setup: func(t *testing.T) (*Reconciler, ctrlclient.Client, *clusterv1.Machine) { + t.Helper() + scheme := runtime.NewScheme() + if err := clusterv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add clusterv1 to scheme: %v", err) + } + + machine := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine", Namespace: "default"}} + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(machine).Build() + return &Reconciler{Client: client}, client, machine + }, + verify: func(g *WithT, ctx context.Context, c ctrlclient.Client, machine 
*clusterv1.Machine) { + updated := &clusterv1.Machine{} + g.Expect(c.Get(ctx, ctrlclient.ObjectKeyFromObject(machine), updated)).To(Succeed()) + g.Expect(updated.Annotations).ToNot(HaveKey(clusterv1.UpdateInProgressAnnotation)) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + r, client, machine := tt.setup(t) + ctx := context.Background() + g.Expect(r.removeInPlaceUpdateAnnotation(ctx, machine)).To(Succeed()) + + if tt.verify != nil { + tt.verify(g, ctx, client, machine) + } + }) + } +} + +func TestCompleteInPlaceUpdate_MissingInfra(t *testing.T) { + g := NewWithT(t) + + scheme := runtime.NewScheme() + if err := clusterv1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add clusterv1 to scheme: %v", err) + } + + machine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine", + Namespace: "default", + Annotations: map[string]string{}, + }, + } + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(machine).Build() + + r := &Reconciler{Client: client} + scope := &scope{machine: machine, infraMachine: nil} + + err := r.completeInPlaceUpdate(context.Background(), scope) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestCleanupMachine(t *testing.T) { + g := NewWithT(t) + + original := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine", + Namespace: "default", + Labels: map[string]string{"key": "value"}, + Annotations: map[string]string{"anno": "value"}, + }, + } + original.Status.Phase = "Running" + + cleaned := cleanupMachine(original) + + g.Expect(cleaned.APIVersion).To(Equal(clusterv1.GroupVersion.String())) + g.Expect(cleaned.Kind).To(Equal("Machine")) + g.Expect(cleaned.Name).To(Equal("machine")) + g.Expect(cleaned.Namespace).To(Equal("default")) + g.Expect(cleaned.Labels).To(HaveKeyWithValue("key", "value")) + g.Expect(cleaned.Annotations).To(HaveKeyWithValue("anno", "value")) + g.Expect(cleaned.Status).To(BeZero()) +} + +func TestCleanupUnstructured(t *testing.T) { + g := NewWithT(t) + + original := &unstructured.Unstructured{Object: map[string]interface{}{}} + original.SetAPIVersion("infrastructure.cluster.x-k8s.io/v1beta2") + original.SetKind("GenericInfrastructureMachine") + original.SetName("infra") + original.SetNamespace("default") + original.SetLabels(map[string]string{"key": "value"}) + original.SetAnnotations(map[string]string{"anno": "value"}) + original.Object["spec"] = map[string]interface{}{"field": "value"} + original.Object["status"] = map[string]interface{}{"state": "ready"} + + cleaned := cleanupUnstructured(original) + + g.Expect(cleaned.GetAPIVersion()).To(Equal(original.GetAPIVersion())) + g.Expect(cleaned.GetKind()).To(Equal(original.GetKind())) + g.Expect(cleaned.GetName()).To(Equal(original.GetName())) + g.Expect(cleaned.GetNamespace()).To(Equal(original.GetNamespace())) + g.Expect(cleaned.GetLabels()).To(HaveKeyWithValue("key", "value")) + g.Expect(cleaned.GetAnnotations()).To(HaveKeyWithValue("anno", "value")) + + spec, found, err := unstructured.NestedMap(cleaned.Object, "spec") + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(found).To(BeTrue()) + g.Expect(spec).To(HaveKeyWithValue("field", "value")) + + _, found, err = unstructured.NestedFieldCopy(cleaned.Object, "status") + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(found).To(BeFalse()) +} + +func newTestMachine() *clusterv1.Machine { + return &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine", + Namespace: "default", + Labels: map[string]string{}, + Annotations: 
map[string]string{}, + }, + Spec: clusterv1.MachineSpec{}, + } +} + +func newTestUnstructured(kind, apiVersion, name string) *unstructured.Unstructured { + u := &unstructured.Unstructured{Object: map[string]interface{}{}} + u.SetAPIVersion(apiVersion) + u.SetKind(kind) + u.SetNamespace("default") + u.SetName(name) + u.SetLabels(map[string]string{}) + u.SetAnnotations(map[string]string{}) + u.Object["spec"] = map[string]interface{}{"field": "value"} + return u +} diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index 59c70cac013f..b85e009137fb 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -65,10 +65,11 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) { // Note: also other controllers adds conditions to the machine object (machine's owner controller sets the UpToDate condition, // MHC controller sets HealthCheckSucceeded and OwnerRemediated conditions, KCP sets conditions about etcd and control plane pods). setDeletingCondition(ctx, s.machine, s.reconcileDeleteExecuted, s.deletingReason, s.deletingMessage) + setUpdatingCondition(ctx, s.machine, s.updatingReason, s.updatingMessage) setReadyCondition(ctx, s.machine) setAvailableCondition(ctx, s.machine) - setMachinePhaseAndLastUpdated(ctx, s.machine) + setMachinePhaseAndLastUpdated(ctx, s.machine, s.updatingReason) } func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, bootstrapConfig *unstructured.Unstructured, bootstrapConfigIsNotFound bool) { @@ -633,6 +634,24 @@ func setDeletingCondition(_ context.Context, machine *clusterv1.Machine, reconci }) } +func setUpdatingCondition(_ context.Context, machine *clusterv1.Machine, updatingReason, updatingMessage string) { + if updatingReason == "" { + conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineUpdatingCondition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotUpdatingReason, + }) + return + } + + conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineUpdatingCondition, + Status: metav1.ConditionTrue, + Reason: updatingReason, + Message: updatingMessage, + }) +} + func setReadyCondition(ctx context.Context, machine *clusterv1.Machine) { log := ctrl.LoggerFrom(ctx) @@ -795,7 +814,7 @@ func setAvailableCondition(ctx context.Context, machine *clusterv1.Machine) { }) } -func setMachinePhaseAndLastUpdated(_ context.Context, m *clusterv1.Machine) { +func setMachinePhaseAndLastUpdated(_ context.Context, m *clusterv1.Machine, updatingReason string) { originalPhase := m.Status.Phase // Set the phase to "pending" if nil. @@ -818,6 +837,10 @@ func setMachinePhaseAndLastUpdated(_ context.Context, m *clusterv1.Machine) { m.Status.SetTypedPhase(clusterv1.MachinePhaseRunning) } + if updatingReason != "" { + m.Status.SetTypedPhase(clusterv1.MachinePhaseUpdating) + } + // Set the phase to "deleting" if the deletion timestamp is set. 
if !m.DeletionTimestamp.IsZero() { m.Status.SetTypedPhase(clusterv1.MachinePhaseDeleting) diff --git a/main.go b/main.go index f8056b6efc17..8f6f050fafd8 100644 --- a/main.go +++ b/main.go @@ -672,6 +672,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager, watchNamespaces map Client: mgr.GetClient(), APIReader: mgr.GetAPIReader(), ClusterCache: clusterCache, + RuntimeClient: runtimeClient, WatchFilterValue: watchFilterValue, RemoteConditionsGracePeriod: remoteConditionsGracePeriod, AdditionalSyncMachineLabels: additionalSyncMachineLabelRegexes, diff --git a/test/e2e/cluster_in_place_update.go b/test/e2e/cluster_in_place_update.go new file mode 100644 index 000000000000..06a61b797954 --- /dev/null +++ b/test/e2e/cluster_in_place_update.go @@ -0,0 +1,321 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "maps" + "os" + "path/filepath" + "slices" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + bootstrapv1 "sigs.k8s.io/cluster-api/api/bootstrap/kubeadm/v1beta2" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util" +) + +// ClusterInPlaceUpdateSpecInput is the input for ClusterInPlaceUpdateSpec. +type ClusterInPlaceUpdateSpecInput struct { + E2EConfig *clusterctl.E2EConfig + ClusterctlConfigPath string + BootstrapClusterProxy framework.ClusterProxy + ArtifactFolder string + SkipCleanup bool + + // InfrastructureProvider allows to specify the infrastructure provider to be used when looking for + // cluster templates. + // If not set, clusterctl will look at the infrastructure provider installed in the management cluster; + // if only one infrastructure provider exists, it will be used, otherwise the operation will fail if more than one exists. + InfrastructureProvider *string + + // Flavor, if specified is the template flavor used to create the cluster for testing. + // If not specified, the default flavor for the selected infrastructure provider is used. + Flavor *string + + // WorkerMachineCount defines number of worker machines to be added to the workload cluster. + // If not specified, 1 will be used. + WorkerMachineCount *int64 + + // ExtensionConfigName is the name of the ExtensionConfig. Defaults to "quick-start". + // This value is provided to clusterctl as "EXTENSION_CONFIG_NAME" variable and can be used to template the + // name of the ExtensionConfig into the ClusterClass. + ExtensionConfigName string + + // ExtensionServiceNamespace is the namespace where the service for the Runtime Extension is located. + // Note: This should only be set if a Runtime Extension is used. 
+	ExtensionServiceNamespace string
+
+	// ExtensionServiceName is the name of the service for the Runtime Extension.
+	// Note: This should only be set if a Runtime Extension is used.
+	ExtensionServiceName string
+
+	// Allows to inject a function to be run after test namespace is created.
+	// If not specified, this is a no-op.
+	PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string)
+
+	// ClusterctlVariables allows injecting variables to the cluster template.
+	// If not specified, this is a no-op.
+	ClusterctlVariables map[string]string
+}
+
+// ClusterInPlaceUpdateSpec implements a test for in-place updates.
+// Note: This test works with KCP as it tests the KCP in-place update feature.
+func ClusterInPlaceUpdateSpec(ctx context.Context, inputGetter func() ClusterInPlaceUpdateSpecInput) {
+	var (
+		specName         = "in-place-update"
+		input            ClusterInPlaceUpdateSpecInput
+		namespace        *corev1.Namespace
+		cancelWatches    context.CancelFunc
+		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
+	)
+
+	BeforeEach(func() {
+		Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName)
+		input = inputGetter()
+		Expect(input.E2EConfig).ToNot(BeNil(), "Invalid argument. input.E2EConfig can't be nil when calling %s spec", specName)
+		Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling %s spec", specName)
+		Expect(input.BootstrapClusterProxy).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName)
+		Expect(os.MkdirAll(input.ArtifactFolder, 0750)).To(Succeed(), "Invalid argument. input.ArtifactFolder can't be created for %s spec", specName)
+
+		Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersion))
+
+		if input.ExtensionServiceNamespace != "" && input.ExtensionServiceName != "" {
+			if input.ExtensionConfigName == "" {
+				input.ExtensionConfigName = specName
+			}
+		}
+
+		// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
+		namespace, cancelWatches = framework.SetupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, input.PostNamespaceCreated)
+
+		clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
+	})
+
+	It("Should create a workload cluster", func() {
+		By("Creating a workload cluster")
+
+		infrastructureProvider := clusterctl.DefaultInfrastructureProvider
+		if input.InfrastructureProvider != nil {
+			infrastructureProvider = *input.InfrastructureProvider
+		}
+
+		flavor := clusterctl.DefaultFlavor
+		if input.Flavor != nil {
+			flavor = *input.Flavor
+		}
+
+		workerMachineCount := ptr.To[int64](1)
+		if input.WorkerMachineCount != nil {
+			workerMachineCount = input.WorkerMachineCount
+		}
+
+		clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6))
+
+		if input.ExtensionServiceNamespace != "" && input.ExtensionServiceName != "" {
+			// NOTE: the test extension is already deployed in the management cluster. If for any reason we want
+			// to make this test more self-contained in the future, it should be modified to create an additional
+			// management cluster; the E2E test configuration should also be modified, introducing something like
+			// optional:true to allow defining which providers should not be installed by default in
+			// a management cluster.
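+			// The ExtensionConfig created below registers the test-extension webhook service with the
+			// Runtime SDK, so that the CanUpdateMachine, CanUpdateMachineSet and UpdateMachine handlers
+			// (registered in test/extension/main.go as part of this change) are discoverable and get called
+			// for Clusters in this test namespace.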
+ By("Deploy Test Extension ExtensionConfig") + + // In this test we are defaulting all handlers to non-blocking because we don't expect the handlers to block the + // cluster lifecycle by default. Setting defaultAllHandlersToBlocking to false enforces that the test-extension + // automatically creates the ConfigMap with non-blocking preloaded responses. + defaultAllHandlersToBlocking := false + // select on the current namespace + // This is necessary so in CI this test doesn't influence other tests by enabling lifecycle hooks + // in other test namespaces. + namespaces := []string{namespace.Name} + extensionConfig := extensionConfig(input.ExtensionConfigName, input.ExtensionServiceNamespace, input.ExtensionServiceName, defaultAllHandlersToBlocking, namespaces...) + Expect(input.BootstrapClusterProxy.GetClient().Create(ctx, + extensionConfig)). + To(Succeed(), "Failed to create the ExtensionConfig") + } + + variables := map[string]string{ + // This is used to template the name of the ExtensionConfig into the ClusterClass. + "EXTENSION_CONFIG_NAME": input.ExtensionConfigName, + } + maps.Copy(variables, input.ClusterctlVariables) + + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()), + ClusterctlConfigPath: input.ClusterctlConfigPath, + ClusterctlVariables: variables, + KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: infrastructureProvider, + Flavor: flavor, + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: input.E2EConfig.MustGetVariable(KubernetesVersion), + // ControlPlaneMachineCount is not configurable because it has to be 3 + // because we want to use scale-in to test in-place updates. + ControlPlaneMachineCount: ptr.To[int64](3), + WorkerMachineCount: workerMachineCount, + }, + WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }, clusterResources) + + cluster := clusterResources.Cluster + mgmtClient := input.BootstrapClusterProxy.GetClient() + + Byf("Verify Cluster is Available and Machines are Ready before starting in-place updates") + framework.VerifyClusterAvailable(ctx, framework.VerifyClusterAvailableInput{ + Getter: mgmtClient, + Name: clusterResources.Cluster.Name, + Namespace: clusterResources.Cluster.Namespace, + }) + framework.VerifyMachinesReady(ctx, framework.VerifyMachinesReadyInput{ + Lister: mgmtClient, + Name: clusterResources.Cluster.Name, + Namespace: clusterResources.Cluster.Namespace, + }) + + var machineObjectsBeforeInPlaceUpdate machineObjects + Eventually(func(g Gomega) { + machineObjectsBeforeInPlaceUpdate = getMachineObjects(ctx, g, mgmtClient, cluster) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + + // Doing multiple in-place updates for additional coverage. + filePath := "/tmp/test" + for i, fileContent := range []string{"first in-place update", "second in-place update"} { + Byf("[%d] Trigger in-place update by modifying the files variable", i) + + originalCluster := cluster.DeepCopy() + // Ensure the files variable is set to the expected value. 
+ cluster.Spec.Topology.Variables = slices.DeleteFunc(cluster.Spec.Topology.Variables, func(v clusterv1.ClusterVariable) bool { + return v.Name == "files" + }) + cluster.Spec.Topology.Variables = append(cluster.Spec.Topology.Variables, clusterv1.ClusterVariable{ + Name: "files", + Value: apiextensionsv1.JSON{Raw: []byte(fmt.Sprintf(`[{"path":"%s","content":"%s"}]`, filePath, fileContent))}, + }) + Expect(mgmtClient.Patch(ctx, cluster, client.MergeFrom(originalCluster))).To(Succeed()) + + var machineObjectsAfterInPlaceUpdate machineObjects + Eventually(func(g Gomega) { + framework.VerifyClusterCondition(ctx, framework.VerifyClusterConditionInput{ + Getter: mgmtClient, + Name: clusterResources.Cluster.Name, + Namespace: clusterResources.Cluster.Namespace, + ConditionType: clusterv1.ClusterControlPlaneMachinesUpToDateCondition, + }) + framework.VerifyClusterCondition(ctx, framework.VerifyClusterConditionInput{ + Getter: mgmtClient, + Name: clusterResources.Cluster.Name, + Namespace: clusterResources.Cluster.Namespace, + ConditionType: clusterv1.ClusterWorkerMachinesUpToDateCondition, + }) + + machineObjectsAfterInPlaceUpdate = getMachineObjects(ctx, g, mgmtClient, cluster) + // TODO(in-place): enable once Machine controller PRs are merged + g.Expect(machineNames(machineObjectsAfterInPlaceUpdate.ControlPlaneMachines)).To(Equal(machineNames(machineObjectsBeforeInPlaceUpdate.ControlPlaneMachines))) + // TODO(in-place): enable once MD/MS/Machine controller PRs are merged + //g.Expect(machineNames(machineObjects.WorkerMachines)).To(Equal(machineNames(machineObjectsBeforeInPlaceUpdate.WorkerMachines))) + + for _, kubeadmConfig := range machineObjectsAfterInPlaceUpdate.KubeadmConfigByMachine { + g.Expect(kubeadmConfig.Spec.Files).To(ContainElement(HaveField("Path", filePath))) + g.Expect(kubeadmConfig.Spec.Files).To(ContainElement(HaveField("Content", fileContent))) + } + }, input.E2EConfig.GetIntervals(specName, "wait-control-plane")...).Should(Succeed()) + + // Update for the next round of in-place update. + machineObjectsBeforeInPlaceUpdate = machineObjectsAfterInPlaceUpdate + } + + By("PASSED!") + }) + + AfterEach(func() { + // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. + framework.DumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ClusterctlConfigPath, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup) + if !input.SkipCleanup { + if input.ExtensionServiceNamespace != "" && input.ExtensionServiceName != "" { + Eventually(func() error { + return input.BootstrapClusterProxy.GetClient().Delete(ctx, extensionConfig(input.ExtensionConfigName, input.ExtensionServiceNamespace, input.ExtensionServiceName, true)) + }, 10*time.Second, 1*time.Second).Should(Succeed(), "Deleting ExtensionConfig failed") + } + } + }) +} + +type machineObjects struct { + ControlPlaneMachines []*clusterv1.Machine + WorkerMachines []*clusterv1.Machine + + KubeadmConfigByMachine map[string]*bootstrapv1.KubeadmConfig +} + +// getMachineObjects retrieves Machines and corresponding KubeadmConfigs. +func getMachineObjects(ctx context.Context, g Gomega, c client.Client, cluster *clusterv1.Cluster) machineObjects { + res := machineObjects{ + KubeadmConfigByMachine: map[string]*bootstrapv1.KubeadmConfig{}, + } + + // ControlPlane Machines. 
+ controlPlaneMachineList := &clusterv1.MachineList{} + g.Expect(c.List(ctx, controlPlaneMachineList, client.InNamespace(cluster.Namespace), client.MatchingLabels{ + clusterv1.MachineControlPlaneLabel: "", + clusterv1.ClusterNameLabel: cluster.Name, + })).To(Succeed()) + for _, machine := range controlPlaneMachineList.Items { + res.ControlPlaneMachines = append(res.ControlPlaneMachines, &machine) + kubeadmConfig := &bootstrapv1.KubeadmConfig{} + g.Expect(c.Get(ctx, client.ObjectKey{Namespace: machine.Namespace, Name: machine.Spec.Bootstrap.ConfigRef.Name}, kubeadmConfig)).To(Succeed()) + res.KubeadmConfigByMachine[machine.Name] = kubeadmConfig + } + + // MachineDeployments Machines. + machines := framework.GetMachinesByCluster(ctx, framework.GetMachinesByClusterInput{ + Lister: c, + ClusterName: cluster.Name, + Namespace: cluster.Namespace, + }) + for _, machine := range machines { + res.WorkerMachines = append(res.WorkerMachines, &machine) + kubeadmConfig := &bootstrapv1.KubeadmConfig{} + g.Expect(c.Get(ctx, client.ObjectKey{Namespace: machine.Namespace, Name: machine.Spec.Bootstrap.ConfigRef.Name}, kubeadmConfig)).To(Succeed()) + res.KubeadmConfigByMachine[machine.Name] = kubeadmConfig + } + + return res +} + +func machineNames(machines []*clusterv1.Machine) sets.Set[string] { + ret := sets.Set[string]{} + for _, m := range machines { + ret.Insert(m.Name) + } + return ret +} diff --git a/test/e2e/cluster_in_place_update_test.go b/test/e2e/cluster_in_place_update_test.go new file mode 100644 index 000000000000..d826d73df01b --- /dev/null +++ b/test/e2e/cluster_in_place_update_test.go @@ -0,0 +1,44 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + . "github.com/onsi/ginkgo/v2" + "k8s.io/utils/ptr" +) + +var _ = Describe("When in-place updating a workload cluster using ClusterClass", Label("ClusterClass"), func() { + ClusterInPlaceUpdateSpec(ctx, func() ClusterInPlaceUpdateSpecInput { + return ClusterInPlaceUpdateSpecInput{ + E2EConfig: e2eConfig, + ClusterctlConfigPath: clusterctlConfigPath, + BootstrapClusterProxy: bootstrapClusterProxy, + ArtifactFolder: artifactFolder, + SkipCleanup: skipCleanup, + Flavor: ptr.To("topology-in-place"), + // The runtime extension gets deployed to the test-extension-system namespace and is exposed + // by the test-extension-webhook-service. + // The below values are used when creating the cluster-wide ExtensionConfig to refer + // the actual service. 
+ ExtensionServiceNamespace: "test-extension-system", + ExtensionServiceName: "test-extension-webhook-service", + } + }) +}) diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml index b178df64a701..d7d914b123ae 100644 --- a/test/e2e/config/docker.yaml +++ b/test/e2e/config/docker.yaml @@ -342,6 +342,7 @@ providers: - sourcePath: "../data/infrastructure-docker/main/cluster-template-ipv6.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-dualstack-ipv6-primary.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-dualstack-ipv4-primary.yaml" + - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-in-place.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-no-workers.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-runtimesdk-v1beta1.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-topology-kcp-only.yaml" diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-topology-in-place/cluster-runtimesdk.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-in-place/cluster-runtimesdk.yaml new file mode 100644 index 000000000000..87278c772409 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-in-place/cluster-runtimesdk.yaml @@ -0,0 +1,37 @@ +apiVersion: cluster.x-k8s.io/v1beta2 +kind: Cluster +metadata: + name: '${CLUSTER_NAME}' + namespace: default + labels: + cni: "${CLUSTER_NAME}-crs-0" +spec: + clusterNetwork: + services: + cidrBlocks: ['${DOCKER_SERVICE_CIDRS}'] + pods: + cidrBlocks: ['${DOCKER_POD_CIDRS}'] + serviceDomain: '${DOCKER_SERVICE_DOMAIN}' + topology: + classRef: + name: "quick-start-runtimesdk" + namespace: '${CLUSTER_CLASS_NAMESPACE:-${NAMESPACE}}' + version: "${KUBERNETES_VERSION}" + controlPlane: + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + workers: + machineDeployments: + - class: "default-worker" + name: "md-0" + replicas: ${WORKER_MACHINE_COUNT} + rollout: + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + variables: + - name: kubeadmControlPlaneMaxSurge + value: "0" + - name: imageRepository + value: "kindest" diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-topology-in-place/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-in-place/kustomization.yaml new file mode 100644 index 000000000000..479086cae767 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-topology-in-place/kustomization.yaml @@ -0,0 +1,3 @@ +resources: + - ../bases/crs.yaml + - cluster-runtimesdk.yaml diff --git a/test/extension/handlers/inplaceupdate/handlers.go b/test/extension/handlers/inplaceupdate/handlers.go new file mode 100644 index 000000000000..0dd7df124c66 --- /dev/null +++ b/test/extension/handlers/inplaceupdate/handlers.go @@ -0,0 +1,349 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// Package inplaceupdate contains the handlers for the in-place Kubernetes version update hooks. +// +// The implementation of the handlers is specifically designed for Cluster API E2E tests use cases, +// focusing on simulating realistic Kubernetes version upgrades through the in-place update mechanism. +// When implementing custom RuntimeExtension, it is only required to expose HandlerFunc with the +// signature defined in sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1. +package inplaceupdate + +import ( + "context" + "encoding/json" + "reflect" + "sync" + "time" + + "github.com/pkg/errors" + "gomodules.xyz/jsonpatch/v2" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + bootstrapv1 "sigs.k8s.io/cluster-api/api/bootstrap/kubeadm/v1beta2" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" + infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta2" +) + +// ExtensionHandlers provides a common struct shared across the in-place update hook handlers. +type ExtensionHandlers struct { + decoder runtime.Decoder + client client.Client + state sync.Map +} + +// NewExtensionHandlers returns a new ExtensionHandlers for the in-place update hook handlers. +func NewExtensionHandlers(client client.Client) *ExtensionHandlers { + scheme := runtime.NewScheme() + _ = infrav1.AddToScheme(scheme) + _ = bootstrapv1.AddToScheme(scheme) + _ = controlplanev1.AddToScheme(scheme) + return &ExtensionHandlers{ + client: client, + decoder: serializer.NewCodecFactory(scheme).UniversalDecoder( + infrav1.GroupVersion, + bootstrapv1.GroupVersion, + ), + } +} + +// canUpdateMachineSpec declares that this extension can update: +// * MachineSpec.FailureDomain +// * MachineSpec.Version. +func canUpdateMachineSpec(current, desired *clusterv1.MachineSpec) { + if current.FailureDomain != desired.FailureDomain { + current.FailureDomain = desired.FailureDomain + } + // TBD if we should keep this, but for now using it to test KCP machinery as this + // is the only field on Machine that KCP will try to roll out in-place. + if current.Version != desired.Version { + current.Version = desired.Version + } +} + +// canUpdateKubeadmConfigSpec declares that this extension can update: +// * KubeadmConfigSpec.ClusterConfiguration.Etcd.Local.ImageTag +// * KubeadmConfigSpec.Files. +func canUpdateKubeadmConfigSpec(current, desired *bootstrapv1.KubeadmConfigSpec) { + if current.ClusterConfiguration.Etcd.Local.ImageTag != desired.ClusterConfiguration.Etcd.Local.ImageTag { + current.ClusterConfiguration.Etcd.Local.ImageTag = desired.ClusterConfiguration.Etcd.Local.ImageTag + } + if !reflect.DeepEqual(current.Files, desired.Files) { + current.Files = desired.Files + } +} + +// canUpdateDockerMachineSpec declares that this extension can update: +// * DockerMachineSpec.BootstrapTimeout. +func canUpdateDockerMachineSpec(current, desired *infrav1.DockerMachineSpec) { + if current.BootstrapTimeout != desired.BootstrapTimeout { + current.BootstrapTimeout = desired.BootstrapTimeout + } +} + +// DoCanUpdateMachine implements the CanUpdateMachine hook. 
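+// The handler works like a dry-run: for every field it knows how to reconcile in-place it copies the
+// desired value into its copy of the current objects, then returns JSON patches for the Machine,
+// BootstrapConfig and InfrastructureMachine computed from those copies. Fields it leaves untouched
+// declare changes this extension cannot apply in-place.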
+func (h *ExtensionHandlers) DoCanUpdateMachine(ctx context.Context, req *runtimehooksv1.CanUpdateMachineRequest, resp *runtimehooksv1.CanUpdateMachineResponse) { + log := ctrl.LoggerFrom(ctx).WithValues("Machine", klog.KObj(&req.Desired.Machine)) + log.Info("CanUpdateMachine is called") + + currentMachine, desiredMachine, + currentBootstrapConfig, desiredBootstrapConfig, + currentInfraMachine, desiredInfraMachine, err := h.getObjectsFromCanUpdateMachineRequest(req) + if err != nil { + resp.Status = runtimehooksv1.ResponseStatusFailure + resp.Message = err.Error() + return + } + + // Declare changes that this Runtime Extension can update in-place. + + // Machine + canUpdateMachineSpec(¤tMachine.Spec, &desiredMachine.Spec) + + // BootstrapConfig (we can only update KubeadmConfigs) + currentKubeadmConfig, isCurrentKubeadmConfig := currentBootstrapConfig.(*bootstrapv1.KubeadmConfig) + desiredKubeadmConfig, isDesiredKubeadmConfig := desiredBootstrapConfig.(*bootstrapv1.KubeadmConfig) + if isCurrentKubeadmConfig && isDesiredKubeadmConfig { + canUpdateKubeadmConfigSpec(¤tKubeadmConfig.Spec, &desiredKubeadmConfig.Spec) + } + + // InfraMachine (we can only update DockerMachines) + currentDockerMachine, isCurrentDockerMachine := currentInfraMachine.(*infrav1.DockerMachine) + desiredDockerMachine, isDesiredDockerMachine := desiredInfraMachine.(*infrav1.DockerMachine) + if isCurrentDockerMachine && isDesiredDockerMachine { + canUpdateDockerMachineSpec(¤tDockerMachine.Spec, &desiredDockerMachine.Spec) + } + + if err := h.computeCanUpdateMachineResponse(req, resp, currentMachine, currentBootstrapConfig, currentInfraMachine); err != nil { + resp.Status = runtimehooksv1.ResponseStatusFailure + resp.Message = err.Error() + return + } + + resp.Status = runtimehooksv1.ResponseStatusSuccess +} + +// DoCanUpdateMachineSet implements the CanUpdateMachineSet hook. +func (h *ExtensionHandlers) DoCanUpdateMachineSet(ctx context.Context, req *runtimehooksv1.CanUpdateMachineSetRequest, resp *runtimehooksv1.CanUpdateMachineSetResponse) { + log := ctrl.LoggerFrom(ctx).WithValues("MachineSet", klog.KObj(&req.Desired.MachineSet)) + log.Info("CanUpdateMachineSet is called") + + currentMachineSet, desiredMachineSet, + currentBootstrapConfigTemplate, desiredBootstrapConfigTemplate, + currentInfraMachineTemplate, desiredInfraMachineTemplate, err := h.getObjectsFromCanUpdateMachineSetRequest(req) + if err != nil { + resp.Status = runtimehooksv1.ResponseStatusFailure + resp.Message = err.Error() + return + } + + // Declare changes that this Runtime Extension can update in-place. 
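+	// Note: unlike DoCanUpdateMachine, this handler operates on the MachineSet's machine template
+	// (Spec.Template.Spec) and on the BootstrapConfigTemplate / InfrastructureMachineTemplate objects
+	// rather than on concrete Machine, KubeadmConfig and DockerMachine objects.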
+ + // Machine + canUpdateMachineSpec(¤tMachineSet.Spec.Template.Spec, &desiredMachineSet.Spec.Template.Spec) + + // BootstrapConfig (we can only update KubeadmConfigs) + currentKubeadmConfigTemplate, isCurrentKubeadmConfigTemplate := currentBootstrapConfigTemplate.(*bootstrapv1.KubeadmConfigTemplate) + desiredKubeadmConfigTemplate, isDesiredKubeadmConfigTemplate := desiredBootstrapConfigTemplate.(*bootstrapv1.KubeadmConfigTemplate) + if isCurrentKubeadmConfigTemplate && isDesiredKubeadmConfigTemplate { + canUpdateKubeadmConfigSpec(¤tKubeadmConfigTemplate.Spec.Template.Spec, &desiredKubeadmConfigTemplate.Spec.Template.Spec) + } + + // InfraMachine (we can only update DockerMachines) + currentDockerMachineTemplate, isCurrentDockerMachineTemplate := currentInfraMachineTemplate.(*infrav1.DockerMachineTemplate) + desiredDockerMachineTemplate, isDesiredDockerMachineTemplate := desiredInfraMachineTemplate.(*infrav1.DockerMachineTemplate) + if isCurrentDockerMachineTemplate && isDesiredDockerMachineTemplate { + canUpdateDockerMachineSpec(¤tDockerMachineTemplate.Spec.Template.Spec, &desiredDockerMachineTemplate.Spec.Template.Spec) + } + + if err := h.computeCanUpdateMachineSetResponse(req, resp, currentMachineSet, currentBootstrapConfigTemplate, currentInfraMachineTemplate); err != nil { + resp.Status = runtimehooksv1.ResponseStatusFailure + resp.Message = err.Error() + return + } + + resp.Status = runtimehooksv1.ResponseStatusSuccess +} + +// DoUpdateMachine implements the UpdateMachine hook. +// Note: We are intentionally not actually applying any in-place changes we are just faking them, +// which is good enough for test purposes. +func (h *ExtensionHandlers) DoUpdateMachine(ctx context.Context, req *runtimehooksv1.UpdateMachineRequest, resp *runtimehooksv1.UpdateMachineResponse) { + log := ctrl.LoggerFrom(ctx).WithValues("Machine", klog.KObj(&req.Desired.Machine)) + log.Info("UpdateMachine is called") + defer func() { + log.Info("UpdateMachine response", "Machine", klog.KObj(&req.Desired.Machine), "status", resp.Status, "message", resp.Message, "retryAfterSeconds", resp.RetryAfterSeconds) + }() + + key := klog.KObj(&req.Desired.Machine).String() + + // Note: We are intentionally not actually applying any in-place changes we are just faking them, + // which is good enough for test purposes. 
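+	// A per-Machine timestamp is stored on the first UpdateMachine call; until ~20 seconds have passed the
+	// handler keeps answering "still in progress" with RetryAfterSeconds set, and once the window has
+	// elapsed it clears the stored entry and reports the update as done.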
+ if firstTimeCalled, ok := h.state.Load(key); ok { + if time.Since(firstTimeCalled.(time.Time)) > 20*time.Second { + h.state.Delete(key) + resp.Status = runtimehooksv1.ResponseStatusSuccess + resp.Message = "In-place update is done" + resp.RetryAfterSeconds = 0 + return + } + } else { + h.state.Store(key, time.Now()) + } + + resp.Status = runtimehooksv1.ResponseStatusSuccess + resp.Message = "In-place update still in progress" + resp.RetryAfterSeconds = 5 +} + +func (h *ExtensionHandlers) getObjectsFromCanUpdateMachineRequest(req *runtimehooksv1.CanUpdateMachineRequest) (*clusterv1.Machine, *clusterv1.Machine, runtime.Object, runtime.Object, runtime.Object, runtime.Object, error) { //nolint:gocritic // accepting high number of return parameters for now + currentMachine := req.Current.Machine.DeepCopy() + desiredMachine := req.Desired.Machine.DeepCopy() + currentBootstrapConfig, _, err := h.decoder.Decode(req.Current.BootstrapConfig.Raw, nil, req.Current.BootstrapConfig.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + desiredBootstrapConfig, _, err := h.decoder.Decode(req.Desired.BootstrapConfig.Raw, nil, req.Desired.BootstrapConfig.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + currentInfraMachine, _, err := h.decoder.Decode(req.Current.InfrastructureMachine.Raw, nil, req.Current.InfrastructureMachine.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + desiredInfraMachine, _, err := h.decoder.Decode(req.Desired.InfrastructureMachine.Raw, nil, req.Desired.InfrastructureMachine.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + + return currentMachine, desiredMachine, currentBootstrapConfig, desiredBootstrapConfig, currentInfraMachine, desiredInfraMachine, nil +} + +func (h *ExtensionHandlers) computeCanUpdateMachineResponse(req *runtimehooksv1.CanUpdateMachineRequest, resp *runtimehooksv1.CanUpdateMachineResponse, currentMachine *clusterv1.Machine, currentBootstrapConfig, currentInfraMachine runtime.Object) error { + marshalledCurrentMachine, err := json.Marshal(req.Current.Machine) + if err != nil { + return err + } + machinePatch, err := createJSONPatch(marshalledCurrentMachine, currentMachine) + if err != nil { + return err + } + bootstrapConfigPatch, err := createJSONPatch(req.Current.BootstrapConfig.Raw, currentBootstrapConfig) + if err != nil { + return err + } + infraMachinePatch, err := createJSONPatch(req.Current.InfrastructureMachine.Raw, currentInfraMachine) + if err != nil { + return err + } + + resp.MachinePatch = runtimehooksv1.Patch{ + PatchType: runtimehooksv1.JSONPatchType, + Patch: machinePatch, + } + resp.BootstrapConfigPatch = runtimehooksv1.Patch{ + PatchType: runtimehooksv1.JSONPatchType, + Patch: bootstrapConfigPatch, + } + resp.InfrastructureMachinePatch = runtimehooksv1.Patch{ + PatchType: runtimehooksv1.JSONPatchType, + Patch: infraMachinePatch, + } + return nil +} + +func (h *ExtensionHandlers) getObjectsFromCanUpdateMachineSetRequest(req *runtimehooksv1.CanUpdateMachineSetRequest) (*clusterv1.MachineSet, *clusterv1.MachineSet, runtime.Object, runtime.Object, runtime.Object, runtime.Object, error) { //nolint:gocritic // accepting high number of return parameters for now + currentMachineSet := req.Current.MachineSet.DeepCopy() + desiredMachineSet := req.Desired.MachineSet.DeepCopy() + currentBootstrapConfigTemplate, _, err := h.decoder.Decode(req.Current.BootstrapConfigTemplate.Raw, nil, req.Current.BootstrapConfigTemplate.Object) + if err != nil { + return nil, 
nil, nil, nil, nil, nil, err + } + desiredBootstrapConfigTemplate, _, err := h.decoder.Decode(req.Desired.BootstrapConfigTemplate.Raw, nil, req.Desired.BootstrapConfigTemplate.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + currentInfraMachineTemplate, _, err := h.decoder.Decode(req.Current.InfrastructureMachineTemplate.Raw, nil, req.Current.InfrastructureMachineTemplate.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + desiredInfraMachineTemplate, _, err := h.decoder.Decode(req.Desired.InfrastructureMachineTemplate.Raw, nil, req.Desired.InfrastructureMachineTemplate.Object) + if err != nil { + return nil, nil, nil, nil, nil, nil, err + } + + return currentMachineSet, desiredMachineSet, currentBootstrapConfigTemplate, desiredBootstrapConfigTemplate, currentInfraMachineTemplate, desiredInfraMachineTemplate, nil +} + +func (h *ExtensionHandlers) computeCanUpdateMachineSetResponse(req *runtimehooksv1.CanUpdateMachineSetRequest, resp *runtimehooksv1.CanUpdateMachineSetResponse, currentMachineSet *clusterv1.MachineSet, currentBootstrapConfigTemplate, currentInfraMachineTemplate runtime.Object) error { + marshalledCurrentMachineSet, err := json.Marshal(req.Current.MachineSet) + if err != nil { + return err + } + machineSetPatch, err := createJSONPatch(marshalledCurrentMachineSet, currentMachineSet) + if err != nil { + return err + } + bootstrapConfigTemplatePatch, err := createJSONPatch(req.Current.BootstrapConfigTemplate.Raw, currentBootstrapConfigTemplate) + if err != nil { + return err + } + infraMachineTemplatePatch, err := createJSONPatch(req.Current.InfrastructureMachineTemplate.Raw, currentInfraMachineTemplate) + if err != nil { + return err + } + + resp.MachineSetPatch = runtimehooksv1.Patch{ + PatchType: runtimehooksv1.JSONPatchType, + Patch: machineSetPatch, + } + resp.BootstrapConfigTemplatePatch = runtimehooksv1.Patch{ + PatchType: runtimehooksv1.JSONPatchType, + Patch: bootstrapConfigTemplatePatch, + } + resp.InfrastructureMachineTemplatePatch = runtimehooksv1.Patch{ + PatchType: runtimehooksv1.JSONPatchType, + Patch: infraMachineTemplatePatch, + } + return nil +} + +// createJSONPatch creates a RFC 6902 JSON patch from the original and the modified object. 
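+// For example, if only spec.version changed between the two objects, the resulting patch looks like
+// [{"op":"replace","path":"/spec/version","value":"v1.33.5"}] (the version value here is illustrative).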
+func createJSONPatch(marshalledOriginal []byte, modified runtime.Object) ([]byte, error) { + // TODO: avoid producing patches for status (although they will be ignored by the KCP / MD controllers anyway) + marshalledModified, err := json.Marshal(modified) + if err != nil { + return nil, errors.Errorf("failed to marshal modified object: %v", err) + } + + patch, err := jsonpatch.CreatePatch(marshalledOriginal, marshalledModified) + if err != nil { + return nil, errors.Errorf("failed to create patch: %v", err) + } + + patchBytes, err := json.Marshal(patch) + if err != nil { + return nil, errors.Errorf("failed to marshal patch: %v", err) + } + + return patchBytes, nil +} diff --git a/test/extension/handlers/topologymutation/handler.go b/test/extension/handlers/topologymutation/handler.go index d434213b69d9..59cd2b573beb 100644 --- a/test/extension/handlers/topologymutation/handler.go +++ b/test/extension/handlers/topologymutation/handler.go @@ -111,7 +111,11 @@ func (h *ExtensionHandlers) GeneratePatches(ctx context.Context, req *runtimehoo // the patchKubeadmConfigTemplate func shows how to implement patches only for KubeadmConfigTemplates // linked to a specific MachineDeployment class; another option is to check the holderRef value and call // this func or more specialized func conditionally. - patchKubeadmConfigTemplate(ctx, obj, variables) + err := patchKubeadmConfigTemplate(ctx, obj, variables) + if err != nil { + log.Error(err, "Error patching KubeadmConfigTemplate") + return errors.Wrapf(err, "error patching KubeadmConfigTemplate") + } case *infrav1beta1.DockerMachineTemplate, *infrav1.DockerMachineTemplate: // NOTE: DockerMachineTemplate could be linked to the ControlPlane or one or more of the existing MachineDeployment class; // the patchDockerMachineTemplate func shows how to implement different patches for DockerMachineTemplate @@ -214,40 +218,56 @@ func patchKubeadmControlPlaneTemplate(ctx context.Context, obj runtime.Object, t // 2) Patch RolloutStrategy RollingUpdate MaxSurge with the value from the Cluster Topology variable. // If this is unset continue as this variable is not required. kcpControlPlaneMaxSurge, err := topologymutation.GetStringVariable(templateVariables, "kubeadmControlPlaneMaxSurge") - if err != nil { - if topologymutation.IsNotFoundError(err) { - return nil - } + if err != nil && !topologymutation.IsNotFoundError(err) { return errors.Wrap(err, "could not set KubeadmControlPlaneTemplate MaxSurge") } + if kcpControlPlaneMaxSurge != "" { + // This has to be converted to IntOrString type. + kubeadmControlPlaneMaxSurgeIntOrString := intstrutil.Parse(kcpControlPlaneMaxSurge) + log.Info(fmt.Sprintf("Setting KubeadmControlPlaneMaxSurge to %q", kubeadmControlPlaneMaxSurgeIntOrString.String())) - // This has to be converted to IntOrString type. 
- kubeadmControlPlaneMaxSurgeIntOrString := intstrutil.Parse(kcpControlPlaneMaxSurge) - log.Info(fmt.Sprintf("Setting KubeadmControlPlaneMaxSurge to %q", kubeadmControlPlaneMaxSurgeIntOrString.String())) - - kcpTemplateV1Beta1, ok := obj.(*controlplanev1beta1.KubeadmControlPlaneTemplate) - if ok { - if kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy == nil { - kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy = &controlplanev1beta1.RolloutStrategy{} + kcpTemplateV1Beta1, ok := obj.(*controlplanev1beta1.KubeadmControlPlaneTemplate) + if ok { + if kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy == nil { + kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy = &controlplanev1beta1.RolloutStrategy{} + } + if kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy.RollingUpdate == nil { + kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy.RollingUpdate = &controlplanev1beta1.RollingUpdate{} + } + kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy.RollingUpdate.MaxSurge = &kubeadmControlPlaneMaxSurgeIntOrString } - if kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy.RollingUpdate == nil { - kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy.RollingUpdate = &controlplanev1beta1.RollingUpdate{} + + kcpTemplate, ok := obj.(*controlplanev1.KubeadmControlPlaneTemplate) + if ok { + kcpTemplate.Spec.Template.Spec.Rollout.Strategy.Type = controlplanev1.RollingUpdateStrategyType + kcpTemplate.Spec.Template.Spec.Rollout.Strategy.RollingUpdate.MaxSurge = &kubeadmControlPlaneMaxSurgeIntOrString } - kcpTemplateV1Beta1.Spec.Template.Spec.RolloutStrategy.RollingUpdate.MaxSurge = &kubeadmControlPlaneMaxSurgeIntOrString - return nil } - kcpTemplate, ok := obj.(*controlplanev1.KubeadmControlPlaneTemplate) - if ok { - kcpTemplate.Spec.Template.Spec.Rollout.Strategy.Type = controlplanev1.RollingUpdateStrategyType - kcpTemplate.Spec.Template.Spec.Rollout.Strategy.RollingUpdate.MaxSurge = &kubeadmControlPlaneMaxSurgeIntOrString + // 3) Set files + files := []fileVariable{} + err = topologymutation.GetObjectVariableInto(templateVariables, "files", &files) + if err != nil && !topologymutation.IsNotFoundError(err) { + return errors.Wrap(err, "could not set KubeadmControlPlaneTemplate files") + } + if len(files) > 0 { + kcpTemplateV1Beta1, ok := obj.(*controlplanev1beta1.KubeadmControlPlaneTemplate) + if ok { + kcpTemplateV1Beta1.Spec.Template.Spec.KubeadmConfigSpec.Files = append(kcpTemplateV1Beta1.Spec.Template.Spec.KubeadmConfigSpec.Files, + convertToKubeadmConfigV1Beta1Files(files)...) + } + kcpTemplate, ok := obj.(*controlplanev1.KubeadmControlPlaneTemplate) + if ok { + kcpTemplate.Spec.Template.Spec.KubeadmConfigSpec.Files = append(kcpTemplate.Spec.Template.Spec.KubeadmConfigSpec.Files, + convertToKubeadmConfigFiles(files)...) + } } return nil } // patchKubeadmConfigTemplate patches the ControlPlaneTemplate. 
-func patchKubeadmConfigTemplate(_ context.Context, obj runtime.Object, _ map[string]apiextensionsv1.JSON) { +func patchKubeadmConfigTemplate(_ context.Context, obj runtime.Object, templateVariables map[string]apiextensionsv1.JSON) error { // 1) Set extraArgs switch obj := obj.(type) { case *bootstrapv1beta1.KubeadmConfigTemplate: @@ -261,6 +281,61 @@ func patchKubeadmConfigTemplate(_ context.Context, obj runtime.Object, _ map[str case *bootstrapv1.KubeadmConfigTemplate: obj.Spec.Template.Spec.JoinConfiguration.NodeRegistration.KubeletExtraArgs = append(obj.Spec.Template.Spec.JoinConfiguration.NodeRegistration.KubeletExtraArgs, bootstrapv1.Arg{Name: "v", Value: ptr.To("2")}) } + + files := []fileVariable{} + err := topologymutation.GetObjectVariableInto(templateVariables, "files", &files) + if err != nil && !topologymutation.IsNotFoundError(err) { + return errors.Wrap(err, "could not set KubeadmConfigTemplate files") + } + if len(files) > 0 { + kcpTemplateV1Beta1, ok := obj.(*bootstrapv1beta1.KubeadmConfigTemplate) + if ok { + kcpTemplateV1Beta1.Spec.Template.Spec.Files = append(kcpTemplateV1Beta1.Spec.Template.Spec.Files, + convertToKubeadmConfigV1Beta1Files(files)...) + } + kcpTemplate, ok := obj.(*bootstrapv1.KubeadmConfigTemplate) + if ok { + kcpTemplate.Spec.Template.Spec.Files = append(kcpTemplate.Spec.Template.Spec.Files, + convertToKubeadmConfigFiles(files)...) + } + } + + return nil +} + +type fileVariable struct { + Path string `json:"path,omitempty"` + Content string `json:"content,omitempty"` +} + +func convertToKubeadmConfigV1Beta1Files(files []fileVariable) []bootstrapv1beta1.File { + kubeadmConfigV1Beta1Files := []bootstrapv1beta1.File{} + for _, f := range files { + kubeadmConfigV1Beta1Files = append(kubeadmConfigV1Beta1Files, + bootstrapv1beta1.File{ + Path: f.Path, + Content: f.Content, + Owner: "root:root", + Permissions: "0600", + }, + ) + } + return kubeadmConfigV1Beta1Files +} + +func convertToKubeadmConfigFiles(files []fileVariable) []bootstrapv1.File { + kubeadmConfigFiles := []bootstrapv1.File{} + for _, f := range files { + kubeadmConfigFiles = append(kubeadmConfigFiles, + bootstrapv1.File{ + Path: f.Path, + Content: f.Content, + Owner: "root:root", + Permissions: "0600", + }, + ) + } + return kubeadmConfigFiles } // patchDockerMachineTemplate patches the DockerMachineTemplate. @@ -414,6 +489,26 @@ func (h *ExtensionHandlers) DiscoverVariables(ctx context.Context, _ *runtimehoo }, }, }, + { + Name: "files", + Required: false, + Schema: clusterv1beta1.VariableSchema{ + OpenAPIV3Schema: clusterv1beta1.JSONSchemaProps{ + Type: "array", + Items: &clusterv1beta1.JSONSchemaProps{ + Type: "object", + Properties: map[string]clusterv1beta1.JSONSchemaProps{ + "path": { + Type: "string", + }, + "content": { + Type: "string", + }, + }, + }, + }, + }, + }, // This variable must be set in the Cluster as it has no default value and is required. { Name: "imageRepository", diff --git a/test/extension/main.go b/test/extension/main.go index 3408921a17c3..75bf4456ad7f 100644 --- a/test/extension/main.go +++ b/test/extension/main.go @@ -50,6 +50,7 @@ import ( runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog" "sigs.k8s.io/cluster-api/exp/runtime/server" "sigs.k8s.io/cluster-api/feature" + "sigs.k8s.io/cluster-api/test/extension/handlers/inplaceupdate" "sigs.k8s.io/cluster-api/test/extension/handlers/lifecycle" "sigs.k8s.io/cluster-api/test/extension/handlers/topologymutation" "sigs.k8s.io/cluster-api/util/flags" @@ -261,6 +262,7 @@ func main() { // Setup Runtime Extensions. 
setupTopologyMutationHookHandlers(runtimeExtensionWebhookServer) setupLifecycleHookHandlers(mgr, runtimeExtensionWebhookServer) + setupInPlaceUpdateHookHandlers(mgr, runtimeExtensionWebhookServer) // Setup checks, indexes, reconcilers and webhooks. setupChecks(mgr) @@ -399,6 +401,41 @@ func setupLifecycleHookHandlers(mgr ctrl.Manager, runtimeExtensionWebhookServer } } +// setupInPlaceUpdateHookHandlers sets up In-Place Update Hooks. +func setupInPlaceUpdateHookHandlers(mgr ctrl.Manager, runtimeExtensionWebhookServer *server.Server) { + // Create the ExtensionHandlers for the in-place update hooks + // NOTE: it is not mandatory to group all the ExtensionHandlers using a struct, what is important + // is to have HandlerFunc with the signature defined in sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1. + inPlaceUpdateExtensionHandlers := inplaceupdate.NewExtensionHandlers(mgr.GetClient()) + + if err := runtimeExtensionWebhookServer.AddExtensionHandler(server.ExtensionHandler{ + Hook: runtimehooksv1.CanUpdateMachine, + Name: "can-update-machine", + HandlerFunc: inPlaceUpdateExtensionHandlers.DoCanUpdateMachine, + }); err != nil { + setupLog.Error(err, "Error adding CanUpdateMachine handler") + os.Exit(1) + } + + if err := runtimeExtensionWebhookServer.AddExtensionHandler(server.ExtensionHandler{ + Hook: runtimehooksv1.CanUpdateMachineSet, + Name: "can-update-machineset", + HandlerFunc: inPlaceUpdateExtensionHandlers.DoCanUpdateMachineSet, + }); err != nil { + setupLog.Error(err, "Error adding CanUpdateMachineSet handler") + os.Exit(1) + } + + if err := runtimeExtensionWebhookServer.AddExtensionHandler(server.ExtensionHandler{ + Hook: runtimehooksv1.UpdateMachine, + Name: "update-machine", + HandlerFunc: inPlaceUpdateExtensionHandlers.DoUpdateMachine, + }); err != nil { + setupLog.Error(err, "Error adding UpdateMachine handler") + os.Exit(1) + } +} + func setupChecks(mgr ctrl.Manager) { if err := mgr.AddReadyzCheck("webhook", mgr.GetWebhookServer().StartedChecker()); err != nil { setupLog.Error(err, "Unable to create ready check") diff --git a/test/framework/cluster_helpers.go b/test/framework/cluster_helpers.go index 535da0dcd342..b32848433449 100644 --- a/test/framework/cluster_helpers.go +++ b/test/framework/cluster_helpers.go @@ -433,34 +433,51 @@ func DescribeAllCluster(ctx context.Context, input DescribeAllClusterInput) { } } -type VerifyClusterAvailableInput struct { - Getter Getter - Name string - Namespace string +type VerifyClusterConditionInput struct { + Getter Getter + Name string + Namespace string + ConditionType string } -// VerifyClusterAvailable verifies that the Cluster's Available condition is set to true. -func VerifyClusterAvailable(ctx context.Context, input VerifyClusterAvailableInput) { +// VerifyClusterCondition verifies that the Cluster's condition is set to true. +func VerifyClusterCondition(ctx context.Context, input VerifyClusterConditionInput) { cluster := &clusterv1.Cluster{} key := client.ObjectKey{ Name: input.Name, Namespace: input.Namespace, } - // Wait for the cluster Available condition to stabilize. + // Wait for the cluster condition to stabilize. 
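+	// Note: the in-place update spec above uses this helper to wait for conditions such as
+	// ClusterControlPlaneMachinesUpToDateCondition; the poll below retries every 10 seconds for up to 5 minutes.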
Eventually(func(g Gomega) { g.Expect(input.Getter.Get(ctx, key, cluster)).To(Succeed()) - availableConditionFound := false + conditionFound := false for _, condition := range cluster.Status.Conditions { - if condition.Type == clusterv1.AvailableCondition { - availableConditionFound = true - g.Expect(condition.Status).To(Equal(metav1.ConditionTrue), "The Available condition on the Cluster should be set to true; message: %s", condition.Message) - g.Expect(condition.Message).To(BeEmpty(), "The Available condition on the Cluster should have an empty message") + if condition.Type == input.ConditionType { + conditionFound = true + g.Expect(condition.Status).To(Equal(metav1.ConditionTrue), "The %s condition on the Cluster should be set to true; message: %s", condition.Type, condition.Message) + g.Expect(condition.Message).To(BeEmpty(), "The %s condition on the Cluster should have an empty message", condition.Type) break } } - g.Expect(availableConditionFound).To(BeTrue(), "Cluster %q should have an Available condition", input.Name) - }, 5*time.Minute, 10*time.Second).Should(Succeed(), "Failed to verify Cluster Available condition for %s", klog.KRef(input.Namespace, input.Name)) + g.Expect(conditionFound).To(BeTrue(), "Cluster %q should have an %s condition", input.Name, input.ConditionType) + }, 5*time.Minute, 10*time.Second).Should(Succeed(), "Failed to verify Cluster %s condition for %s", input.ConditionType, klog.KRef(input.Namespace, input.Name)) +} + +type VerifyClusterAvailableInput struct { + Getter Getter + Name string + Namespace string +} + +// VerifyClusterAvailable verifies that the Cluster's Available condition is set to true. +func VerifyClusterAvailable(ctx context.Context, input VerifyClusterAvailableInput) { + VerifyClusterCondition(ctx, VerifyClusterConditionInput{ + Getter: input.Getter, + Name: input.Name, + Namespace: input.Namespace, + ConditionType: clusterv1.AvailableCondition, + }) } type VerifyMachinesReadyInput struct { diff --git a/test/go.mod b/test/go.mod index 23d175112a2f..d96bbed6d001 100644 --- a/test/go.mod +++ b/test/go.mod @@ -23,6 +23,7 @@ require ( go.etcd.io/etcd/api/v3 v3.6.5 go.etcd.io/etcd/client/v3 v3.6.5 golang.org/x/net v0.46.0 + gomodules.xyz/jsonpatch/v2 v2.5.0 google.golang.org/grpc v1.72.3 k8s.io/api v0.34.1 k8s.io/apiextensions-apiserver v0.34.1 @@ -159,7 +160,6 @@ require ( golang.org/x/text v0.30.0 // indirect golang.org/x/time v0.9.0 // indirect golang.org/x/tools v0.37.0 // indirect - gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect google.golang.org/protobuf v1.36.7 // indirect
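The go.mod change above promotes gomodules.xyz/jsonpatch/v2 from an indirect to a direct dependency because the in-place update handlers use it to diff objects. A minimal, self-contained sketch of that usage follows (illustrative only, not part of this change; the object values are made up):

package main

import (
	"encoding/json"
	"fmt"

	"gomodules.xyz/jsonpatch/v2"
)

func main() {
	// Two versions of the same object, serialized to JSON: the original and an in-place mutated copy.
	original := []byte(`{"spec":{"version":"v1.33.4","failureDomain":"fd1"}}`)
	modified := []byte(`{"spec":{"version":"v1.33.5","failureDomain":"fd1"}}`)

	// CreatePatch computes the RFC 6902 operations that transform original into modified.
	ops, err := jsonpatch.CreatePatch(original, modified)
	if err != nil {
		panic(err)
	}

	patch, err := json.Marshal(ops)
	if err != nil {
		panic(err)
	}

	// Prints something like: [{"op":"replace","path":"/spec/version","value":"v1.33.5"}]
	fmt.Println(string(patch))
}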