From cc0d38728b02cc2379a0d129f4b7f186f965cdb4 Mon Sep 17 00:00:00 2001 From: Jan Grant Date: Fri, 7 Jun 2024 10:06:19 +0100 Subject: [PATCH 1/2] Remove the assumption that a node's name == its hostname As per https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#built-in-node-labels [[[ Note: The value of these labels is cloud provider specific and is not guaranteed to be reliable. For example, the value of kubernetes.io/hostname may be the same as the node name in some environments and a different value in other environments. ]]] --- provisioner.go | 61 ++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/provisioner.go b/provisioner.go index 9f774cc59..b982b0739 100644 --- a/provisioner.go +++ b/provisioner.go @@ -422,20 +422,16 @@ func (p *LocalPathProvisioner) provisionFor(opts pvController.ProvisionOptions, // affinity, as path is accessible from any node nodeAffinity = nil } else { - valueNode, ok := node.GetLabels()[KeyNode] - if !ok { - valueNode = nodeName - } nodeAffinity = &v1.VolumeNodeAffinity{ Required: &v1.NodeSelector{ NodeSelectorTerms: []v1.NodeSelectorTerm{ { - MatchExpressions: []v1.NodeSelectorRequirement{ + MatchFields: []v1.NodeSelectorRequirement{ { - Key: KeyNode, + Key: metav1.ObjectNameField, Operator: v1.NodeSelectorOpIn, Values: []string{ - valueNode, + node.Name, }, }, }, @@ -475,7 +471,7 @@ func (p *LocalPathProvisioner) deleteFor(pv *v1.PersistentVolume, c *StorageClas err = errors.Wrapf(err, "failed to delete volume %v", pv.Name) }() - path, node, err := p.getPathAndNodeForPV(pv, c) + path, node, nodeLabels, err := p.getPathAndNodeForPV(pv, c) if err != nil { return err } @@ -498,6 +494,7 @@ func (p *LocalPathProvisioner) deleteFor(pv *v1.PersistentVolume, c *StorageClas Mode: *pv.Spec.VolumeMode, SizeInBytes: storage.Value(), Node: node, + NodeLabels: nodeLabels, }, c); err != nil { logrus.Infof("clean up volume %v failed: %v", pv.Name, err) return err @@ -508,7 +505,7 @@ func (p *LocalPathProvisioner) deleteFor(pv *v1.PersistentVolume, c *StorageClas return nil } -func (p *LocalPathProvisioner) getPathAndNodeForPV(pv *v1.PersistentVolume, cfg *StorageClassConfig) (path, node string, err error) { +func (p *LocalPathProvisioner) getPathAndNodeForPV(pv *v1.PersistentVolume, cfg *StorageClassConfig) (path, node string, nodeLabels map[string]string, err error) { defer func() { err = errors.Wrapf(err, "failed to delete volume %v", pv.Name) }() @@ -519,49 +516,55 @@ func (p *LocalPathProvisioner) getPathAndNodeForPV(pv *v1.PersistentVolume, cfg } else if volumeSource.Local != nil && volumeSource.HostPath == nil { path = volumeSource.Local.Path } else { - return "", "", fmt.Errorf("no path set") + return "", "", nil, fmt.Errorf("no path set") } sharedFS, err := p.isSharedFilesystem(cfg) if err != nil { - return "", "", err + return "", "", nil, err } if sharedFS { // We don't have affinity and can use any node - return path, "", nil + return path, "", nil, nil } // Dealing with local filesystem nodeAffinity := pv.Spec.NodeAffinity if nodeAffinity == nil { - return "", "", fmt.Errorf("no NodeAffinity set") + return "", "", nil, fmt.Errorf("no NodeAffinity set") } required := nodeAffinity.Required if required == nil { - return "", "", fmt.Errorf("no NodeAffinity.Required set") + return "", "", nil, fmt.Errorf("no NodeAffinity.Required set") } - node = "" + // If we have an explicit node, use that; otherwise use the selector. 
+ for _, selectorTerm := range required.NodeSelectorTerms { + for _, expression := range selectorTerm.MatchFields { + if expression.Key == metav1.ObjectNameField && expression.Operator == v1.NodeSelectorOpIn { + if len(expression.Values) != 1 { + return "", "", nil, fmt.Errorf("multiple values for the node affinity") + } + return path, expression.Values[0], nil, nil + } + } + } + // The scheduler must use the PV's node selector to schedule a helper pod. for _, selectorTerm := range required.NodeSelectorTerms { for _, expression := range selectorTerm.MatchExpressions { if expression.Key == KeyNode && expression.Operator == v1.NodeSelectorOpIn { if len(expression.Values) != 1 { - return "", "", fmt.Errorf("multiple values for the node affinity") + return "", "", nil, fmt.Errorf("multiple values for the node affinity") } - node = expression.Values[0] - break + return path, "", map[string]string{ + KeyNode: expression.Values[0], + }, nil } } - if node != "" { - break - } } - if node == "" { - return "", "", fmt.Errorf("cannot find affinited node") - } - return path, node, nil + return "", "", nil, fmt.Errorf("cannot find affinited node") } type volumeOptions struct { @@ -570,6 +573,7 @@ type volumeOptions struct { Mode v1.PersistentVolumeMode SizeInBytes int64 Node string + NodeLabels map[string]string } func (p *LocalPathProvisioner) createHelperPod(action ActionType, cmd []string, o volumeOptions, cfg *StorageClassConfig) (err error) { @@ -580,7 +584,7 @@ func (p *LocalPathProvisioner) createHelperPod(action ActionType, cmd []string, if err != nil { return err } - if o.Name == "" || o.Path == "" || (!sharedFS && o.Node == "") { + if o.Name == "" || o.Path == "" || (!sharedFS && o.Node == "" && o.NodeLabels == nil) { return fmt.Errorf("invalid empty name or path or node") } if !filepath.IsAbs(o.Path) { @@ -661,9 +665,8 @@ func (p *LocalPathProvisioner) createHelperPod(action ActionType, cmd []string, helperPod.Name = helperPod.Name[:HelperPodNameMaxLength] } helperPod.Namespace = p.namespace - if o.Node != "" { - helperPod.Spec.NodeName = o.Node - } + helperPod.Spec.NodeName = o.Node + helperPod.Spec.NodeSelector = o.NodeLabels helperPod.Spec.ServiceAccountName = p.serviceAccountName helperPod.Spec.RestartPolicy = v1.RestartPolicyNever helperPod.Spec.Tolerations = append(helperPod.Spec.Tolerations, lpvTolerations...) From 199ec0a49c6722c9bce811b4683600d96fd05abf Mon Sep 17 00:00:00 2001 From: Jan Grant Date: Mon, 10 Jun 2024 13:13:20 +0100 Subject: [PATCH 2/2] e2e: confirm the cleanup of PVs with legacy affinity attributes This applies a small refactor to the e2e tests to ensure that the newer provisioner is capable of siting helper pods correctly to clean up PVs with "legacy" affinity constraints. The kind cluster itself is reconfigured to ensure that all nodes have `metadata.name` != `metadata.labels["kubernetes.io/hostname"]`, which is an assumption that does not hold for many cloud providers. 
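For illustration only (a rough sketch, not part of the test data below): with the
reconfigured kind cluster, the first worker node is expected to look roughly like
this to the API server, with the node name and the hostname label deliberately
differing:

    apiVersion: v1
    kind: Node
    metadata:
      name: kind-worker                                # matched by MatchFields on metadata.name
      labels:
        kubernetes.io/hostname: kind-worker1.hostname  # matched by the legacy MatchExpressions affinity

A PV whose affinity was recorded against the kubernetes.io/hostname label can
therefore no longer be mapped straight to a node name; the cleanup helper pod has
to be placed via a node selector instead, which is what the new e2e case exercises.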
--- test/pod_test.go | 51 ++++++++++++------- test/testdata/kind-cluster.yaml | 4 ++ .../pod-with-node-affinity/patch.yaml | 2 +- .../kustomization.yaml | 10 ++++ test/testdata/pv-with-legacy-affinity/pv.yaml | 38 ++++++++++++++ test/util.go | 2 +- 6 files changed, 86 insertions(+), 21 deletions(-) create mode 100644 test/testdata/pv-with-legacy-affinity/kustomization.yaml create mode 100644 test/testdata/pv-with-legacy-affinity/pv.yaml diff --git a/test/pod_test.go b/test/pod_test.go index 5a69d7e7e..fa829d671 100644 --- a/test/pod_test.go +++ b/test/pod_test.go @@ -82,38 +82,38 @@ func TestPVCTestSuite(t *testing.T) { func (p *PodTestSuite) TestPodWithHostPathVolume() { p.kustomizeDir = "pod" - runTest(p, []string{p.config.IMAGE}, "ready", hostPathVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), hostPathVolumeType) } func (p *PodTestSuite) TestPodWithLocalVolume() { p.kustomizeDir = "pod-with-local-volume" - runTest(p, []string{p.config.IMAGE}, "ready", localVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), localVolumeType) } func (p *PodTestSuite) TestPodWithLocalVolumeDefault() { p.kustomizeDir = "pod-with-default-local-volume" - runTest(p, []string{p.config.IMAGE}, "ready", localVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), localVolumeType) } func (p *PodTestSuite) TestPodWithNodeAffinity() { p.kustomizeDir = "pod-with-node-affinity" - runTest(p, []string{p.config.IMAGE}, "ready", hostPathVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), hostPathVolumeType) } func (p *PodTestSuite) TestPodWithRWOPVolume() { p.kustomizeDir = "pod-with-rwop-volume" - runTest(p, []string{p.config.IMAGE}, "ready", localVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), localVolumeType) } func (p *PodTestSuite) TestPodWithSecurityContext() { p.kustomizeDir = "pod-with-security-context" kustomizeDir := testdataFile(p.kustomizeDir) - runTest(p, []string{p.config.IMAGE}, "podscheduled", hostPathVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("podscheduled"), hostPathVolumeType) cmd := fmt.Sprintf(`kubectl get pod -l %s=%s -o=jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].reason}'`, LabelKey, LabelValue) @@ -142,22 +142,33 @@ loop: func (p *PodTestSuite) TestPodWithSubpath() { p.kustomizeDir = "pod-with-subpath" - runTest(p, []string{p.config.IMAGE}, "ready", hostPathVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), hostPathVolumeType) } func (p *PodTestSuite) TestPodWithMultipleStorageClasses() { p.kustomizeDir = "multiple-storage-classes" - runTest(p, []string{p.config.IMAGE}, "ready", hostPathVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), hostPathVolumeType) } func (p *PodTestSuite) TestPodWithCustomPathPatternStorageClasses() { p.kustomizeDir = "custom-path-pattern" - runTest(p, []string{p.config.IMAGE}, "ready", hostPathVolumeType) + runTest(p, []string{p.config.IMAGE}, waitCondition("ready"), hostPathVolumeType) } -func runTest(p *PodTestSuite, images []string, waitCondition, volumeType string) { +func (p *PodTestSuite) TestPodWithLegacyAffinityConstraint() { + // The helper pod should be correctly scheduled + p.kustomizeDir = "pv-with-legacy-affinity" + + runTest(p, []string{p.config.IMAGE}, "kubectl wait pv pvc-to-clean-up --for delete --timeout=120s", "") +} + +func waitCondition(waitCondition string) string { + return fmt.Sprintf("kubectl wait pod -l %s=%s --for condition=%s --timeout=120s", 
LabelKey, LabelValue, waitCondition) +} + +func runTest(p *PodTestSuite, images []string, waitCmd, volumeType string) { kustomizeDir := testdataFile(p.kustomizeDir) var cmds []string @@ -171,7 +182,7 @@ func runTest(p *PodTestSuite, images []string, waitCondition, volumeType string) cmds, fmt.Sprintf("kustomize edit add label %s:%s -f", LabelKey, LabelValue), "kustomize build | kubectl apply -f -", - fmt.Sprintf("kubectl wait pod -l %s=%s --for condition=%s --timeout=120s", LabelKey, LabelValue, waitCondition), + waitCmd, ) for _, cmd := range cmds { @@ -188,13 +199,15 @@ func runTest(p *PodTestSuite, images []string, waitCondition, volumeType string) } } - typeCheckCmd := fmt.Sprintf("kubectl get pv $(%s) -o jsonpath='{.spec.%s}'", "kubectl get pv -o jsonpath='{.items[0].metadata.name}'", volumeType) - c := createCmd(p.T(), typeCheckCmd, kustomizeDir, p.config.envs(), nil) - typeCheckOutput, err := c.CombinedOutput() - if err != nil { - p.FailNow("", "failed to check volume type: %v", err) - } - if len(typeCheckOutput) == 0 || !strings.Contains(string(typeCheckOutput), "path") { - p.FailNow("volume Type not correct") + if volumeType != "" { + typeCheckCmd := fmt.Sprintf("kubectl get pv $(%s) -o jsonpath='{.spec.%s}'", "kubectl get pv -o jsonpath='{.items[0].metadata.name}'", volumeType) + c := createCmd(p.T(), typeCheckCmd, kustomizeDir, p.config.envs(), nil) + typeCheckOutput, err := c.CombinedOutput() + if err != nil { + p.FailNow("", "failed to check volume type: %v", err) + } + if len(typeCheckOutput) == 0 || !strings.Contains(string(typeCheckOutput), "path") { + p.FailNow("volume Type not correct") + } } } diff --git a/test/testdata/kind-cluster.yaml b/test/testdata/kind-cluster.yaml index 5d48018e1..9d1fb8ace 100644 --- a/test/testdata/kind-cluster.yaml +++ b/test/testdata/kind-cluster.yaml @@ -3,4 +3,8 @@ kind: Cluster nodes: - role: control-plane - role: worker + labels: + kubernetes.io/hostname: kind-worker1.hostname - role: worker + labels: + kubernetes.io/hostname: kind-worker2.hostname diff --git a/test/testdata/pod-with-node-affinity/patch.yaml b/test/testdata/pod-with-node-affinity/patch.yaml index 204d775d3..efbf3d19a 100644 --- a/test/testdata/pod-with-node-affinity/patch.yaml +++ b/test/testdata/pod-with-node-affinity/patch.yaml @@ -11,4 +11,4 @@ spec: - key: kubernetes.io/hostname operator: In values: - - kind-worker + - kind-worker1.hostname diff --git a/test/testdata/pv-with-legacy-affinity/kustomization.yaml b/test/testdata/pv-with-legacy-affinity/kustomization.yaml new file mode 100644 index 000000000..b0f1729c7 --- /dev/null +++ b/test/testdata/pv-with-legacy-affinity/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- ../../../deploy +- pv.yaml +commonLabels: + app: local-path-provisioner +images: +- name: rancher/local-path-provisioner + newTag: dev \ No newline at end of file diff --git a/test/testdata/pv-with-legacy-affinity/pv.yaml b/test/testdata/pv-with-legacy-affinity/pv.yaml new file mode 100644 index 000000000..e13781c27 --- /dev/null +++ b/test/testdata/pv-with-legacy-affinity/pv.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + annotations: + local.path.provisioner/selected-node: kind-worker + pv.kubernetes.io/provisioned-by: rancher.io/local-path + finalizers: + - kubernetes.io/pv-protection + labels: + test/avoid-cleanup: "true" + name: pvc-to-clean-up +spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 100Mi + hostPath: + path: 
/opt/local-path-provisioner/default/local-path-pvc + type: DirectoryOrCreate + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - kind-worker1.hostname + claimRef: + apiVersion: v1 + kind: PersistentVolumeClaim + name: no-such-pvc + namespace: default + # The PVC "definitely doesn't exist any more" + resourceVersion: "1" + uid: 12345678-1234-5678-9abc-123456789abc + persistentVolumeReclaimPolicy: Delete + storageClassName: local-path-custom-path-pattern + volumeMode: Filesystem diff --git a/test/util.go b/test/util.go index 7aedec0b6..b20cebef7 100644 --- a/test/util.go +++ b/test/util.go @@ -78,7 +78,7 @@ func testdataFile(fields ...string) string { func deleteKustomizeDeployment(t *testing.T, kustomizeDir string, envs []string) error { _, err := runCmd( t, - "kustomize build | kubectl delete --timeout=180s -f -", + "kustomize build | kubectl delete --timeout=180s -f - -l 'test/avoid-cleanup!=true'", testdataFile(kustomizeDir), envs, nil,