From f3b1d5f56b988cac03c9baaad5356e086776fff8 Mon Sep 17 00:00:00 2001 From: enxebre Date: Fri, 10 Jan 2025 00:04:45 +0100 Subject: [PATCH] Allow karpenter to set arbitrary k8s labels on NodeClaim/Nodes nodeClaim Labels are the source for core karpenter to sync Node Labels in a centralized fashion. Some bootstrap userdata provider implementations also consume these labels and pass them through kubelet self setting. That results in a coupling between a centralized and a kubelet self setting approach. This coupling results in conflicting criteria for validations and degraded UX. This PR removes the coupling. As a consequence, Node Labels are not unnecessarily restricted for the centralized sync anymore. This better empowers administrators, reducing the reliance on self setting kubelet and minimizing the risk of a Node steering privileged workloads to itself. A subset of labels, excluding those disallowed by the node restriction admission, is now stored as JSON within the "karpenter.sh/node-restricted-labels" NodeClaim annotation. This enables bootstrap userdata providers to continue utilizing the labels if needed. --- pkg/apis/v1/labels.go | 77 +++++++++++++------ pkg/controllers/nodeclaim/lifecycle/launch.go | 15 +++- pkg/scheduling/requirements.go | 25 +++++- 3 files changed, 87 insertions(+), 30 deletions(-) diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index 9364936dfe..63f19355c1 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -58,22 +58,17 @@ const ( ) var ( - // RestrictedLabelDomains are either prohibited by the kubelet or reserved by karpenter + // RestrictedLabelDomains are reserved by karpenter RestrictedLabelDomains = sets.New( - "kubernetes.io", - "k8s.io", apis.Group, ) - // LabelDomainExceptions are sub-domains of the RestrictedLabelDomains but allowed because - // they are not used in a context where they may be passed as argument to kubelet. 
- LabelDomainExceptions = sets.New( - "kops.k8s.io", - v1.LabelNamespaceSuffixNode, - v1.LabelNamespaceNodeRestriction, + K8sLabelDomains = sets.New( + "kubernetes.io", + "k8s.io", ) - // WellKnownLabels are labels that belong to the RestrictedLabelDomains but allowed. + // WellKnownLabels are labels that belong to the RestrictedLabelDomains or K8sLabelDomains but allowed. // Karpenter is aware of these labels, and they can be used to further narrow down // the range of the corresponding values by either nodepool or pods. WellKnownLabels = sets.New( @@ -104,38 +99,72 @@ var ( } ) -// IsRestrictedLabel returns an error if the label is restricted. +// IsRestrictedLabel is used for runtime validation of requirements. +// Returns an error if the label is restricted. E.g. using .karpenter.sh suffix. func IsRestrictedLabel(key string) error { if WellKnownLabels.Has(key) { return nil } - if IsRestrictedNodeLabel(key) { - return fmt.Errorf("label %s is restricted; specify a well known label: %v, or a custom label that does not use a restricted domain: %v", key, sets.List(WellKnownLabels), sets.List(RestrictedLabelDomains)) + + labelDomain := GetLabelDomain(key) + for restrictedLabelDomain := range RestrictedLabelDomains { + if labelDomain == restrictedLabelDomain || strings.HasSuffix(labelDomain, "."+restrictedLabelDomain) { + return fmt.Errorf("using label %s is not allowed as it might interfere with the internal provisioning logic; specify a well known label: %v, or a custom label that does not use a restricted domain: %v", key, sets.List(WellKnownLabels), sets.List(RestrictedLabelDomains)) + } + } + + if RestrictedLabels.Has(key) { + return fmt.Errorf("using label %s is not allowed as it might interfere with the internal provisioning logic; specify a well known label: %v, or a custom label that does not use a restricted domain: %v", key, sets.List(WellKnownLabels), sets.List(RestrictedLabelDomains)) } + return nil } -// IsRestrictedNodeLabel returns true if a node label 
should not be injected by Karpenter. -// They are either known labels that will be injected by cloud providers, -// or label domain managed by other software (e.g., kops.k8s.io managed by kOps). -func IsRestrictedNodeLabel(key string) bool { +// IsValidToSyncCentrallyLabel returns true if the label key is allowed to be synced to the Node object centrally by Karpenter. +func IsValidToSyncCentrallyLabel(key string) bool { + // TODO(enxebre): consider this to be configurable with runtime flag. + notValidToSyncLabel := WellKnownLabels + + return !notValidToSyncLabel.Has(key) +} + +// IsKubeletLabel returns true if the label key is one that kubelets are allowed to set on their own Node object. +// This function is similar to the one used by the node restriction admission https://github.com/kubernetes/kubernetes/blob/e319c541f144e9bee6160f1dd8671638a9029f4c/staging/src/k8s.io/kubelet/pkg/apis/well_known_labels.go#L67 +// but karpenter also restricts the known labels to be passed to kubelet. Only the kubeletLabelNamespaces are allowed. 
+func IsKubeletLabel(key string) bool { if WellKnownLabels.Has(key) { + return false + } + + if !isKubernetesLabel(key) { return true } - labelDomain := GetLabelDomain(key) - for exceptionLabelDomain := range LabelDomainExceptions { - if strings.HasSuffix(labelDomain, exceptionLabelDomain) { - return false + + namespace := GetLabelDomain(key) + for allowedNamespace := range kubeletLabelNamespaces { + if namespace == allowedNamespace || strings.HasSuffix(namespace, "."+allowedNamespace) { + return true } } - for restrictedLabelDomain := range RestrictedLabelDomains { - if strings.HasSuffix(labelDomain, restrictedLabelDomain) { + + return false +} + +func isKubernetesLabel(key string) bool { + for k8sDomain := range K8sLabelDomains { + if key == k8sDomain || strings.HasSuffix(key, "."+k8sDomain) { return true } } - return RestrictedLabels.Has(key) + + return false } +var kubeletLabelNamespaces = sets.NewString( + v1.LabelNamespaceSuffixKubelet, + v1.LabelNamespaceSuffixNode, +) + func GetLabelDomain(key string) string { if parts := strings.SplitN(key, "/", 2); len(parts) == 2 { return parts[0] diff --git a/pkg/controllers/nodeclaim/lifecycle/launch.go b/pkg/controllers/nodeclaim/lifecycle/launch.go index 381b530562..2cde07e2bf 100644 --- a/pkg/controllers/nodeclaim/lifecycle/launch.go +++ b/pkg/controllers/nodeclaim/lifecycle/launch.go @@ -18,6 +18,7 @@ package lifecycle import ( "context" + "encoding/json" "errors" "fmt" @@ -28,6 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/karpenter/pkg/apis" v1 "sigs.k8s.io/karpenter/pkg/apis/v1" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/events" @@ -124,10 +126,19 @@ func PopulateNodeClaimDetails(nodeClaim, retrieved *v1.NodeClaim) *v1.NodeClaim // or the static nodeClaim labels nodeClaim.Labels = lo.Assign( retrieved.Labels, // CloudProvider-resolved labels - 
scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).Labels(), // Single-value requirement resolved labels + scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).Labels(), // Single-value requirement resolved labels that are synced to the Node object centrally by Karpenter. nodeClaim.Labels, // User-defined labels ) - nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, retrieved.Annotations) + + // We store labels compliant with the node restriction admission as an annotation on the NodeClaim + // so that bootstrap provider implementation can use them for kubelet labels. + kubeletLabels := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).KubeletLabels() + json, err := json.Marshal(kubeletLabels) + if err != nil { + panic(err) + } + kubeletLabelsAnnotation := map[string]string{apis.Group + "/node-restricted-labels": string(json)} + nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, retrieved.Annotations, kubeletLabelsAnnotation) nodeClaim.Status.ProviderID = retrieved.Status.ProviderID nodeClaim.Status.ImageID = retrieved.Status.ImageID nodeClaim.Status.Allocatable = retrieved.Status.Allocatable diff --git a/pkg/scheduling/requirements.go b/pkg/scheduling/requirements.go index 87c36179c4..bcbbccbc90 100644 --- a/pkg/scheduling/requirements.go +++ b/pkg/scheduling/requirements.go @@ -303,13 +303,30 @@ func (r Requirements) Intersects(requirements Requirements) (errs error) { return errs } +// Labels filters out labels from the requirements that are not allowed to be synced centrally by Karpenter to the Node. 
func (r Requirements) Labels() map[string]string { labels := map[string]string{} for key, requirement := range r { - if !v1.IsRestrictedNodeLabel(key) { - if value := requirement.Any(); value != "" { - labels[key] = value - } + if !v1.IsValidToSyncCentrallyLabel(key) { + continue + } + if value := requirement.Any(); value != "" { + labels[key] = value + } + } + return labels +} + +// KubeletLabels filters out labels from the requirements that will be rejected by the node restriction admission. +// Bootstrap implementations can choose to pass the resulting list to the kubelet. +func (r Requirements) KubeletLabels() map[string]string { + labels := map[string]string{} + for key, requirement := range r { + if !v1.IsKubeletLabel(key) { + continue + } + if value := requirement.Any(); value != "" { + labels[key] = value } } return labels