diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index a8c995422ef..b9122cb5b3f 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -360,6 +360,7 @@ type AutoscalingOptions struct { MaxNodeSkipEvalTimeTrackerEnabled bool // NodeRemovalLatencyTrackingEnabled is used to enable/disable node removal latency tracking. NodeRemovalLatencyTrackingEnabled bool + CapacityQuotasEnabled bool } // KubeClientOptions specify options for kube client diff --git a/cluster-autoscaler/config/flags/flags.go b/cluster-autoscaler/config/flags/flags.go index 364c35d030d..b695efdea6b 100644 --- a/cluster-autoscaler/config/flags/flags.go +++ b/cluster-autoscaler/config/flags/flags.go @@ -237,6 +237,7 @@ var ( capacitybufferPodInjectionEnabled = flag.Bool("capacity-buffer-pod-injection-enabled", false, "Whether to enable pod list processor that processes ready capacity buffers and injects fake pods accordingly") nodeRemovalLatencyTrackingEnabled = flag.Bool("node-removal-latency-tracking-enabled", false, "Whether to track latency from when an unneeded node is eligible for scale down until it is removed or needed again.") maxNodeSkipEvalTimeTrackerEnabled = flag.Bool("max-node-skip-eval-time-tracker-enabled", false, "Whether to enable the tracking of the maximum time of node being skipped during ScaleDown") + capacityQuotasEnabled = flag.Bool("capacity-quotas-enabled", false, "Whether to enable CapacityQuota CRD support.") // Deprecated flags ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)") @@ -434,6 +435,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { CapacitybufferPodInjectionEnabled: *capacitybufferPodInjectionEnabled, NodeRemovalLatencyTrackingEnabled: *nodeRemovalLatencyTrackingEnabled, MaxNodeSkipEvalTimeTrackerEnabled: *maxNodeSkipEvalTimeTrackerEnabled, + CapacityQuotasEnabled: *capacityQuotasEnabled, } } diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 3a5b3b62d5a..f18ecbf56ac 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -17,9 +17,11 @@ limitations under the License. 
package core import ( + "context" "strings" "time" + cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1" cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder" ca_context "k8s.io/autoscaler/cluster-autoscaler/context" coreoptions "k8s.io/autoscaler/cluster-autoscaler/core/options" @@ -30,6 +32,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/observers/loopstart" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" "k8s.io/autoscaler/cluster-autoscaler/resourcequotas" + "k8s.io/autoscaler/cluster-autoscaler/resourcequotas/capacityquota" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" @@ -146,8 +149,16 @@ func initializeDefaultOptions(opts *coreoptions.AutoscalerOptions, informerFacto opts.ExpanderStrategy = expanderStrategy } if opts.QuotasTrackerOptions.QuotaProvider == nil { - cloudQuotasProvider := resourcequotas.NewCloudQuotasProvider(opts.CloudProvider) - opts.QuotasTrackerOptions.QuotaProvider = resourcequotas.NewCombinedQuotasProvider([]resourcequotas.Provider{cloudQuotasProvider}) + providers := []resourcequotas.Provider{resourcequotas.NewCloudQuotasProvider(opts.CloudProvider)} + + if opts.CapacityQuotasEnabled { + // register informer here to disable lazy initialization + if _, err := opts.KubeCache.GetInformer(context.TODO(), &cqv1alpha1.CapacityQuota{}); err != nil { + return err + } + providers = append(providers, capacityquota.NewCapacityQuotasProvider(opts.KubeClientNew)) + } + opts.QuotasTrackerOptions.QuotaProvider = resourcequotas.NewCombinedQuotasProvider(providers) } if opts.QuotasTrackerOptions.CustomResourcesProcessor == nil { opts.QuotasTrackerOptions.CustomResourcesProcessor = opts.Processors.CustomResourcesProcessor diff --git a/cluster-autoscaler/core/options/autoscaler.go b/cluster-autoscaler/core/options/autoscaler.go index 2be37c86547..f07bf301019 100644 --- a/cluster-autoscaler/core/options/autoscaler.go +++ b/cluster-autoscaler/core/options/autoscaler.go @@ -36,6 +36,8 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" "k8s.io/client-go/informers" kube_client "k8s.io/client-go/kubernetes" + "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" ) // AutoscalerOptions is the whole set of options for configuring an autoscaler @@ -59,4 +61,6 @@ type AutoscalerOptions struct { DrainabilityRules rules.Rules DraProvider *draprovider.Provider QuotasTrackerOptions resourcequotas.TrackerOptions + KubeClientNew client.Client + KubeCache cache.Cache } diff --git a/cluster-autoscaler/go.mod b/cluster-autoscaler/go.mod index 2d826c6f45b..07aae7c501a 100644 --- a/cluster-autoscaler/go.mod +++ b/cluster-autoscaler/go.mod @@ -66,6 +66,7 @@ require ( k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 sigs.k8s.io/cloud-provider-azure v1.29.4 sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.13 + sigs.k8s.io/controller-runtime v0.22.4 sigs.k8s.io/yaml v1.6.0 ) @@ -131,6 +132,7 @@ require ( github.com/docker/go-units v0.5.0 // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/euank/go-kmsg-parser v2.0.0+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect @@ -161,7 +163,7 @@ require ( 
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/jessevdk/go-flags v1.4.1-0.20181029123624-5de817a9aa20 // indirect + github.com/jessevdk/go-flags v1.6.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/karrick/godirwalk v1.17.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect @@ -221,6 +223,7 @@ require ( golang.org/x/text v0.29.0 // indirect golang.org/x/time v0.9.0 // indirect golang.org/x/tools v0.36.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect diff --git a/cluster-autoscaler/go.sum b/cluster-autoscaler/go.sum index 6b4201d410d..6e28d481b31 100644 --- a/cluster-autoscaler/go.sum +++ b/cluster-autoscaler/go.sum @@ -195,6 +195,10 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/euank/go-kmsg-parser v2.0.0+incompatible h1:cHD53+PLQuuQyLZeriD1V/esuG4MuU0Pjs5y6iknohY= github.com/euank/go-kmsg-parser v2.0.0+incompatible/go.mod h1:MhmAMZ8V4CYH4ybgdRwPr2TU5ThnS43puaKEMpja1uw= +github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= +github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -280,6 +284,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 h1:EEHtgt9IwisQ2AZ4pIsMjahcegHh6rmhqxzIRQIyepY= github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= @@ -309,8 +315,8 @@ github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISH github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jessevdk/go-flags v1.4.1-0.20181029123624-5de817a9aa20 h1:dAOsPLhnBzIyxu0VvmnKjlNcIlgMK+erD6VRHDtweMI= -github.com/jessevdk/go-flags v1.4.1-0.20181029123624-5de817a9aa20/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= 
+github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4= +github.com/jessevdk/go-flags v1.6.1/go.mod h1:Mk8T1hIAWpOiJiHa9rJASDK2UGWji0EuPGBnNLMooyc= github.com/jmattheis/goverter v1.4.0 h1:SrboBYMpGkj1XSgFhWwqzdP024zIa1+58YzUm+0jcBE= github.com/jmattheis/goverter v1.4.0/go.mod h1:iVIl/4qItWjWj2g3vjouGoYensJbRqDHpzlEVMHHFeY= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= @@ -623,6 +629,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= google.golang.org/api v0.151.0 h1:FhfXLO/NFdJIzQtCqjpysWwqKk8AzGWBUhMIx67cVDU= google.golang.org/api v0.151.0/go.mod h1:ccy+MJ6nrYFgE3WgRx/AMXOxOmU8Q4hSa+jjibzhxcg= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= @@ -735,6 +743,8 @@ sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.13 h1:dxpo41/N6m2R//9fmqKgqYZ sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.13/go.mod h1:tN2BDTM6RDyQsae6JRvaaA14LVxDsRaLU3Ea2MRUBjg= sigs.k8s.io/cloud-provider-azure/pkg/azclient/configloader v0.0.4 h1:CFMHYo6/OQpLTycJGQIze2pchNeJQ7L2TQC6fDo4JGY= sigs.k8s.io/cloud-provider-azure/pkg/azclient/configloader v0.0.4/go.mod h1:PvXgFxPcfve6yBiWNIO/fqAMvGVC9W7qN6M2vIj4zmY= +sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= +sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 123881daae2..d49b6424604 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -26,7 +26,9 @@ import ( "time" "github.com/spf13/pflag" - + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1" capacityclient "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/client" "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/common" "k8s.io/autoscaler/cluster-autoscaler/config/flags" @@ -41,7 +43,12 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/kubernetes/pkg/features" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/manager" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -85,6 +92,16 @@ import ( "k8s.io/klog/v2" ) +var ( + scheme = runtime.NewScheme() +) + +func init() { + 
utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(cqv1alpha1.AddToScheme(scheme)) + // TODO: add other CRDs +} + func registerSignalHandlers(autoscaler core.Autoscaler) { sigs := make(chan os.Signal, 1) signal.Notify(sigs, os.Interrupt, os.Kill, syscall.SIGTERM, syscall.SIGQUIT) @@ -100,7 +117,7 @@ func registerSignalHandlers(autoscaler core.Autoscaler) { }() } -func buildAutoscaler(ctx context.Context, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) (core.Autoscaler, *loop.LoopTrigger, error) { +func buildAutoscaler(ctx context.Context, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter, mgr manager.Manager) (core.Autoscaler, *loop.LoopTrigger, error) { // Get AutoscalingOptions from flags. autoscalingOptions := flags.AutoscalingOptions() @@ -133,6 +150,8 @@ func buildAutoscaler(ctx context.Context, debuggingSnapshotter debuggingsnapshot DeleteOptions: deleteOptions, DrainabilityRules: drainabilityRules, ScaleUpOrchestrator: orchestrator.New(), + KubeClientNew: mgr.GetClient(), + KubeCache: mgr.GetCache(), } opts.Processors = ca_processors.DefaultProcessors(autoscalingOptions) @@ -304,12 +323,29 @@ func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapsho ctx, cancel := context.WithCancel(context.Background()) defer cancel() - autoscaler, trigger, err := buildAutoscaler(ctx, debuggingSnapshotter) + restConfig := kube_util.GetKubeConfig(autoscalingOpts.KubeClientOpts) + mgr, err := ctrl.NewManager(restConfig, ctrl.Options{ + Scheme: scheme, + Cache: cache.Options{ + DefaultTransform: cache.TransformStripManagedFields(), + }, + // TODO: migrate leader election, metrics, healthcheck, pprof servers to Manager + LeaderElection: false, + Metrics: metricsserver.Options{BindAddress: "0"}, + HealthProbeBindAddress: "0", + PprofBindAddress: "0", + }) + if err != nil { + klog.Fatalf("Failed to create manager: %v", err) + } + + autoscaler, trigger, err := buildAutoscaler(ctx, debuggingSnapshotter, mgr) if err != nil { klog.Fatalf("Failed to create autoscaler: %v", err) } // Register signal handlers for graceful shutdown. + // TODO: replace with ctrl.SetupSignalHandlers() and handle graceful shutdown with context registerSignalHandlers(autoscaler) // Start updating health check endpoint. @@ -320,22 +356,42 @@ func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapsho klog.Fatalf("Failed to autoscaler background components: %v", err) } - // Autoscale ad infinitum. - if autoscalingOpts.FrequentLoopsEnabled { - // We need to have two timestamps because the scaleUp activity alternates between processing ProvisioningRequests, - // so we need to pass the older timestamp (previousRun) to trigger.Wait to run immediately if only one of the activities is productive. - lastRun := time.Now() - previousRun := time.Now() - for { - trigger.Wait(previousRun) - previousRun, lastRun = lastRun, time.Now() - loop.RunAutoscalerOnce(autoscaler, healthCheck, lastRun) - } - } else { - for { - time.Sleep(autoscalingOpts.ScanInterval) - loop.RunAutoscalerOnce(autoscaler, healthCheck, time.Now()) + err = mgr.Add(manager.RunnableFunc(func(ctx context.Context) error { + // Autoscale ad infinitum. + if autoscalingOpts.FrequentLoopsEnabled { + // We need to have two timestamps because the scaleUp activity alternates between processing ProvisioningRequests, + // so we need to pass the older timestamp (previousRun) to trigger.Wait to run immediately if only one of the activities is productive. 
+ lastRun := time.Now() + previousRun := time.Now() + for { + select { + case <-ctx.Done(): + // TODO: handle graceful shutdown with context + return nil + default: + trigger.Wait(previousRun) + previousRun, lastRun = lastRun, time.Now() + loop.RunAutoscalerOnce(autoscaler, healthCheck, lastRun) + } + } + } else { + for { + select { + case <-ctx.Done(): + // TODO: handle graceful shutdown with context + return nil + case <-time.After(autoscalingOpts.ScanInterval): + loop.RunAutoscalerOnce(autoscaler, healthCheck, time.Now()) + } + } } + })) + if err != nil { + klog.Fatalf("Failed to add runnable to manager: %v", err) + } + + if err := mgr.Start(ctx); err != nil { + klog.Fatalf("Manager exited with error: %v", err) } } @@ -373,6 +429,7 @@ func main() { if err := logsapi.ValidateAndApply(loggingConfig, featureGate); err != nil { klog.Fatalf("Failed to validate and apply logging configuration: %v", err) } + ctrl.SetLogger(klog.NewKlogr()) healthCheck := metrics.NewHealthCheck(autoscalingOpts.MaxInactivityTime, autoscalingOpts.MaxFailingTime) diff --git a/cluster-autoscaler/resourcequotas/capacityquota/provider.go b/cluster-autoscaler/resourcequotas/capacityquota/provider.go new file mode 100644 index 00000000000..7c9db6ea190 --- /dev/null +++ b/cluster-autoscaler/resourcequotas/capacityquota/provider.go @@ -0,0 +1,104 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityquota + +import ( + "context" + "fmt" + + apiv1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1" + "k8s.io/autoscaler/cluster-autoscaler/resourcequotas" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Provider provides quotas from CapacityQuota custom resource. +type Provider struct { + kubeClient client.Client +} + +// NewCapacityQuotasProvider returns a new Provider. +func NewCapacityQuotasProvider(kubeClient client.Client) *Provider { + return &Provider{kubeClient: kubeClient} +} + +// Quotas returns quotas built from CapacityQuota resources in the cluster. 
+func (p *Provider) Quotas() ([]resourcequotas.Quota, error) { + capacityQuotas := &cqv1alpha1.CapacityQuotaList{} + err := p.kubeClient.List(context.TODO(), capacityQuotas) + if err != nil { + return nil, err + } + var quotas []resourcequotas.Quota + for _, cq := range capacityQuotas.Items { + quota, err := newFromCapacityQuota(cq) + if err != nil { + klog.Errorf("Skipping CapacityQuota %q, err: %v", cq.Name, err) + continue + } + quotas = append(quotas, quota) + } + return quotas, nil +} + +type labelSelectorQuota struct { + id string + selector labels.Selector + limits map[string]int64 +} + +func (lsq *labelSelectorQuota) ID() string { + return lsq.id +} + +func (lsq *labelSelectorQuota) AppliesTo(node *apiv1.Node) bool { + return lsq.selector.Matches(labels.Set(node.Labels)) +} + +func (lsq *labelSelectorQuota) Limits() map[string]int64 { + return lsq.limits +} + +func labelSelectorAsSelector(ls *v1.LabelSelector) (labels.Selector, error) { + if ls == nil { + return labels.Everything(), nil + } + selector, err := v1.LabelSelectorAsSelector(ls) + if err != nil { + return nil, fmt.Errorf("invalid label selector: %w", err) + } + return selector, nil +} + +func newFromCapacityQuota(cq cqv1alpha1.CapacityQuota) (*labelSelectorQuota, error) { + selector, err := labelSelectorAsSelector(cq.Spec.Selector) + if err != nil { + return nil, err + } + limits := make(map[string]int64, len(cq.Spec.Limits.Resources)) + for resource, limit := range cq.Spec.Limits.Resources { + limits[string(resource)] = limit.Value() + } + return &labelSelectorQuota{ + id: fmt.Sprintf("CapacityQuota/%s", cq.Name), + selector: selector, + limits: limits, + }, nil +} diff --git a/cluster-autoscaler/resourcequotas/capacityquota/provider_test.go b/cluster-autoscaler/resourcequotas/capacityquota/provider_test.go new file mode 100644 index 00000000000..a154a2ceb8c --- /dev/null +++ b/cluster-autoscaler/resourcequotas/capacityquota/provider_test.go @@ -0,0 +1,290 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package capacityquota + +import ( + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestProvider_Quotas(t *testing.T) { + scheme := runtime.NewScheme() + _ = cqv1alpha1.AddToScheme(scheme) + + testCases := []struct { + name string + existingCQs []client.Object + wantQuotas []string + }{ + { + name: "no-capacity-quotas", + existingCQs: []client.Object{}, + wantQuotas: nil, + }, + { + name: "single-capacity-quota", + existingCQs: []client.Object{ + &cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"foo": "bar"}}, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{ + cqv1alpha1.ResourceCPU: resource.MustParse("10"), + cqv1alpha1.ResourceMemory: resource.MustParse("20Gi"), + }, + }, + }, + }, + }, + wantQuotas: []string{"CapacityQuota/cq1"}, + }, + { + name: "multiple-capacity-quotas", + existingCQs: []client.Object{ + &cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"foo": "bar"}, + }, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{ + cqv1alpha1.ResourceCPU: resource.MustParse("10"), + }, + }, + }, + }, + &cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq2"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"baz": "qux"}, + }, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{ + cqv1alpha1.ResourceMemory: resource.MustParse("5Gi"), + }, + }, + }, + }, + }, + wantQuotas: []string{"CapacityQuota/cq1", "CapacityQuota/cq2"}, + }, + { + name: "capacity-quota-with-invalid-selector-is-skipped", + existingCQs: []client.Object{ + &cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq_invalid"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "invalidKey!!!!", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{ + cqv1alpha1.ResourceCPU: resource.MustParse("10"), + }, + }, + }, + }, + &cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq_valid"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"valid": "true"}, + }, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{ + cqv1alpha1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + wantQuotas: []string{"CapacityQuota/cq_valid"}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tc.existingCQs...).Build() + p := &Provider{kubeClient: fakeClient} + + gotQuotas, err := p.Quotas() + if err != nil { + t.Fatalf("Provider.Quotas() unexpected error: %v", err) + } + + var gotIDs []string + for _, q := range gotQuotas { + gotIDs = 
append(gotIDs, q.ID()) + } + + if diff := cmp.Diff(tc.wantQuotas, gotIDs, cmpopts.SortSlices(func(a, b string) bool { return a < b })); diff != "" { + t.Errorf("Provider.Quotas() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestCapacityQuota_Selector(t *testing.T) { + testCases := []struct { + name string + cq cqv1alpha1.CapacityQuota + node *corev1.Node + wantAppliesTo bool + wantErrMsg string + }{ + { + name: "matchLabels-matches", + cq: cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + Limits: cqv1alpha1.CapacityQuotaLimits{Resources: cqv1alpha1.ResourceList{cqv1alpha1.ResourceCPU: resource.MustParse("1")}}, + }, + }, + node: &corev1.Node{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, + wantAppliesTo: true, + }, + { + name: "matchLabels-does-not-match", + cq: cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + Limits: cqv1alpha1.CapacityQuotaLimits{Resources: cqv1alpha1.ResourceList{cqv1alpha1.ResourceCPU: resource.MustParse("1")}}, + }, + }, + node: &corev1.Node{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "baz"}}}, + wantAppliesTo: false, + }, + { + name: "nil-selector-matches-everything", + cq: cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: nil, + Limits: cqv1alpha1.CapacityQuotaLimits{Resources: cqv1alpha1.ResourceList{cqv1alpha1.ResourceCPU: resource.MustParse("1")}}, + }, + }, + node: &corev1.Node{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"any": "label"}}}, + wantAppliesTo: true, + }, + { + name: "invalid-selector", + cq: cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "invalidKey!!!!", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + }, + }, + wantErrMsg: "invalid label selector", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + q, err := newFromCapacityQuota(tc.cq) + if tc.wantErrMsg == "" { + if err != nil { + t.Fatalf("newFromCapacityQuota() unexpected error: %v", err) + } + if got := q.AppliesTo(tc.node); got != tc.wantAppliesTo { + t.Errorf("AppliesTo() = %v, want %v", got, tc.wantAppliesTo) + } + } else { + if err == nil || !strings.Contains(err.Error(), tc.wantErrMsg) { + t.Errorf("newFromCapacityQuota() want err containing %q, got %v", tc.wantErrMsg, err) + } + } + }) + } +} + +func TestCapacityQuota_Limits(t *testing.T) { + testCases := []struct { + name string + cq cqv1alpha1.CapacityQuota + wantLimits map[string]int64 + }{ + { + name: "cpu-and-memory-limits", + cq: cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{ + cqv1alpha1.ResourceCPU: resource.MustParse("5"), + cqv1alpha1.ResourceMemory: resource.MustParse("10Gi"), + }, + }, + }, + }, + wantLimits: map[string]int64{"cpu": 5, "memory": 10 * 1024 * 1024 * 1024}, + }, + { + name: "empty-limits", + cq: cqv1alpha1.CapacityQuota{ + ObjectMeta: metav1.ObjectMeta{Name: "cq1"}, + 
Spec: cqv1alpha1.CapacityQuotaSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + Limits: cqv1alpha1.CapacityQuotaLimits{ + Resources: cqv1alpha1.ResourceList{}, + }, + }, + }, + wantLimits: map[string]int64{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + q, err := newFromCapacityQuota(tc.cq) + if err != nil { + t.Fatalf("newFromCapacityQuota() unexpected error: %v", err) + } + if diff := cmp.Diff(tc.wantLimits, q.Limits()); diff != "" { + t.Errorf("Limits() mismatch (-want +got):\n%s", diff) + } + }) + } +}
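Reviewer notes (not part of the patch):

The resourcequotas interfaces that the new capacityquota provider plugs into are not touched by this diff. Inferred from the call sites above (NewCombinedQuotasProvider, Provider.Quotas(), and the labelSelectorQuota methods), they presumably look roughly like the sketch below; the authoritative definitions live in cluster-autoscaler/resourcequotas and may differ.

// Hedged sketch of the resourcequotas types as inferred from this patch;
// not the actual definitions.
package resourcequotas

import apiv1 "k8s.io/api/core/v1"

// Provider supplies the current set of quotas (cloud quotas, CapacityQuota
// objects, ...). CombinedQuotasProvider aggregates several of these.
type Provider interface {
	Quotas() ([]Quota, error)
}

// Quota is a single capacity limit that applies to the nodes it selects.
type Quota interface {
	// ID uniquely identifies the quota, e.g. "CapacityQuota/<name>".
	ID() string
	// AppliesTo reports whether this quota constrains the given node.
	AppliesTo(node *apiv1.Node) bool
	// Limits maps resource names (e.g. "cpu", "memory") to maximum totals.
	Limits() map[string]int64
}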
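A minimal end-to-end usage sketch of the new provider, mirroring provider_test.go. It uses the fake controller-runtime client in place of the manager-backed client; the quota name, label and value are illustrative only.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1"
	"k8s.io/autoscaler/cluster-autoscaler/resourcequotas/capacityquota"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func main() {
	scheme := runtime.NewScheme()
	if err := cqv1alpha1.AddToScheme(scheme); err != nil {
		panic(err)
	}

	// A CapacityQuota that caps nodes labelled pool=gpu at 64 CPUs total
	// (hypothetical name, label and value).
	cq := &cqv1alpha1.CapacityQuota{
		ObjectMeta: metav1.ObjectMeta{Name: "gpu-pool"},
		Spec: cqv1alpha1.CapacityQuotaSpec{
			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"pool": "gpu"}},
			Limits: cqv1alpha1.CapacityQuotaLimits{
				Resources: cqv1alpha1.ResourceList{
					cqv1alpha1.ResourceCPU: resource.MustParse("64"),
				},
			},
		},
	}

	// In the autoscaler the client comes from the controller-runtime manager
	// (opts.KubeClientNew); the fake client stands in for it here.
	kubeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cq).Build()
	provider := capacityquota.NewCapacityQuotasProvider(kubeClient)

	quotas, err := provider.Quotas()
	if err != nil {
		panic(err)
	}
	for _, q := range quotas {
		// Prints: CapacityQuota/gpu-pool. The quotas tracker then uses the
		// quota's node selector and limits to cap scale-ups.
		fmt.Println(q.ID())
	}
}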
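The main.go change moves the autoscaling loop under a controller-runtime manager. Isolated from the autoscaler specifics, the adopted pattern is roughly the sketch below (the loop body and interval are placeholders). Note that the TODO in the patch refers to ctrl.SetupSignalHandlers(); the controller-runtime function is SetupSignalHandler (singular), used here for illustration.

package main

import (
	"context"
	"time"

	"k8s.io/klog/v2"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/manager"
)

func main() {
	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{})
	if err != nil {
		klog.Fatalf("Failed to create manager: %v", err)
	}

	// The periodic loop becomes a Runnable: it starts when the manager starts
	// and returns when the manager's context is cancelled.
	err = mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
		for {
			select {
			case <-ctx.Done():
				return nil
			case <-time.After(10 * time.Second): // placeholder for ScanInterval
				// one autoscaling iteration would run here
			}
		}
	}))
	if err != nil {
		klog.Fatalf("Failed to add runnable to manager: %v", err)
	}

	// SetupSignalHandler returns a context cancelled on SIGTERM/SIGINT, which
	// is how graceful shutdown would eventually be wired up per the TODOs.
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		klog.Fatalf("Manager exited with error: %v", err)
	}
}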