Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ type AutoscalingOptions struct {
MaxNodeSkipEvalTimeTrackerEnabled bool
// NodeRemovalLatencyTrackingEnabled is used to enable/disable node removal latency tracking.
NodeRemovalLatencyTrackingEnabled bool
CapacityQuotasEnabled bool
}

// KubeClientOptions specify options for kube client
Expand Down
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ var (
capacitybufferPodInjectionEnabled = flag.Bool("capacity-buffer-pod-injection-enabled", false, "Whether to enable pod list processor that processes ready capacity buffers and injects fake pods accordingly")
nodeRemovalLatencyTrackingEnabled = flag.Bool("node-removal-latency-tracking-enabled", false, "Whether to track latency from when an unneeded node is eligible for scale down until it is removed or needed again.")
maxNodeSkipEvalTimeTrackerEnabled = flag.Bool("max-node-skip-eval-time-tracker-enabled", false, "Whether to enable the tracking of the maximum time of node being skipped during ScaleDown")
capacityQuotasEnabled = flag.Bool("capacity-quotas-enabled", false, "Whether to enable CapacityQuota CRD support.")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like that's our common pattern for gating new features. Wouldn't it be better to introduce a k8s-like feature-gates flag? I think they would be easier to clean up. Also it would limit the number of flags we're introducing into CAS, which might be beneficial, considering how many are already there


// Deprecated flags
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)")
Expand Down Expand Up @@ -434,6 +435,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
CapacitybufferPodInjectionEnabled: *capacitybufferPodInjectionEnabled,
NodeRemovalLatencyTrackingEnabled: *nodeRemovalLatencyTrackingEnabled,
MaxNodeSkipEvalTimeTrackerEnabled: *maxNodeSkipEvalTimeTrackerEnabled,
CapacityQuotasEnabled: *capacityQuotasEnabled,
}
}

Expand Down
15 changes: 13 additions & 2 deletions cluster-autoscaler/core/autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ limitations under the License.
package core

import (
"context"
"strings"
"time"

cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1"
cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder"
ca_context "k8s.io/autoscaler/cluster-autoscaler/context"
coreoptions "k8s.io/autoscaler/cluster-autoscaler/core/options"
Expand All @@ -30,6 +32,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/observers/loopstart"
ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
"k8s.io/autoscaler/cluster-autoscaler/resourcequotas"
"k8s.io/autoscaler/cluster-autoscaler/resourcequotas/capacityquota"
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate"
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store"
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules"
Expand Down Expand Up @@ -146,8 +149,16 @@ func initializeDefaultOptions(opts *coreoptions.AutoscalerOptions, informerFacto
opts.ExpanderStrategy = expanderStrategy
}
if opts.QuotasTrackerOptions.QuotaProvider == nil {
cloudQuotasProvider := resourcequotas.NewCloudQuotasProvider(opts.CloudProvider)
opts.QuotasTrackerOptions.QuotaProvider = resourcequotas.NewCombinedQuotasProvider([]resourcequotas.Provider{cloudQuotasProvider})
providers := []resourcequotas.Provider{resourcequotas.NewCloudQuotasProvider(opts.CloudProvider)}

if opts.CapacityQuotasEnabled {
// register informer here to disable lazy initialization
if _, err := opts.KubeCache.GetInformer(context.TODO(), &cqv1alpha1.CapacityQuota{}); err != nil {
return err
}
providers = append(providers, capacityquota.NewCapacityQuotasProvider(opts.KubeClientNew))
}
opts.QuotasTrackerOptions.QuotaProvider = resourcequotas.NewCombinedQuotasProvider(providers)
}
if opts.QuotasTrackerOptions.CustomResourcesProcessor == nil {
opts.QuotasTrackerOptions.CustomResourcesProcessor = opts.Processors.CustomResourcesProcessor
Expand Down
4 changes: 4 additions & 0 deletions cluster-autoscaler/core/options/autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
"k8s.io/client-go/informers"
kube_client "k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// AutoscalerOptions is the whole set of options for configuring an autoscaler
Expand All @@ -59,4 +61,6 @@ type AutoscalerOptions struct {
DrainabilityRules rules.Rules
DraProvider *draprovider.Provider
QuotasTrackerOptions resourcequotas.TrackerOptions
KubeClientNew client.Client
KubeCache cache.Cache
}
5 changes: 4 additions & 1 deletion cluster-autoscaler/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ require (
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4
sigs.k8s.io/cloud-provider-azure v1.29.4
sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.13
sigs.k8s.io/controller-runtime v0.22.4
sigs.k8s.io/yaml v1.6.0
)

Expand Down Expand Up @@ -131,6 +132,7 @@ require (
github.com/docker/go-units v0.5.0 // indirect
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
github.com/euank/go-kmsg-parser v2.0.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
Expand Down Expand Up @@ -161,7 +163,7 @@ require (
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jessevdk/go-flags v1.4.1-0.20181029123624-5de817a9aa20 // indirect
github.com/jessevdk/go-flags v1.6.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/karrick/godirwalk v1.17.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
Expand Down Expand Up @@ -221,6 +223,7 @@ require (
golang.org/x/text v0.29.0 // indirect
golang.org/x/time v0.9.0 // indirect
golang.org/x/tools v0.36.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
Expand Down
14 changes: 12 additions & 2 deletions cluster-autoscaler/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/euank/go-kmsg-parser v2.0.0+incompatible h1:cHD53+PLQuuQyLZeriD1V/esuG4MuU0Pjs5y6iknohY=
github.com/euank/go-kmsg-parser v2.0.0+incompatible/go.mod h1:MhmAMZ8V4CYH4ybgdRwPr2TU5ThnS43puaKEMpja1uw=
github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls=
github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
Expand Down Expand Up @@ -280,6 +284,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 h1:EEHtgt9IwisQ2AZ4pIsMjahcegHh6rmhqxzIRQIyepY=
github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
Expand Down Expand Up @@ -309,8 +315,8 @@ github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISH
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jessevdk/go-flags v1.4.1-0.20181029123624-5de817a9aa20 h1:dAOsPLhnBzIyxu0VvmnKjlNcIlgMK+erD6VRHDtweMI=
github.com/jessevdk/go-flags v1.4.1-0.20181029123624-5de817a9aa20/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4=
github.com/jessevdk/go-flags v1.6.1/go.mod h1:Mk8T1hIAWpOiJiHa9rJASDK2UGWji0EuPGBnNLMooyc=
github.com/jmattheis/goverter v1.4.0 h1:SrboBYMpGkj1XSgFhWwqzdP024zIa1+58YzUm+0jcBE=
github.com/jmattheis/goverter v1.4.0/go.mod h1:iVIl/4qItWjWj2g3vjouGoYensJbRqDHpzlEVMHHFeY=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
Expand Down Expand Up @@ -623,6 +629,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
google.golang.org/api v0.151.0 h1:FhfXLO/NFdJIzQtCqjpysWwqKk8AzGWBUhMIx67cVDU=
google.golang.org/api v0.151.0/go.mod h1:ccy+MJ6nrYFgE3WgRx/AMXOxOmU8Q4hSa+jjibzhxcg=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
Expand Down Expand Up @@ -735,6 +743,8 @@ sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.13 h1:dxpo41/N6m2R//9fmqKgqYZ
sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.0.13/go.mod h1:tN2BDTM6RDyQsae6JRvaaA14LVxDsRaLU3Ea2MRUBjg=
sigs.k8s.io/cloud-provider-azure/pkg/azclient/configloader v0.0.4 h1:CFMHYo6/OQpLTycJGQIze2pchNeJQ7L2TQC6fDo4JGY=
sigs.k8s.io/cloud-provider-azure/pkg/azclient/configloader v0.0.4/go.mod h1:PvXgFxPcfve6yBiWNIO/fqAMvGVC9W7qN6M2vIj4zmY=
sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A=
sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
Expand Down
93 changes: 75 additions & 18 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ import (
"time"

"github.com/spf13/pflag"

"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
cqv1alpha1 "k8s.io/autoscaler/cluster-autoscaler/apis/capacityquota/autoscaling.x-k8s.io/v1alpha1"
capacityclient "k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/client"
"k8s.io/autoscaler/cluster-autoscaler/capacitybuffer/common"
"k8s.io/autoscaler/cluster-autoscaler/config/flags"
Expand All @@ -41,7 +43,12 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store"
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
"k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/kubernetes/pkg/features"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/manager"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -85,6 +92,16 @@ import (
"k8s.io/klog/v2"
)

var (
scheme = runtime.NewScheme()
)

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(cqv1alpha1.AddToScheme(scheme))
// TODO: add other CRDs
}

func registerSignalHandlers(autoscaler core.Autoscaler) {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, os.Interrupt, os.Kill, syscall.SIGTERM, syscall.SIGQUIT)
Expand All @@ -100,7 +117,7 @@ func registerSignalHandlers(autoscaler core.Autoscaler) {
}()
}

func buildAutoscaler(ctx context.Context, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) (core.Autoscaler, *loop.LoopTrigger, error) {
func buildAutoscaler(ctx context.Context, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter, mgr manager.Manager) (core.Autoscaler, *loop.LoopTrigger, error) {
// Get AutoscalingOptions from flags.
autoscalingOptions := flags.AutoscalingOptions()

Expand Down Expand Up @@ -133,6 +150,8 @@ func buildAutoscaler(ctx context.Context, debuggingSnapshotter debuggingsnapshot
DeleteOptions: deleteOptions,
DrainabilityRules: drainabilityRules,
ScaleUpOrchestrator: orchestrator.New(),
KubeClientNew: mgr.GetClient(),
KubeCache: mgr.GetCache(),
}

opts.Processors = ca_processors.DefaultProcessors(autoscalingOptions)
Expand Down Expand Up @@ -304,12 +323,29 @@ func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapsho
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

autoscaler, trigger, err := buildAutoscaler(ctx, debuggingSnapshotter)
restConfig := kube_util.GetKubeConfig(autoscalingOpts.KubeClientOpts)
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
Cache: cache.Options{
DefaultTransform: cache.TransformStripManagedFields(),
},
// TODO: migrate leader election, metrics, healthcheck, pprof servers to Manager
LeaderElection: false,
Metrics: metricsserver.Options{BindAddress: "0"},
HealthProbeBindAddress: "0",
PprofBindAddress: "0",
})
if err != nil {
klog.Fatalf("Failed to create manager: %v", err)
}

autoscaler, trigger, err := buildAutoscaler(ctx, debuggingSnapshotter, mgr)
if err != nil {
klog.Fatalf("Failed to create autoscaler: %v", err)
}

// Register signal handlers for graceful shutdown.
// TODO: replace with ctrl.SetupSignalHandlers() and handle graceful shutdown with context
registerSignalHandlers(autoscaler)

// Start updating health check endpoint.
Expand All @@ -320,22 +356,42 @@ func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapsho
klog.Fatalf("Failed to autoscaler background components: %v", err)
}

// Autoscale ad infinitum.
if autoscalingOpts.FrequentLoopsEnabled {
// We need to have two timestamps because the scaleUp activity alternates between processing ProvisioningRequests,
// so we need to pass the older timestamp (previousRun) to trigger.Wait to run immediately if only one of the activities is productive.
lastRun := time.Now()
previousRun := time.Now()
for {
trigger.Wait(previousRun)
previousRun, lastRun = lastRun, time.Now()
loop.RunAutoscalerOnce(autoscaler, healthCheck, lastRun)
}
} else {
for {
time.Sleep(autoscalingOpts.ScanInterval)
loop.RunAutoscalerOnce(autoscaler, healthCheck, time.Now())
err = mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
// Autoscale ad infinitum.
if autoscalingOpts.FrequentLoopsEnabled {
// We need to have two timestamps because the scaleUp activity alternates between processing ProvisioningRequests,
// so we need to pass the older timestamp (previousRun) to trigger.Wait to run immediately if only one of the activities is productive.
lastRun := time.Now()
previousRun := time.Now()
for {
select {
case <-ctx.Done():
// TODO: handle graceful shutdown with context
return nil
default:
trigger.Wait(previousRun)
previousRun, lastRun = lastRun, time.Now()
loop.RunAutoscalerOnce(autoscaler, healthCheck, lastRun)
}
}
} else {
for {
select {
case <-ctx.Done():
// TODO: handle graceful shutdown with context
return nil
case <-time.After(autoscalingOpts.ScanInterval):
loop.RunAutoscalerOnce(autoscaler, healthCheck, time.Now())
}
}
}
}))
if err != nil {
klog.Fatalf("Failed to add runnable to manager: %v", err)
}

if err := mgr.Start(ctx); err != nil {
klog.Fatalf("Manager exited with error: %v", err)
}
}

Expand Down Expand Up @@ -373,6 +429,7 @@ func main() {
if err := logsapi.ValidateAndApply(loggingConfig, featureGate); err != nil {
klog.Fatalf("Failed to validate and apply logging configuration: %v", err)
}
ctrl.SetLogger(klog.NewKlogr())

healthCheck := metrics.NewHealthCheck(autoscalingOpts.MaxInactivityTime, autoscalingOpts.MaxFailingTime)

Expand Down
Loading