Skip to content

Commit f9c0302

Browse files
committed
Use cached TemplateNodeInfos in DRA processor
This change makes the DraCustomResourcesProcessor use the TemplateNodeInfo cached from the previous autoscaling loop iteration, preserving custom node options and devices. It falls back to NodeGroup.TemplateNodeInfo() when the TemplateNodeInfoRegistry is not set or has no cached entry for the node group. Fixes #8881
1 parent e6e5f15 commit f9c0302

File tree

2 files changed

+62
-6
lines changed

2 files changed

+62
-6
lines changed

cluster-autoscaler/processors/customresources/dra_processor.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package customresources
1919
import (
2020
apiv1 "k8s.io/api/core/v1"
2121
resourceapi "k8s.io/api/resource/v1"
22+
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
2223

2324
"k8s.io/apimachinery/pkg/util/sets"
2425
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@@ -57,11 +58,19 @@ func (p *DraCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(autosca
5758
continue
5859
}
5960

60-
nodeInfo, err := ng.TemplateNodeInfo()
61-
if err != nil {
62-
newReadyNodes = append(newReadyNodes, node)
63-
klog.Warningf("Failed to get template node info for node group %s with error: %v", ng.Id(), err)
64-
continue
61+
var nodeInfo *framework.NodeInfo
62+
if autoscalingCtx.TemplateNodeInfoRegistry != nil {
63+
if ni, found := autoscalingCtx.TemplateNodeInfoRegistry.GetNodeInfo(ng.Id()); found {
64+
nodeInfo = ni
65+
}
66+
}
67+
if nodeInfo == nil {
68+
nodeInfo, err = ng.TemplateNodeInfo()
69+
if err != nil {
70+
newReadyNodes = append(newReadyNodes, node)
71+
klog.Warningf("Failed to get template node info for node group %s with error: %v", ng.Id(), err)
72+
continue
73+
}
6574
}
6675

6776
nodeResourcesSlices, _ := draSnapshot.NodeResourceSlices(node.Name)

cluster-autoscaler/processors/customresources/dra_processor_test.go

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,32 @@ import (
3535
utils "k8s.io/autoscaler/cluster-autoscaler/utils/test"
3636
)
3737

38+
// mockTemplateNodeInfoRegistry is a test double for a template NodeInfo
// registry. It serves lookups from a plain in-memory map and has no other state.
type mockTemplateNodeInfoRegistry struct {
39+
// nodeInfos maps a string id to the NodeInfo returned for it.
// NOTE(review): the test case in this file keys it by node group id ("ng1").
nodeInfos map[string]*framework.NodeInfo
40+
}
41+
42+
// newMockTemplateNodeInfoRegistry returns a mock registry backed by nodeInfos.
// The map is stored as passed in, not copied. A nil map is accepted: it is
// only ever read from, so every lookup on it simply reports "not found".
func newMockTemplateNodeInfoRegistry(nodeInfos map[string]*framework.NodeInfo) *mockTemplateNodeInfoRegistry {
43+
return &mockTemplateNodeInfoRegistry{
44+
nodeInfos: nodeInfos,
45+
}
46+
}
47+
48+
// GetNodeInfo returns the NodeInfo stored under id and a flag reporting
// whether the map contains an entry for that id. When there is no entry the
// returned NodeInfo is nil and the flag is false.
func (m *mockTemplateNodeInfoRegistry) GetNodeInfo(id string) (*framework.NodeInfo, bool) {
49+
nodeInfo, found := m.nodeInfos[id]
50+
return nodeInfo, found
51+
}
52+
53+
// GetNodeInfos returns the registry's backing map itself, not a copy, so
// changes made by the caller are visible to later lookups on this mock.
func (m *mockTemplateNodeInfoRegistry) GetNodeInfos() map[string]*framework.NodeInfo {
54+
return m.nodeInfos
55+
}
56+
3857
func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) {
3958
testCases := map[string]struct {
4059
nodeGroupsAllNodes map[string][]*apiv1.Node
4160
nodeGroupsTemplatesSlices map[string][]*resourceapi.ResourceSlice
4261
nodesSlices map[string][]*resourceapi.ResourceSlice
4362
expectedNodesReadiness map[string]bool
63+
registryNodeInfos map[string]*framework.NodeInfo
4464
}{
4565
"1 DRA node group all totally ready": {
4666
nodeGroupsAllNodes: map[string][]*apiv1.Node{
@@ -306,6 +326,29 @@ func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) {
306326
"node_7": true,
307327
},
308328
},
329+
"Custom DRA driver retrieved via cached template node info": {
330+
nodeGroupsAllNodes: map[string][]*apiv1.Node{
331+
"ng1": {
332+
buildTestNode("node_1", true),
333+
buildTestNode("node_2", true),
334+
},
335+
},
336+
nodeGroupsTemplatesSlices: map[string][]*resourceapi.ResourceSlice{},
337+
registryNodeInfos: map[string]*framework.NodeInfo{
338+
"ng1": framework.NewNodeInfo(
339+
buildTestNode("ng1_template", true),
340+
createNodeResourceSlices("ng1_template", []int{1}),
341+
),
342+
},
343+
nodesSlices: map[string][]*resourceapi.ResourceSlice{
344+
"node_1": createNodeResourceSlices("node_1", []int{1}),
345+
"node_2": {},
346+
},
347+
expectedNodesReadiness: map[string]bool{
348+
"node_1": true,
349+
"node_2": false,
350+
},
351+
},
309352
}
310353

311354
for tcName, tc := range testCases {
@@ -336,7 +379,11 @@ func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) {
336379
clusterSnapshotStore.SetClusterState([]*apiv1.Node{}, []*apiv1.Pod{}, draSnapshot)
337380
clusterSnapshot, _, _ := testsnapshot.NewCustomTestSnapshotAndHandle(clusterSnapshotStore)
338381

339-
autoscalingCtx := &ca_context.AutoscalingContext{CloudProvider: provider, ClusterSnapshot: clusterSnapshot}
382+
autoscalingCtx := &ca_context.AutoscalingContext{
383+
CloudProvider: provider,
384+
ClusterSnapshot: clusterSnapshot,
385+
TemplateNodeInfoRegistry: newMockTemplateNodeInfoRegistry(tc.registryNodeInfos),
386+
}
340387
processor := DraCustomResourcesProcessor{}
341388
newAllNodes, newReadyNodes := processor.FilterOutNodesWithUnreadyResources(autoscalingCtx, initialAllNodes, initialReadyNodes, draSnapshot)
342389

0 commit comments

Comments
 (0)