Skip to content

Commit 6e92d20

Browse files
bart0sh and klueska committed
DRA: refactor checkpointing
Co-authored-by: Kevin Klues <[email protected]>
1 parent d11b58e commit 6e92d20

File tree

13 files changed

+584
-459
lines changed

13 files changed

+584
-459
lines changed

pkg/kubelet/checkpointmanager/checkpoint_manager.go

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -85,9 +85,6 @@ func (manager *impl) GetCheckpoint(checkpointKey string, checkpoint Checkpoint)
8585
return err
8686
}
8787
err = checkpoint.UnmarshalCheckpoint(blob)
88-
if err == nil {
89-
err = checkpoint.VerifyChecksum()
90-
}
9188
return err
9289
}
9390

pkg/kubelet/cm/dra/state/state_checkpoint.go renamed to pkg/kubelet/cm/dra/checkpoint/state_checkpoint.go

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,55 +20,19 @@ import (
2020
"fmt"
2121
"sync"
2222

23-
"k8s.io/apimachinery/pkg/types"
24-
"k8s.io/apimachinery/pkg/util/sets"
2523
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2624
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
25+
checkpointapi "k8s.io/kubernetes/pkg/kubelet/cm/dra/checkpoint/v1"
2726
)
2827

2928
var _ CheckpointState = &stateCheckpoint{}
3029

31-
// CheckpointState interface provides to get and store state
3230
type CheckpointState interface {
3331
GetOrCreate() (ClaimInfoStateList, error)
3432
Store(ClaimInfoStateList) error
3533
}
3634

37-
// ClaimInfoState is used to store claim info state in a checkpoint
38-
// +k8s:deepcopy-gen=true
39-
type ClaimInfoState struct {
40-
// ClaimUID is an UID of the resource claim
41-
ClaimUID types.UID
42-
43-
// ClaimName is a name of the resource claim
44-
ClaimName string
45-
46-
// Namespace is a claim namespace
47-
Namespace string
48-
49-
// PodUIDs is a set of pod UIDs that reference a resource
50-
PodUIDs sets.Set[string]
51-
52-
// DriverState contains information about all drivers which have allocation
53-
// results in the claim, even if they don't provide devices for their results.
54-
DriverState map[string]DriverState
55-
}
56-
57-
// DriverState is used to store per-device claim info state in a checkpoint
58-
// +k8s:deepcopy-gen=true
59-
type DriverState struct {
60-
Devices []Device
61-
}
62-
63-
// Device is how a DRA driver described an allocated device in a claim
64-
// to kubelet. RequestName and CDI device IDs are optional.
65-
// +k8s:deepcopy-gen=true
66-
type Device struct {
67-
PoolName string
68-
DeviceName string
69-
RequestNames []string
70-
CDIDeviceIDs []string
71-
}
35+
type ClaimInfoStateList []checkpointapi.ClaimInfoState
7236

7337
type stateCheckpoint struct {
7438
sync.RWMutex
@@ -77,7 +41,7 @@ type stateCheckpoint struct {
7741
}
7842

7943
// NewCheckpointState creates new State for keeping track of claim info with checkpoint backend
80-
func NewCheckpointState(stateDir, checkpointName string) (*stateCheckpoint, error) {
44+
func NewCheckpointState(stateDir, checkpointName string) (CheckpointState, error) {
8145
if len(checkpointName) == 0 {
8246
return nil, fmt.Errorf("received empty string instead of checkpointName")
8347
}
@@ -99,7 +63,7 @@ func (sc *stateCheckpoint) GetOrCreate() (ClaimInfoStateList, error) {
9963
sc.Lock()
10064
defer sc.Unlock()
10165

102-
checkpoint := NewDRAManagerCheckpoint()
66+
checkpoint := checkpointapi.New(nil)
10367
err := sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
10468
if err == errors.ErrCheckpointNotFound {
10569
sc.store(ClaimInfoStateList{})
@@ -109,7 +73,7 @@ func (sc *stateCheckpoint) GetOrCreate() (ClaimInfoStateList, error) {
10973
return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
11074
}
11175

112-
return checkpoint.Entries, nil
76+
return checkpoint.Data, nil
11377
}
11478

11579
// saves state to a checkpoint
@@ -122,8 +86,7 @@ func (sc *stateCheckpoint) Store(claimInfoStateList ClaimInfoStateList) error {
12286

12387
// saves state to a checkpoint, caller is responsible for locking
12488
func (sc *stateCheckpoint) store(claimInfoStateList ClaimInfoStateList) error {
125-
checkpoint := NewDRAManagerCheckpoint()
126-
checkpoint.Entries = claimInfoStateList
89+
checkpoint := checkpointapi.New(claimInfoStateList)
12790

12891
err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
12992
if err != nil {

0 commit comments

Comments
 (0)