From 42459e5e48442f1b3e931619ff913afe8e27143a Mon Sep 17 00:00:00 2001 From: Matthew Cary Date: Wed, 22 Oct 2025 22:44:58 +0000 Subject: [PATCH] Unpublish legacy volume even if it appears in multiple zones Change-Id: I4e8cd457ab869f52e9202d2d1640c902b169f330 --- pkg/gce-cloud-provider/compute/fake-gce.go | 32 +---- pkg/gce-cloud-provider/compute/gce-compute.go | 52 ++++---- .../compute/gce-compute_test.go | 118 ++++++++++++++++++ pkg/gce-pd-csi-driver/controller.go | 10 +- 4 files changed, 148 insertions(+), 64 deletions(-) diff --git a/pkg/gce-cloud-provider/compute/fake-gce.go b/pkg/gce-cloud-provider/compute/fake-gce.go index 9e8b8ca53..a9328284e 100644 --- a/pkg/gce-cloud-provider/compute/fake-gce.go +++ b/pkg/gce-cloud-provider/compute/fake-gce.go @@ -30,7 +30,6 @@ import ( "google.golang.org/grpc/status" "k8s.io/klog/v2" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common" - "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/constants" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/parameters" "k8s.io/apimachinery/pkg/util/sets" @@ -103,35 +102,8 @@ func (cloud *FakeCloudProvider) GetDefaultZone() string { return cloud.zone } -func (cloud *FakeCloudProvider) RepairUnderspecifiedVolumeKey(ctx context.Context, project string, volumeKey *meta.Key) (string, *meta.Key, error) { - if project == constants.UnspecifiedValue { - project = cloud.project - } - switch volumeKey.Type() { - case meta.Zonal: - if volumeKey.Zone != constants.UnspecifiedValue { - return project, volumeKey, nil - } - for diskVolKey, d := range cloud.disks { - if diskVolKey == volumeKey.String() { - volumeKey.Zone = d.GetZone() - return project, volumeKey, nil - } - } - return "", nil, notFoundError() - case meta.Regional: - if volumeKey.Region != constants.UnspecifiedValue { - return project, volumeKey, nil - } - r, err := common.GetRegionFromZones([]string{cloud.zone}) - if err != nil { - return "", nil, fmt.Errorf("failed to get region from zones: %w", err) - } - volumeKey.Region = r - return project, volumeKey, nil - default: - return "", nil, fmt.Errorf("Volume key %v not zonal nor regional", volumeKey.Name) - } +func (cloud *FakeCloudProvider) RepairUnderspecifiedVolumeKey(ctx context.Context, project string, volumeKey *meta.Key, fallbackZone string) (string, *meta.Key, error) { + return repairUnderspecifiedVolumeKeyWithProvider(ctx, cloud, project, volumeKey, fallbackZone) } func (cloud *FakeCloudProvider) ListZones(ctx context.Context, region string) ([]string, error) { diff --git a/pkg/gce-cloud-provider/compute/gce-compute.go b/pkg/gce-cloud-provider/compute/gce-compute.go index de06d9db7..6e5ed1750 100644 --- a/pkg/gce-cloud-provider/compute/gce-compute.go +++ b/pkg/gce-cloud-provider/compute/gce-compute.go @@ -21,6 +21,7 @@ import ( "fmt" "net/http" "regexp" + "slices" "strings" "time" @@ -39,7 +40,6 @@ import ( "google.golang.org/grpc/status" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" - "k8s.io/utils/strings/slices" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/constants" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/parameters" @@ -101,7 +101,7 @@ type GCECompute interface { GetDefaultZone() string // Disk Methods GetDisk(ctx context.Context, project string, volumeKey *meta.Key) (*CloudDisk, error) - RepairUnderspecifiedVolumeKey(ctx context.Context, project string, volumeKey *meta.Key) (string, *meta.Key, error) + RepairUnderspecifiedVolumeKey(ctx context.Context, project string, volumeKey *meta.Key, fallbackZone string) (string, *meta.Key, error) InsertDisk(ctx context.Context, project string, volKey *meta.Key, params parameters.DiskParameters, capBytes int64, capacityRange *csi.CapacityRange, replicaZones []string, snapshotID string, volumeContentSourceVolumeID string, multiWriter bool, accessMode string) error DeleteDisk(ctx context.Context, project string, volumeKey *meta.Key) error UpdateDisk(ctx context.Context, project string, volKey *meta.Key, existingDisk *CloudDisk, params parameters.ModifyVolumeParameters) error @@ -292,26 +292,30 @@ func (cloud *CloudProvider) listInstancesForProject(service *computev1.Service, // RepairUnderspecifiedVolumeKey will query the cloud provider and check each zone for the disk specified // by the volume key and return a volume key with a correct zone -func (cloud *CloudProvider) RepairUnderspecifiedVolumeKey(ctx context.Context, project string, volumeKey *meta.Key) (string, *meta.Key, error) { +func (cloud *CloudProvider) RepairUnderspecifiedVolumeKey(ctx context.Context, project string, volumeKey *meta.Key, fallbackZone string) (string, *meta.Key, error) { + return repairUnderspecifiedVolumeKeyWithProvider(ctx, cloud, project, volumeKey, fallbackZone) +} + +func repairUnderspecifiedVolumeKeyWithProvider(ctx context.Context, cloud GCECompute, project string, volumeKey *meta.Key, fallbackZone string) (string, *meta.Key, error) { klog.V(5).Infof("Repairing potentially underspecified volume key %v", volumeKey) if project == constants.UnspecifiedValue { - project = cloud.project + project = cloud.GetDefaultProject() } - region, err := common.GetRegionFromZones([]string{cloud.zone}) + region, err := common.GetRegionFromZones([]string{cloud.GetDefaultZone()}) if err != nil { return "", nil, fmt.Errorf("failed to get region from zones: %w", err) } switch volumeKey.Type() { case meta.Zonal: - foundZone := "" if volumeKey.Zone == constants.UnspecifiedValue { // list all zones, try to get disk in each zone zones, err := cloud.ListZones(ctx, region) if err != nil { return "", nil, err } + diskZones := []string{} for _, zone := range zones { - _, err := cloud.getZonalDiskOrError(ctx, project, zone, volumeKey.Name) + _, err := cloud.GetDisk(ctx, project, &meta.Key{Name: volumeKey.Name, Zone: zone}) if err != nil { if IsGCENotFoundError(err) { // Couldn't find the disk in this zone so we keep @@ -322,16 +326,22 @@ func (cloud *CloudProvider) RepairUnderspecifiedVolumeKey(ctx context.Context, p // so we return error immediately return "", nil, err } - if len(foundZone) > 0 { - return "", nil, fmt.Errorf("found disk %s in more than one zone: %s and %s", volumeKey.Name, foundZone, zone) - } - foundZone = zone + diskZones = append(diskZones, zone) } - if len(foundZone) == 0 { + if len(diskZones) == 0 { return "", nil, notFoundError() + } else if len(diskZones) > 1 && fallbackZone == "" { + return "", nil, fmt.Errorf("found disk %s in more than one zone and no fallback: %v", volumeKey.Name, diskZones) + } else if len(diskZones) > 1 && fallbackZone != "" { + if !slices.Contains(diskZones, fallbackZone) { + return "", nil, fmt.Errorf("found disk %s in more than one zone (%v) but none match fallback zone %s", volumeKey.Name, diskZones, fallbackZone) + } + volumeKey.Zone = fallbackZone + } else { + volumeKey.Zone = diskZones[0] } - volumeKey.Zone = foundZone + return project, volumeKey, nil } return project, volumeKey, nil @@ -394,22 +404,6 @@ func (cloud *CloudProvider) GetDisk(ctx context.Context, project string, key *me } } -func (cloud *CloudProvider) getZonalDiskOrError(ctx context.Context, project, volumeZone, volumeName string) (*computev1.Disk, error) { - disk, err := cloud.service.Disks.Get(project, volumeZone, volumeName).Context(ctx).Do() - if err != nil { - return nil, err - } - return disk, nil -} - -func (cloud *CloudProvider) getRegionalDiskOrError(ctx context.Context, project, volumeRegion, volumeName string) (*computev1.Disk, error) { - disk, err := cloud.service.RegionDisks.Get(project, volumeRegion, volumeName).Context(ctx).Do() - if err != nil { - return nil, err - } - return disk, nil -} - func (cloud *CloudProvider) getZonalBetaDiskOrError(ctx context.Context, project, volumeZone, volumeName string) (*computebeta.Disk, error) { disk, err := cloud.betaService.Disks.Get(project, volumeZone, volumeName).Context(ctx).Do() if err != nil { diff --git a/pkg/gce-cloud-provider/compute/gce-compute_test.go b/pkg/gce-cloud-provider/compute/gce-compute_test.go index ebfef1c60..537d68a18 100644 --- a/pkg/gce-cloud-provider/compute/gce-compute_test.go +++ b/pkg/gce-cloud-provider/compute/gce-compute_test.go @@ -16,9 +16,12 @@ package gcecloudprovider import ( "context" + "fmt" "testing" + "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta" computebeta "google.golang.org/api/compute/v0.beta" + "google.golang.org/api/compute/v1" computev1 "google.golang.org/api/compute/v1" "google.golang.org/grpc/codes" "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common" @@ -331,3 +334,118 @@ func TestCodeForGCEOpError(t *testing.T) { } } } + +func TestRepairUnderspecifiedVolumeKey(t *testing.T) { + cloudProvider, err := CreateFakeCloudProvider("project-id", "country-region-fakefirstzone", []*CloudDisk{ + CloudDiskFromV1(&compute.Disk{ + Name: "disk-a", + Zone: "country-region-fakefirstzone", + SelfLink: fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/project-id/zones/country-region-fakefirstzone/disks/disk-a"), + }), + CloudDiskFromV1(&compute.Disk{ + Name: "disk-ab", + Zone: "country-region-fakefirstzone", + SelfLink: fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/project-id/zones/country-region-fakefirstzone/disks/disk-ab"), + }), + CloudDiskFromV1(&compute.Disk{ + Name: "disk-ab", + Zone: "country-region-fakesecondzone", + SelfLink: fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/project-id/zones/country-region-fakesecondzone/disks/disk-ab"), + }), + }) + if err != nil { + t.Fatalf("can't create fake cloud provider: %v", err) + } + + for _, tc := range []struct { + testName string + project string + key meta.Key + fallback string + expectedProject string + expectedKey meta.Key + expectError bool + }{ + { + testName: "fully specified", + project: "my-project", + key: meta.Key{Name: "disk", Zone: "zone-1"}, + expectedProject: "my-project", + expectedKey: meta.Key{Name: "disk", Zone: "zone-1"}, + }, + { + testName: "fully specified, fallback ignored", + project: "my-project", + key: meta.Key{Name: "disk", Zone: "zone-1"}, + fallback: "zone-2", + expectedProject: "my-project", + expectedKey: meta.Key{Name: "disk", Zone: "zone-1"}, + }, + { + testName: "unspecified zonal", + project: "UNSPECIFIED", + key: meta.Key{Name: "disk-a", Zone: "UNSPECIFIED"}, + expectedProject: "project-id", + expectedKey: meta.Key{Name: "disk-a", Zone: "country-region-fakefirstzone"}, + }, + { + testName: "unspecified regional", + project: "UNSPECIFIED", + key: meta.Key{Name: "disk-a", Region: "UNSPECIFIED"}, + expectedProject: "project-id", + expectedKey: meta.Key{Name: "disk-a", Region: "country-region"}, + }, + { + testName: "multizone regional", + project: "UNSPECIFIED", + key: meta.Key{Name: "disk-ab", Region: "UNSPECIFIED"}, + expectedProject: "project-id", + expectedKey: meta.Key{Name: "disk-ab", Region: "country-region"}, + }, + { + testName: "multi-zone, no fallback", + project: "project-id", + key: meta.Key{Name: "disk-ab", Zone: "UNSPECIFIED"}, + expectError: true, + }, + { + testName: "multi-zone, no matching fallback", + project: "project-id", + key: meta.Key{Name: "disk-ab", Zone: "UNSPECIFIED"}, + fallback: "unknown-zone", + expectError: true, + }, + { + testName: "multi-zone, fallback", + project: "my-project", + key: meta.Key{Name: "disk-ab", Zone: "UNSPECIFIED"}, + fallback: "country-region-fakesecondzone", + expectedProject: "my-project", + expectedKey: meta.Key{Name: "disk-ab", Zone: "country-region-fakesecondzone"}, + }, + } { + t.Run(tc.testName, func(t *testing.T) { + // RepairUnderspecifiedVolumeKey mutates the argument as well as returning it, sigh. We verify those semantics here + key := tc.key + prj, retKey, err := cloudProvider.RepairUnderspecifiedVolumeKey(context.Background(), tc.project, &key, tc.fallback) + if tc.expectError { + if err == nil { + t.Error("Expected error but got none") + } + } else { + if err != nil { + t.Errorf("Expected no error but got %v", err) + } + if retKey != &key { + t.Error("Did not return argument key") + } + if prj != tc.expectedProject { + t.Errorf("Got project %s, expected %s", prj, tc.expectedProject) + } + if key.Name != tc.expectedKey.Name || key.Zone != tc.expectedKey.Zone || key.Region != tc.expectedKey.Region { + t.Errorf("Got key %+v, expected %+v", key, tc.expectedKey) + } + } + }) + } +} diff --git a/pkg/gce-pd-csi-driver/controller.go b/pkg/gce-pd-csi-driver/controller.go index 8b6126f43..5899db8ef 100644 --- a/pkg/gce-pd-csi-driver/controller.go +++ b/pkg/gce-pd-csi-driver/controller.go @@ -970,7 +970,7 @@ func (gceCS *GCEControllerServer) deleteMultiZoneDisk(ctx context.Context, req * func (gceCS *GCEControllerServer) deleteSingleDeviceDisk(ctx context.Context, req *csi.DeleteVolumeRequest, project string, volKey *meta.Key) (*csi.DeleteVolumeResponse, error) { var err error volumeID := req.GetVolumeId() - project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey) + project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey, "") if err != nil { if gce.IsGCENotFoundError(err) { klog.Warningf("DeleteVolume treating volume as deleted because cannot find volume %v: %v", volumeID, err.Error()) @@ -1146,7 +1146,7 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con volKey = convertMultiZoneVolKeyToZoned(volKey, instanceZone) } - project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey) + project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey, "") if err != nil { if gce.IsGCENotFoundError(err) { return nil, status.Errorf(codes.NotFound, "ControllerPublishVolume could not find volume with ID %v: %v", volumeID, err.Error()), nil @@ -1294,7 +1294,7 @@ func (gceCS *GCEControllerServer) executeControllerUnpublishVolume(ctx context.C volKey = convertMultiZoneVolKeyToZoned(volKey, instanceZone) } - project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey) + project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey, instanceZone) if err != nil { if gce.IsGCENotFoundError(err) { klog.Warningf("Treating volume %v as unpublished because it could not be found", volumeID) @@ -1366,7 +1366,7 @@ func (gceCS *GCEControllerServer) ValidateVolumeCapabilities(ctx context.Context return nil, status.Errorf(codes.InvalidArgument, "Volume ID is invalid: %v", err.Error()) } - project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey) + project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey, "") if err != nil { if gce.IsGCENotFoundError(err) { return nil, status.Errorf(codes.NotFound, "ValidateVolumeCapabilities could not find volume with ID %v: %v", volumeID, err.Error()) @@ -1962,7 +1962,7 @@ func (gceCS *GCEControllerServer) ControllerExpandVolume(ctx context.Context, re if err != nil { return nil, status.Errorf(codes.InvalidArgument, "ControllerExpandVolume Volume ID is invalid: %v", err.Error()) } - project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey) + project, volKey, err = gceCS.CloudProvider.RepairUnderspecifiedVolumeKey(ctx, project, volKey, "") if err != nil { if gce.IsGCENotFoundError(err) {