Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert E2E tests to running the PD CSI driver in a container instead of a binary directly #1900

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ else
$(warning gcp-pd-driver-windows only supports amd64.)
endif

build-container-local: init-buildx
$(DOCKER) buildx build --platform=linux --progress=plain \
-t local:local \
--build-arg BUILDPLATFORM=linux \
--build-arg STAGINGVERSION=local \
--load .

build-container: require-GCE_PD_CSI_STAGING_IMAGE require-GCE_PD_CSI_STAGING_VERSION init-buildx
$(DOCKER) buildx build --platform=linux --progress=plain \
-t $(STAGINGIMAGE):$(STAGINGVERSION) \
Expand Down
5 changes: 3 additions & 2 deletions pkg/deviceutils/device-utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,8 @@ func filterAvailableNvmeDevFsPaths(devNvmePaths []string) []string {
return diskNvmePaths
}

func findAvailableDevFsPaths() ([]string, error) {
// FindAvailableDevFsPaths returns a list of the paths of accessible devices.
func FindAvailableDevFsPaths() ([]string, error) {
diskSDPaths, err := filepath.Glob(diskSDGlob)
if err != nil {
return nil, fmt.Errorf("failed to filepath.Glob(\"%s\"): %w", diskSDGlob, err)
Expand All @@ -340,7 +341,7 @@ func findAvailableDevFsPaths() ([]string, error) {

func udevadmTriggerForDiskIfExists(deviceName string) error {
devFsPathToSerial := map[string]string{}
devFsPaths, err := findAvailableDevFsPaths()
devFsPaths, err := FindAvailableDevFsPaths()
if err != nil {
return err
}
Expand Down
12 changes: 4 additions & 8 deletions test/e2e/tests/multi_zone_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1089,8 +1089,7 @@ func testAttachAndMount(volID string, volName string, instance *remote.InstanceI

detach := func() {
// Detach Disk
err = client.ControllerUnpublishVolume(volID, instance.GetNodeID())
if err != nil {
if err := client.ControllerUnpublishVolume(volID, instance.GetNodeID()); err != nil {
klog.Errorf("Failed to detach disk: %v", err)
}
}
Expand All @@ -1110,13 +1109,11 @@ func testAttachAndMount(volID string, volName string, instance *remote.InstanceI

unstageAndDetach := func() {
// Unstage Disk
err = client.NodeUnstageVolume(volID, stageDir)
if err != nil {
if err := client.NodeUnstageVolume(volID, stageDir); err != nil {
klog.Errorf("Failed to unstage volume: %v", err)
}
fp := filepath.Join("/tmp/", volName)
err = testutils.RmAll(instance, fp)
if err != nil {
if err := testutils.RmAll(instance, fp); err != nil {
klog.Errorf("Failed to rm file path %s: %v", fp, err)
}

Expand All @@ -1139,8 +1136,7 @@ func testAttachAndMount(volID string, volName string, instance *remote.InstanceI

unpublish := func() {
// Unpublish Disk
err = client.NodeUnpublishVolume(volID, publishDir)
if err != nil {
if err := client.NodeUnpublishVolume(volID, publishDir); err != nil {
klog.Errorf("Failed to unpublish volume: %v", err)
}
}
Expand Down
12 changes: 10 additions & 2 deletions test/e2e/tests/setup_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ var (
computeAlphaService *computealpha.Service
computeBetaService *computebeta.Service
kmsClient *cloudkms.KeyManagementClient
archivePath string
)

func init() {
Expand Down Expand Up @@ -98,6 +99,9 @@ var _ = BeforeSuite(func() {

klog.Infof("Running in project %v with service account %v", *project, *serviceAccount)

archivePath, err = testutils.BuildLocalImage()
Expect(err).To(BeNil())

for _, zone := range zones {
go func(curZone string) {
defer GinkgoRecover()
Expand All @@ -120,6 +124,9 @@ var _ = AfterSuite(func() {
tc.Instance.DeleteInstance()
}
}

err := testutils.RemoveLocalImage()
Expect(err).To(BeNil())
})

func notEmpty(v string) bool {
Expand All @@ -128,8 +135,9 @@ func notEmpty(v string) bool {

func getDriverConfig() testutils.DriverConfig {
return testutils.DriverConfig{
ExtraFlags: slices.Filter(nil, strings.Split(*extraDriverFlags, ","), notEmpty),
Zones: strings.Split(*zones, ","),
ExtraFlags: slices.Filter(nil, strings.Split(*extraDriverFlags, ","), notEmpty),
Zones: strings.Split(*zones, ","),
ArchivePath: archivePath,
}
}

Expand Down
27 changes: 27 additions & 0 deletions test/e2e/utils/setup-remote.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

## Install Kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

## Install Docker: https://docs.docker.com/engine/install/debian/
sudo apt-get update
sudo apt-get install ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources:
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \
bookworm stable" | \
tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update

sudo apt-get install -y uidmap docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin dbus-user-session slirp4netns docker-ce-rootless-extras

# systemctl disable --now docker.service docker.socket
# rm /var/run/docker.sock

# touch /tmp/startup-configured.txt

50 changes: 43 additions & 7 deletions test/e2e/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ import (
"fmt"
"math/rand"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"

"golang.org/x/oauth2/google"
cloudresourcemanager "google.golang.org/api/cloudresourcemanager/v1"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/klog/v2"
boskosclient "sigs.k8s.io/boskos/client"
"sigs.k8s.io/boskos/common"
Expand All @@ -41,22 +44,31 @@ const (

var (
boskos, _ = boskosclient.NewClient(os.Getenv("JOB_NAME"), "http://boskos", "", "")

pkgPath string
binPath string

archiveName = fmt.Sprintf("e2e_driver_binaries_%s.tar.gz", uuid.NewUUID())
)

type DriverConfig struct {
ComputeEndpoint string
ExtraFlags []string
Zones []string
ArchivePath string
}

func GCEClientAndDriverSetup(instance *remote.InstanceInfo, driverConfig DriverConfig) (*remote.TestContext, error) {
port := fmt.Sprintf("%v", 1024+rand.Intn(10000))
func init() {
goPath, ok := os.LookupEnv("GOPATH")
if !ok {
return nil, fmt.Errorf("Could not find environment variable GOPATH")
panic(fmt.Errorf("Could not find environment variable GOPATH"))
}
pkgPath := path.Join(goPath, "src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/")
binPath := path.Join(pkgPath, "bin/gce-pd-csi-driver")
pkgPath = path.Join(goPath, "src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/")
binPath = path.Join(pkgPath, "bin/gce-pd-csi-driver")
}

func GCEClientAndDriverSetup(instance *remote.InstanceInfo, driverConfig DriverConfig) (*remote.TestContext, error) {
port := fmt.Sprintf("%v", 1024+rand.Intn(10000))

endpoint := fmt.Sprintf("tcp://localhost:%s", port)
extra_flags := []string{
Expand All @@ -76,14 +88,15 @@ func GCEClientAndDriverSetup(instance *remote.InstanceInfo, driverConfig DriverC
workspace := remote.NewWorkspaceDir("gce-pd-e2e-")
// Log at V(6) as the compute API calls are emitted at that level and it's
// useful to see what's happening when debugging tests.
driverRunCmd := fmt.Sprintf("sh -c '/usr/bin/nohup %s/gce-pd-csi-driver -v=6 --endpoint=%s %s 2> %s/prog.out < /dev/null > /dev/null &'",
workspace, endpoint, strings.Join(extra_flags, " "), workspace)
driverRunCmd := fmt.Sprintf("docker run --privileged -v /dev/:/dev/:shared -v /tmp/:/tmp/:shared -d --network=host local:local -v=6 --endpoint=%s %s 2> %s/prog.out < /dev/null",
endpoint, strings.Join(extra_flags, " "), workspace)
config := &remote.ClientConfig{
PkgPath: pkgPath,
BinPath: binPath,
WorkspaceDir: workspace,
RunDriverCmd: driverRunCmd,
Port: port,
ArchivePath: driverConfig.ArchivePath,
}

err := os.Setenv("GCE_PD_CSI_STAGING_VERSION", "latest")
Expand Down Expand Up @@ -333,3 +346,26 @@ func ValidateLogicalLinkIsDisk(instance *remote.InstanceInfo, link, diskName str
}
return false, fmt.Errorf("symlinked disk %s for diskName %s does not match a supported /dev/sd* or /dev/nvme* path", devFsPath, diskName)
}

func BuildLocalImage() (string, error) {
klog.V(2).Infof("Building local image...")

archivePath := filepath.Join(pkgPath, archiveName)
makeCmd := exec.Command("make", "build-container-local", "-C", pkgPath)
if out, err := makeCmd.CombinedOutput(); err != nil {
return "", fmt.Errorf("failed to build local image: %v, output: %s", err, out)
}
if out, err := exec.Command("docker", "save", "local:local", "-o", archivePath).CombinedOutput(); err != nil {
return "", fmt.Errorf("failed to save local image: %v, output: %s", err, out)
}

return archivePath, nil
}

func RemoveLocalImage() error {
klog.V(2).Infof("Removing local image archive...")
if err := os.Remove(filepath.Join(pkgPath, archiveName)); err != nil {
return fmt.Errorf("failed to remove local image archive: %v", err)
}
return nil
}
68 changes: 34 additions & 34 deletions test/remote/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ package remote

import (
"fmt"
"path"
"path/filepath"
"strconv"
"strings"

"k8s.io/klog/v2"
)

func (i *InstanceInfo) UploadAndRun(archivePath, remoteWorkspace, driverRunCmd string) (int, error) {
func (i *InstanceInfo) UploadAndRun(archiveName, archivePath, remoteWorkspace, driverRunCmd, pkgPath string) (int, error) {

// Create the temp staging directory
klog.V(4).Infof("Staging test binaries on %q", i.cfg.Name)
Expand All @@ -37,53 +36,54 @@ func (i *InstanceInfo) UploadAndRun(archivePath, remoteWorkspace, driverRunCmd s
return -1, fmt.Errorf("failed to create remoteWorkspace directory %q on instance %q: %v output: %q", remoteWorkspace, i.cfg.Name, err.Error(), output)
}

// Copy the archive to the staging directory
if output, err := runSSHCommand("scp", archivePath, fmt.Sprintf("%s:%s/", i.GetSSHTarget(), remoteWorkspace)); err != nil {
// Copy the setup script to the staging directory
if output, err := runSSHCommand("scp", fmt.Sprintf("%s/test/e2e/utils/setup-remote.sh", pkgPath), fmt.Sprintf("%s:%s/", i.GetSSHTarget(), remoteWorkspace)); err != nil {
// Exit failure with the error
return -1, fmt.Errorf("failed to copy test archive: %v, output: %q", err.Error(), output)
}

// Extract the archive
archiveName := path.Base(archivePath)
cmd := getSSHCommand(" && ",
fmt.Sprintf("cd %s", remoteWorkspace),
fmt.Sprintf("tar -xzvf ./%s", archiveName),
)
klog.V(4).Infof("Extracting tar on %q", i.cfg.Name)
// Do not use sudo here, because `sudo tar -x` will recover the file ownership inside the tar ball, but
// we want the extracted files to be owned by the current user.
if output, err := i.SSHNoSudo("sh", "-c", cmd); err != nil {
// Exit failure with the error
return -1, fmt.Errorf("failed to extract test archive: %v, output: %q", err.Error(), output)
// Set up the VM env with docker and make
if output, err := i.SSH("sh", "-c", fmt.Sprintf("%s/setup-remote.sh", remoteWorkspace)); err != nil {
return -1, fmt.Errorf("failed to setup VM environment: %v, output: %q", err.Error(), output)
}

// Upload local image to remote
if output, err := runSSHCommand("scp", archivePath, fmt.Sprintf("%s:%s/", i.GetSSHTarget(), remoteWorkspace)); err != nil {
return -1, fmt.Errorf("failed to copy image archive: %v, output: %q", err, output)
}

// Run PD CSI driver as a container
klog.V(4).Infof("Starting driver on %q", i.cfg.Name)
// When the process is killed the driver should close the TCP endpoint, then we want to download the logs
output, err := i.SSH(driverRunCmd)
cmd := getSSHCommand(" && ",
fmt.Sprintf("docker load -i %v/%v", remoteWorkspace, archiveName),
driverRunCmd,
)
output, err := i.SSH("sh", "-c", cmd)
if err != nil {
// Exit failure with the error
return -1, fmt.Errorf("failed start driver, got output: %v, error: %v", output, err.Error())
return -1, fmt.Errorf("failed to load or run docker image: %v, output: %q", err, output)
}

// Get the driver PID
// ps -aux | grep /tmp/gce-pd-e2e-0180801T114407/gce-pd-csi-driver | awk '{print $2}'
driverPIDCmd := getSSHCommand(" | ",
"ps -aux",
fmt.Sprintf("grep %s", remoteWorkspace),
"grep -v grep",
// All ye who try to deal with escaped/non-escaped quotes with exec beware.
//`awk "{print \$2}"`,
)
driverPIDString, err := i.SSHNoSudo("sh", "-c", driverPIDCmd)
// Grab the container ID from `docker run` output
driverRunOutputs := strings.Split(output, "\n")
numSplits := len(driverRunOutputs)
if numSplits < 2 {
return -1, fmt.Errorf("failed to get driver container ID from driver run outputs, outputs are: %v", output)
}
// Grabbing the second last split because it contains an empty string
driverContainerID := driverRunOutputs[len(driverRunOutputs)-2]

// Grab driver PID from container ID
driverPIDStr, err := i.SSH(fmt.Sprintf("docker inspect -f {{.State.Pid}} %v", driverContainerID))
if err != nil {
// Exit failure with the error
return -1, fmt.Errorf("failed to get PID of driver, got output: %v, error: %v", output, err.Error())
return -1, fmt.Errorf("failed to get PID of driver, got output: %v, error: %v", driverPIDStr, err.Error())
}

driverPID, err := strconv.Atoi(strings.Fields(driverPIDString)[1])
driverPIDStr = strings.TrimSpace(driverPIDStr)
driverPID, err := strconv.Atoi(driverPIDStr)
if err != nil {
return -1, fmt.Errorf("failed to convert driver PID from string %s to int: %v", driverPIDString, err.Error())
return -1, fmt.Errorf("failed to convert driver PID from string %s to int: %v", driverPIDStr, err.Error())
}
klog.V(4).Infof("Driver PID is: %v", driverPID)

return driverPID, nil
}
Expand Down
21 changes: 5 additions & 16 deletions test/remote/setup-teardown.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ package remote
import (
"fmt"
"os"

"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/klog/v2"
"path"
)

// TestContext holds the CSI Client handle to a remotely connected Driver
Expand All @@ -45,6 +43,8 @@ type ClientConfig struct {
RunDriverCmd string
// Port to use as SSH tunnel on both remote and local side.
Port string
// Absolute path of the archive of the built image
ArchivePath string
}

type processes struct {
Expand All @@ -70,20 +70,9 @@ func SetupInstance(cfg InstanceConfig) (*InstanceInfo, error) {
// a CSI client to it through SHH tunnelling. It returns a TestContext with both a handle to the instance
// that the driver is on and the CSI Client object to make CSI calls to the remote driver.
func SetupNewDriverAndClient(instance *InstanceInfo, config *ClientConfig) (*TestContext, error) {
archiveName := fmt.Sprintf("e2e_driver_binaries_%s.tar.gz", uuid.NewUUID())
archivePath, err := CreateDriverArchive(archiveName, instance.cfg.Architecture, config.PkgPath, config.BinPath)
if err != nil {
return nil, err
}
defer func() {
err = os.Remove(archivePath)
if err != nil {
klog.Warningf("Failed to remove archive file %s: %v", archivePath, err)
}
}()

// Upload archive to instance and run binaries
driverPID, err := instance.UploadAndRun(archivePath, config.WorkspaceDir, config.RunDriverCmd)
archiveName := path.Base(config.ArchivePath)
driverPID, err := instance.UploadAndRun(archiveName, config.ArchivePath, config.WorkspaceDir, config.RunDriverCmd, config.PkgPath)
if err != nil {
return nil, err
}
Expand Down