Add debug info to error logs if Mountpoint fails to start (#385)

As an example, I used an incorrect IAM role with PLI, and consequently my Pod failed to start, and then when I do `kubectl describe busybox-pli`, I got the following: ``` Events: Type Reason Age From Message ---- ------ ---- ---- ------- Warning FailedScheduling 14s default-scheduler 0/2 nodes are available: pod has unbound immediate PersistentVolumeClaims. preemption: 0/2 nodes are available: 2 Preemption is not helpful for scheduling. Normal Scheduled 7s default-scheduler Successfully assigned default/busybox-pli to ....eu-north-1.compute.internal Warning FailedMount 3s (x2 over 4s) kubelet MountVolume.SetUp failed for volume "s3-mp-csi-pv-pli" : rpc error: code = Internal desc = Could not mount "amzn-s3-demo-bucket" at "/var/lib/kubelet/pods/7405ef8c-02bd-44b6-8a02-9c62f3ceaab4/volumes/kubernetes.io~csi/s3-mp-csi-pv-pli/mount": Failed to wait for Mountpoint Pod mp-7ecac1a54cebd682c4ab95a8a7dd94e9b3321d27b232754561433f6f to be ready for /var/lib/kubelet/pods/7405ef8c-02bd-44b6-8a02-9c62f3ceaab4/volumes/kubernetes.io~csi/s3-mp-csi-pv-pli/mount: Mountpoint Pod mp-7ecac1a54cebd682c4ab95a8a7dd94e9b3321d27b232754561433f6f failed: Error: Failed to create S3 client Caused by: 0: initial ListObjectsV2 failed for bucket amzn-s3-demo-bucket in region eu-north-1 1: Client error 2: No signing credentials available, see CRT debug logs You can see Mountpoint logs by running: `kubectl logs -n mount-s3 mp-7ecac1a54cebd682c4ab95a8a7dd94e9b3321d27b232754561433f6f`. If the Mountpoint Pod already restarted, you can also pass `--previous` to get logs from the previous run. ``` --- By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
awslabs · Feb 21, 2025 · 5b2e4b4 · 5b2e4b4
1 parent 1b07cc2
commit 5b2e4b4
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 17 deletions.
diff --git a/pkg/driver/node/mounter/pod_mounter.go b/pkg/driver/node/mounter/pod_mounter.go
@@ -95,16 +95,10 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
 		return fmt.Errorf("Failed to wait for Mountpoint Pod to be ready for %q: %w", target, err)
 	}
 
-	// TODO: After this point we know Mountpoint Pod's name, we should add some debugging tips to our error messages returned, like:
-	// """
-	// Failed to mount on "..." because "..."`
-	// You can see logs for Mountpoint Pod by running:
-	// `kubectl logs -n mount-s3 "mp-..." --previous`
-	// """
-
 	podCredentialsPath, err := pm.ensureCredentialsDirExists(podPath)
 	if err != nil {
-		return fmt.Errorf("Failed to create credentials directory for %q: %W", target, err)
+		klog.Errorf("Failed to create credentials directory for %q: %v", target, err)
+		return fmt.Errorf("Failed to create credentials directory for %q: %w", target, err)
 	}
 
 	credentialCtx.SetWriteAndEnvPath(podCredentialsPath, mppod.PathInsideMountpointPod(mppod.KnownPathCredentials))
@@ -113,8 +107,8 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
 	// there is an existing mount point at `target`.
 	credEnv, authenticationSource, err := pm.credProvider.Provide(ctx, credentialCtx)
 	if err != nil {
-		klog.V(4).Infof("NodePublishVolume: Failed to provide credentials for %s: %v", target, err)
-		return fmt.Errorf("Failed to provide credentials for %q: %w", target, err)
+		klog.Errorf("Failed to provide credentials for %s: %v\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
+		return fmt.Errorf("Failed to provide credentials for %q: %w\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
 	}
 
 	if isMountPoint {
@@ -139,6 +133,7 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
 
 	fuseDeviceFD, err := pm.mountSyscallWithDefault(target, args)
 	if err != nil {
+		klog.Errorf("Failed to mount %s: %v", target, err)
 		return fmt.Errorf("Failed to mount %s: %w", target, err)
 	}
 
@@ -172,13 +167,14 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
 		Env:        env.List(),
 	})
 	if err != nil {
-		klog.V(4).Infof("Failed to send mount option to Mountpoint Pod %s for %s: %v\n", pod.Name, target, err)
-		return fmt.Errorf("Failed to send mount options to Mountpoint Pod %s for %s: %w", pod.Name, target, err)
+		klog.Errorf("Failed to send mount option to Mountpoint Pod %s for %s: %v\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
+		return fmt.Errorf("Failed to send mount options to Mountpoint Pod %s for %s: %w\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
 	}
 
 	err = pm.waitForMount(ctx, target, pod.Name, podMountErrorPath)
 	if err != nil {
-		return fmt.Errorf("Failed to wait for Mountpoint Pod %s to be ready for %s: %w", pod.Name, target, err)
+		klog.Errorf("Failed to wait for Mountpoint Pod %s to be ready for %s: %v\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
+		return fmt.Errorf("Failed to wait for Mountpoint Pod %s to be ready for %s: %w\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
 	}
 
 	// Mountpoint successfully started, so don't unmount the filesystem
@@ -197,7 +193,7 @@ func (pm *PodMounter) Unmount(ctx context.Context, target string, credentialCtx
 
 	// TODO: If `target` is a `systemd`-mounted Mountpoint, this would return an error,
 	// but we should still unmount it and clean the credentials.
-	_, podPath, err := pm.waitForMountpointPod(ctx, podID, volumeName)
+	pod, podPath, err := pm.waitForMountpointPod(ctx, podID, volumeName)
 	if err != nil {
 		klog.Errorf("Failed to wait for Mountpoint Pod to be ready for %q: %v", target, err)
 		return fmt.Errorf("Failed to wait for Mountpoint Pod for %q: %w", target, err)
@@ -209,18 +205,20 @@ func (pm *PodMounter) Unmount(ctx context.Context, target string, credentialCtx
 	podMountExitPath := mppod.PathOnHost(podPath, mppod.KnownPathMountExit)
 	_, err = os.OpenFile(podMountExitPath, os.O_RDONLY|os.O_CREATE, credentialprovider.CredentialFilePerm)
 	if err != nil {
-		return fmt.Errorf("Failed to send a exit message to Mountpoint Pod for %q: %w", target, err)
+		klog.Errorf("Failed to send a exit message to Mountpoint Pod for %q: %s\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
+		return fmt.Errorf("Failed to send a exit message to Mountpoint Pod for %q: %w\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
 	}
 
 	err = pm.unmountTarget(target)
 	if err != nil {
+		klog.Errorf("Failed to unmount %q: %v", target, err)
 		return fmt.Errorf("Failed to unmount %q: %w", target, err)
 	}
 
 	err = pm.credProvider.Cleanup(credentialCtx)
 	if err != nil {
-		klog.V(4).Infof("Unmount: Failed to clean up credentials for %s: %v", target, err)
-		return fmt.Errorf("Failed to clean up credentials for %q: %w", target, err)
+		klog.Errorf("Failed to clean up credentials for %s: %v\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
+		return fmt.Errorf("Failed to clean up credentials for %q: %w\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
 	}
 
 	return nil
@@ -376,3 +374,7 @@ func (pm *PodMounter) volumeNameFromTargetPath(target string) (string, error) {
 	}
 	return tp.VolumeID, nil
 }
+
+func (pm *PodMounter) helpMessageForGettingMountpointLogs(pod *corev1.Pod) string {
+	return fmt.Sprintf("You can see Mountpoint logs by running: `kubectl logs -n %s %s`. If the Mountpoint Pod already restarted, you can also pass `--previous` to get logs from the previous run.", pod.Namespace, pod.Name)
+}
diff --git a/pkg/driver/node/mounter/pod_mounter_test.go b/pkg/driver/node/mounter/pod_mounter_test.go
@@ -6,6 +6,7 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"strings"
 	"sync/atomic"
 	"syscall"
 	"testing"
@@ -293,6 +294,46 @@ func TestPodMounter(t *testing.T) {
 				t.Errorf("it should unmount the target path if Mountpoint fails to start")
 			}
 		})
+
+		t.Run("Adds a help message to see Mountpoint logs if Mountpoint Pod fails to start", func(t *testing.T) {
+			testCtx := setup(t)
+
+			testCtx.mountSyscall = func(target string, args mountpoint.Args) (fd int, err error) {
+				// Does not do real mounting
+				return int(mountertest.OpenDevNull(t).Fd()), nil
+			}
+
+			mpPod := createMountpointPod(testCtx)
+
+			go func() {
+				mpPod.run()
+				mpPod.receiveMountOptions(testCtx.ctx)
+
+				// Emulate that Mountpoint failed to mount
+				mountErrorPath := mppod.PathOnHost(mpPod.podPath, mppod.KnownPathMountError)
+				err := os.WriteFile(mountErrorPath, []byte("mount failed"), 0777)
+				assert.NoError(t, err)
+			}()
+
+			err := testCtx.podMounter.Mount(testCtx.ctx, testCtx.bucketName, testCtx.targetPath, credentialprovider.ProvideContext{
+				VolumeID: testCtx.volumeID,
+				PodID:    testCtx.podUID,
+			}, mountpoint.ParseArgs(nil))
+			if err == nil {
+				t.Errorf("mount shouldn't succeeded if Mountpoint fails to start")
+			}
+
+			mpLogsCmd := fmt.Sprintf("kubectl logs -n %s %s", mountpointPodNamespace, mpPod.pod.Name)
+			if !strings.Contains(err.Error(), mpLogsCmd) {
+				t.Errorf("Expected error message to contain a help message to get Mountpoint logs %s, but got: %s", mpLogsCmd, err.Error())
+			}
+
+			ok, err := testCtx.mount.IsMountPoint(testCtx.targetPath)
+			assert.NoError(t, err)
+			if ok {
+				t.Errorf("it should unmount the target path if Mountpoint fails to start")
+			}
+		})
 	})
 
 	t.Run("Checking if target is a mount point", func(t *testing.T) {