Skip to content

Commit

Permalink
Add debug info to error logs if Mountpoint fails to start (#385)
Browse files Browse the repository at this point in the history
As an example, I used an incorrect IAM role with PLI, and consequently
my Pod failed to start, and then when I do `kubectl describe
busybox-pli`, I got the following:

```
Events:
  Type     Reason            Age              From               Message
  ----     ------            ----             ----               -------
  Warning  FailedScheduling  14s              default-scheduler  0/2 nodes are available: pod has unbound immediate PersistentVolumeClaims. preemption: 0/2 nodes are available: 2 Preemption is not helpful for scheduling.
  Normal   Scheduled         7s               default-scheduler  Successfully assigned default/busybox-pli to ....eu-north-1.compute.internal
  Warning  FailedMount       3s (x2 over 4s)  kubelet            MountVolume.SetUp failed for volume "s3-mp-csi-pv-pli" : rpc error: code = Internal desc = Could not mount "amzn-s3-demo-bucket" at "/var/lib/kubelet/pods/7405ef8c-02bd-44b6-8a02-9c62f3ceaab4/volumes/kubernetes.io~csi/s3-mp-csi-pv-pli/mount": Failed to wait for Mountpoint Pod mp-7ecac1a54cebd682c4ab95a8a7dd94e9b3321d27b232754561433f6f to be ready for /var/lib/kubelet/pods/7405ef8c-02bd-44b6-8a02-9c62f3ceaab4/volumes/kubernetes.io~csi/s3-mp-csi-pv-pli/mount: Mountpoint Pod mp-7ecac1a54cebd682c4ab95a8a7dd94e9b3321d27b232754561433f6f failed: Error: Failed to create S3 client

Caused by:
    0: initial ListObjectsV2 failed for bucket amzn-s3-demo-bucket in region eu-north-1
    1: Client error
    2: No signing credentials available, see CRT debug logs

You can see Mountpoint logs by running: `kubectl logs -n mount-s3 mp-7ecac1a54cebd682c4ab95a8a7dd94e9b3321d27b232754561433f6f`. If the Mountpoint Pod already restarted, you can also pass `--previous` to get logs from the previous run.
```

---

By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.
  • Loading branch information
unexge authored Feb 21, 2025
1 parent 1b07cc2 commit 5b2e4b4
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 17 deletions.
36 changes: 19 additions & 17 deletions pkg/driver/node/mounter/pod_mounter.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,10 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
return fmt.Errorf("Failed to wait for Mountpoint Pod to be ready for %q: %w", target, err)
}

// TODO: After this point we know Mountpoint Pod's name, we should add some debugging tips to our error messages returned, like:
// """
// Failed to mount on "..." because "..."`
// You can see logs for Mountpoint Pod by running:
// `kubectl logs -n mount-s3 "mp-..." --previous`
// """

podCredentialsPath, err := pm.ensureCredentialsDirExists(podPath)
if err != nil {
return fmt.Errorf("Failed to create credentials directory for %q: %W", target, err)
klog.Errorf("Failed to create credentials directory for %q: %v", target, err)
return fmt.Errorf("Failed to create credentials directory for %q: %w", target, err)
}

credentialCtx.SetWriteAndEnvPath(podCredentialsPath, mppod.PathInsideMountpointPod(mppod.KnownPathCredentials))
Expand All @@ -113,8 +107,8 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
// there is an existing mount point at `target`.
credEnv, authenticationSource, err := pm.credProvider.Provide(ctx, credentialCtx)
if err != nil {
klog.V(4).Infof("NodePublishVolume: Failed to provide credentials for %s: %v", target, err)
return fmt.Errorf("Failed to provide credentials for %q: %w", target, err)
klog.Errorf("Failed to provide credentials for %s: %v\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
return fmt.Errorf("Failed to provide credentials for %q: %w\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
}

if isMountPoint {
Expand All @@ -139,6 +133,7 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin

fuseDeviceFD, err := pm.mountSyscallWithDefault(target, args)
if err != nil {
klog.Errorf("Failed to mount %s: %v", target, err)
return fmt.Errorf("Failed to mount %s: %w", target, err)
}

Expand Down Expand Up @@ -172,13 +167,14 @@ func (pm *PodMounter) Mount(ctx context.Context, bucketName string, target strin
Env: env.List(),
})
if err != nil {
klog.V(4).Infof("Failed to send mount option to Mountpoint Pod %s for %s: %v\n", pod.Name, target, err)
return fmt.Errorf("Failed to send mount options to Mountpoint Pod %s for %s: %w", pod.Name, target, err)
klog.Errorf("Failed to send mount option to Mountpoint Pod %s for %s: %v\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
return fmt.Errorf("Failed to send mount options to Mountpoint Pod %s for %s: %w\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
}

err = pm.waitForMount(ctx, target, pod.Name, podMountErrorPath)
if err != nil {
return fmt.Errorf("Failed to wait for Mountpoint Pod %s to be ready for %s: %w", pod.Name, target, err)
klog.Errorf("Failed to wait for Mountpoint Pod %s to be ready for %s: %v\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
return fmt.Errorf("Failed to wait for Mountpoint Pod %s to be ready for %s: %w\n%s", pod.Name, target, err, pm.helpMessageForGettingMountpointLogs(pod))
}

// Mountpoint successfully started, so don't unmount the filesystem
Expand All @@ -197,7 +193,7 @@ func (pm *PodMounter) Unmount(ctx context.Context, target string, credentialCtx

// TODO: If `target` is a `systemd`-mounted Mountpoint, this would return an error,
// but we should still unmount it and clean the credentials.
_, podPath, err := pm.waitForMountpointPod(ctx, podID, volumeName)
pod, podPath, err := pm.waitForMountpointPod(ctx, podID, volumeName)
if err != nil {
klog.Errorf("Failed to wait for Mountpoint Pod to be ready for %q: %v", target, err)
return fmt.Errorf("Failed to wait for Mountpoint Pod for %q: %w", target, err)
Expand All @@ -209,18 +205,20 @@ func (pm *PodMounter) Unmount(ctx context.Context, target string, credentialCtx
podMountExitPath := mppod.PathOnHost(podPath, mppod.KnownPathMountExit)
_, err = os.OpenFile(podMountExitPath, os.O_RDONLY|os.O_CREATE, credentialprovider.CredentialFilePerm)
if err != nil {
return fmt.Errorf("Failed to send a exit message to Mountpoint Pod for %q: %w", target, err)
klog.Errorf("Failed to send a exit message to Mountpoint Pod for %q: %s\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
return fmt.Errorf("Failed to send a exit message to Mountpoint Pod for %q: %w\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
}

err = pm.unmountTarget(target)
if err != nil {
klog.Errorf("Failed to unmount %q: %v", target, err)
return fmt.Errorf("Failed to unmount %q: %w", target, err)
}

err = pm.credProvider.Cleanup(credentialCtx)
if err != nil {
klog.V(4).Infof("Unmount: Failed to clean up credentials for %s: %v", target, err)
return fmt.Errorf("Failed to clean up credentials for %q: %w", target, err)
klog.Errorf("Failed to clean up credentials for %s: %v\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
return fmt.Errorf("Failed to clean up credentials for %q: %w\n%s", target, err, pm.helpMessageForGettingMountpointLogs(pod))
}

return nil
Expand Down Expand Up @@ -376,3 +374,7 @@ func (pm *PodMounter) volumeNameFromTargetPath(target string) (string, error) {
}
return tp.VolumeID, nil
}

func (pm *PodMounter) helpMessageForGettingMountpointLogs(pod *corev1.Pod) string {
return fmt.Sprintf("You can see Mountpoint logs by running: `kubectl logs -n %s %s`. If the Mountpoint Pod already restarted, you can also pass `--previous` to get logs from the previous run.", pod.Namespace, pod.Name)
}
41 changes: 41 additions & 0 deletions pkg/driver/node/mounter/pod_mounter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"os"
"path/filepath"
"strings"
"sync/atomic"
"syscall"
"testing"
Expand Down Expand Up @@ -293,6 +294,46 @@ func TestPodMounter(t *testing.T) {
t.Errorf("it should unmount the target path if Mountpoint fails to start")
}
})

t.Run("Adds a help message to see Mountpoint logs if Mountpoint Pod fails to start", func(t *testing.T) {
testCtx := setup(t)

testCtx.mountSyscall = func(target string, args mountpoint.Args) (fd int, err error) {
// Does not do real mounting
return int(mountertest.OpenDevNull(t).Fd()), nil
}

mpPod := createMountpointPod(testCtx)

go func() {
mpPod.run()
mpPod.receiveMountOptions(testCtx.ctx)

// Emulate that Mountpoint failed to mount
mountErrorPath := mppod.PathOnHost(mpPod.podPath, mppod.KnownPathMountError)
err := os.WriteFile(mountErrorPath, []byte("mount failed"), 0777)
assert.NoError(t, err)
}()

err := testCtx.podMounter.Mount(testCtx.ctx, testCtx.bucketName, testCtx.targetPath, credentialprovider.ProvideContext{
VolumeID: testCtx.volumeID,
PodID: testCtx.podUID,
}, mountpoint.ParseArgs(nil))
if err == nil {
t.Errorf("mount shouldn't succeeded if Mountpoint fails to start")
}

mpLogsCmd := fmt.Sprintf("kubectl logs -n %s %s", mountpointPodNamespace, mpPod.pod.Name)
if !strings.Contains(err.Error(), mpLogsCmd) {
t.Errorf("Expected error message to contain a help message to get Mountpoint logs %s, but got: %s", mpLogsCmd, err.Error())
}

ok, err := testCtx.mount.IsMountPoint(testCtx.targetPath)
assert.NoError(t, err)
if ok {
t.Errorf("it should unmount the target path if Mountpoint fails to start")
}
})
})

t.Run("Checking if target is a mount point", func(t *testing.T) {
Expand Down

0 comments on commit 5b2e4b4

Please sign in to comment.