diff --git a/src/bun.js/node/dir_iterator.zig b/src/bun.js/node/dir_iterator.zig index 215fcf395ed960..8e5493e76f5839 100644 --- a/src/bun.js/node/dir_iterator.zig +++ b/src/bun.js/node/dir_iterator.zig @@ -162,7 +162,7 @@ pub fn NewIterator(comptime use_windows_ospath: bool) type { continue :start_over; } - const entry_kind = switch (linux_entry.type) { + const entry_kind: Entry.Kind = switch (linux_entry.type) { linux.DT.BLK => Entry.Kind.block_device, linux.DT.CHR => Entry.Kind.character_device, linux.DT.DIR => Entry.Kind.directory, @@ -170,6 +170,9 @@ pub fn NewIterator(comptime use_windows_ospath: bool) type { linux.DT.LNK => Entry.Kind.sym_link, linux.DT.REG => Entry.Kind.file, linux.DT.SOCK => Entry.Kind.unix_domain_socket, + // DT_UNKNOWN: Some filesystems (e.g., bind mounts, FUSE, NFS) + // don't provide d_type. Callers should use lstatat() to determine + // the type when needed (lazy stat pattern for performance). else => Entry.Kind.unknown, }; return .{ diff --git a/src/bun.js/node/node_fs.zig b/src/bun.js/node/node_fs.zig index 0baa7b8a947290..135859c37ab7ab 100644 --- a/src/bun.js/node/node_fs.zig +++ b/src/bun.js/node/node_fs.zig @@ -4627,6 +4627,9 @@ pub const NodeFS = struct { break :brk bun.path.joinZBuf(buf, &path_parts, .auto); }; + // Kind reported for this entry: starts as current.kind, replaced by the lstatat result when d_type was .unknown + var effective_kind = current.kind; + enqueue: { switch (current.kind) { // a symlink might be a directory or might not be @@ -4646,6 +4649,24 @@ pub const NodeFS = struct { async_task.enqueue(name_to_copy); }, + // Some filesystems (e.g., Docker bind mounts, FUSE, NFS) return + // DT_UNKNOWN for d_type. Use lstatat to determine the actual type. 
+ .unknown => { + if (current.name.len + 1 + name_to_copy.len > bun.MAX_PATH_BYTES) break :enqueue; + + // Lazy stat to determine the actual kind (lstatat to not follow symlinks) + const stat_result = bun.sys.lstatat(fd, current.name.sliceAssumeZ()); + switch (stat_result) { + .result => |st| { + const real_kind = bun.sys.kindFromMode(st.mode); + effective_kind = real_kind; + if (real_kind == .directory or real_kind == .sym_link) { + async_task.enqueue(name_to_copy); + } + }, + .err => {}, // lstatat failed: nothing is enqueued and effective_kind stays .unknown + } + }, else => {}, } } @@ -4662,7 +4683,7 @@ pub const NodeFS = struct { entries.append(.{ .name = bun.String.cloneUTF8(utf8_name), .path = dirent_path_prev, - .kind = current.kind, + .kind = effective_kind, }) catch |err| bun.handleOom(err); }, Buffer => { @@ -4774,6 +4795,9 @@ pub const NodeFS = struct { break :brk bun.path.joinZBuf(buf, &path_parts, .auto); }; + // Kind reported for this entry: starts as current.kind, replaced by the lstatat result when d_type was .unknown + var effective_kind = current.kind; + enqueue: { switch (current.kind) { // a symlink might be a directory or might not be @@ -4786,6 +4810,24 @@ pub const NodeFS = struct { if (current.name.len + 1 + name_to_copy.len > bun.MAX_PATH_BYTES) break :enqueue; stack.writeItem(basename_allocator.dupeZ(u8, name_to_copy) catch break :enqueue) catch break :enqueue; }, + // Some filesystems (e.g., Docker bind mounts, FUSE, NFS) return + // DT_UNKNOWN for d_type. Use lstatat to determine the actual type. 
+ .unknown => { + if (current.name.len + 1 + name_to_copy.len > bun.MAX_PATH_BYTES) break :enqueue; + + // Lazy stat to determine the actual kind (lstatat to not follow symlinks) + const stat_result = bun.sys.lstatat(fd, current.name.sliceAssumeZ()); + switch (stat_result) { + .result => |st| { + const real_kind = bun.sys.kindFromMode(st.mode); + effective_kind = real_kind; + if (real_kind == .directory or real_kind == .sym_link) { + stack.writeItem(basename_allocator.dupeZ(u8, name_to_copy) catch break :enqueue) catch break :enqueue; + } + }, + .err => {}, // lstatat failed: nothing is enqueued and effective_kind stays .unknown + } + }, else => {}, } } @@ -4801,7 +4843,7 @@ pub const NodeFS = struct { entries.append(.{ .name = jsc.WebCore.encoding.toBunString(utf8_name, args.encoding), .path = dirent_path_prev, - .kind = current.kind, + .kind = effective_kind, }) catch |err| bun.handleOom(err); }, Buffer => { diff --git a/src/glob/GlobWalker.zig b/src/glob/GlobWalker.zig index 2fda733a469da0..f3e2f8cbeecd9b 100644 --- a/src/glob/GlobWalker.zig +++ b/src/glob/GlobWalker.zig @@ -154,6 +154,12 @@ pub const SyscallAccessor = struct { }; } + /// Like statat but does not follow symlinks (Syscall.lstatat); on Windows this delegates to statatWindows - NOTE(review): confirm that path does not follow symlinks either. + pub fn lstatat(handle: Handle, path: [:0]const u8) Maybe(bun.Stat) { + if (comptime bun.Environment.isWindows) return statatWindows(handle.value, path); + return Syscall.lstatat(handle.value, path); + } + pub fn openat(handle: Handle, path: [:0]const u8) !Maybe(Handle) { return switch (Syscall.openat(handle.value, path, bun.O.DIRECTORY | bun.O.RDONLY, 0)) { .err => |err| .{ .err = err }, @@ -247,6 +253,20 @@ pub const DirEntryAccessor = struct { return Syscall.stat(path); } + /// Like statat but does not follow symlinks. 
+ pub fn lstatat(handle: Handle, path_: [:0]const u8) Maybe(bun.Stat) { + var path: [:0]const u8 = path_; + var buf: bun.PathBuffer = undefined; + if (!bun.path.Platform.auto.isAbsolute(path)) { + if (handle.value) |entry| { + const slice = bun.path.joinStringBuf(&buf, [_][]const u8{ entry.dir, path }, .auto); + buf[slice.len] = 0; + path = buf[0..slice.len :0]; + } + } + return Syscall.lstat(path); + } + pub fn open(path: [:0]const u8) !Maybe(Handle) { return openat(.empty, path); } @@ -902,6 +922,93 @@ pub fn GlobWalker_( continue; }, + // Some filesystems (e.g., Docker bind mounts, FUSE, NFS) return + // DT_UNKNOWN for d_type. Use lazy stat to determine the real kind + // only when needed (PR #18172 pattern for performance). + .unknown => { + // Cheap pre-filter on the name to avoid a stat. NOTE(review): confirm this never rejects a directory that matchPatternDir below would accept + const might_match = this.walker.matchPatternImpl(dir_iter_state.pattern, entry_name); + if (!might_match) continue; + + // Need to stat to determine actual kind (lstatat to not follow symlinks) + // Names that fit the 256-byte stack buffer avoid an arena allocation; longer ones fall back to the walker arena + const stackbuf_size = 256; + var stfb = std.heap.stackFallback(stackbuf_size, this.walker.arena.allocator()); + const name_z = stfb.get().dupeZ(u8, entry_name) catch bun.outOfMemory(); + const stat_result = Accessor.lstatat(dir.fd, name_z); + const real_kind: std.fs.File.Kind = switch (stat_result) { + .result => |st| bun.sys.kindFromMode(st.mode), + .err => continue, // Skip entries we can't stat + }; + + // Process based on actual kind + switch (real_kind) { + .file => { + const matches = this.walker.matchPatternFile(entry_name, dir_iter_state.component_idx, dir.is_last, dir_iter_state.pattern, dir_iter_state.next_pattern); + if (matches) { + const prepared = try this.walker.prepareMatchedPath(entry_name, dir.dir_path) orelse continue; + return .{ .result = prepared }; + } + }, + .directory => { + var add_dir: bool = false; + const recursion_idx_bump_ = 
this.walker.matchPatternDir(dir_iter_state.pattern, dir_iter_state.next_pattern, entry_name, dir_iter_state.component_idx, dir_iter_state.is_last, &add_dir); + + if (recursion_idx_bump_) |recursion_idx_bump| { + const subdir_parts: []const []const u8 = &[_][]const u8{ + dir.dir_path[0..dir.dir_path.len], + entry_name, + }; + + const subdir_entry_name = try this.walker.join(subdir_parts); + + if (recursion_idx_bump == 2) { + try this.walker.workbuf.append( + this.walker.arena.allocator(), + WorkItem.new(subdir_entry_name, dir_iter_state.component_idx + recursion_idx_bump, .directory), + ); + try this.walker.workbuf.append( + this.walker.arena.allocator(), + WorkItem.new(subdir_entry_name, dir_iter_state.component_idx, .directory), + ); + } else { + try this.walker.workbuf.append( + this.walker.arena.allocator(), + WorkItem.new(subdir_entry_name, dir_iter_state.component_idx + recursion_idx_bump, .directory), + ); + } + } + + if (add_dir and !this.walker.only_files) { + const prepared_path = try this.walker.prepareMatchedPath(entry_name, dir.dir_path) orelse continue; + return .{ .result = prepared_path }; + } + }, + .sym_link => { + if (this.walker.follow_symlinks) { + const subdir_parts: []const []const u8 = &[_][]const u8{ + dir.dir_path[0..dir.dir_path.len], + entry_name, + }; + const entry_start: u32 = @intCast(if (dir.dir_path.len == 0) 0 else dir.dir_path.len + 1); + const subdir_entry_name = try this.walker.join(subdir_parts); + + try this.walker.workbuf.append( + this.walker.arena.allocator(), + WorkItem.newSymlink(subdir_entry_name, dir_iter_state.component_idx, entry_start), + ); + } else if (!this.walker.only_files) { + const matches = this.walker.matchPatternFile(entry_name, dir_iter_state.component_idx, dir_iter_state.is_last, dir_iter_state.pattern, dir_iter_state.next_pattern); + if (matches) { + const prepared_path = try this.walker.prepareMatchedPath(entry_name, dir.dir_path) orelse continue; + return .{ .result = prepared_path }; + } + } + }, + 
else => {}, // Other kinds (block devices, etc.) never produce a match + } + continue; + }, else => continue, } }, diff --git a/src/sys.zig b/src/sys.zig index 0c0d23f3deb4f7..2eb205cbd2ebcb 100644 --- a/src/sys.zig +++ b/src/sys.zig @@ -744,6 +744,29 @@ pub fn fstatat(fd: bun.FileDescriptor, path: [:0]const u8) Maybe(bun.Stat) { return Maybe(bun.Stat){ .result = stat_buf }; } +/// Like fstatat but does not follow symlinks: POSIX passes AT.SYMLINK_NOFOLLOW, Windows opens the path with O.NOFOLLOW and fstats that handle. +/// The "at" equivalent of lstat; on POSIX an invalid `fd` resolves `path` relative to AT.FDCWD. +pub fn lstatat(fd: bun.FileDescriptor, path: [:0]const u8) Maybe(bun.Stat) { + if (Environment.isWindows) { + // Use O.NOFOLLOW to not follow symlinks (FILE_OPEN_REPARSE_POINT on Windows) + return switch (openatWindowsA(fd, path, O.NOFOLLOW, 0)) { + .result => |file| { + defer file.close(); + return fstat(file); + }, + .err => |err| Maybe(bun.Stat){ .err = err }, + }; + } + var stat_buf = mem.zeroes(bun.Stat); + const fd_valid = if (fd == bun.invalid_fd) std.posix.AT.FDCWD else fd.native(); + if (Maybe(bun.Stat).errnoSysFP(syscall.fstatat(fd_valid, path, &stat_buf, std.posix.AT.SYMLINK_NOFOLLOW), .fstatat, fd, path)) |err| { + log("lstatat({f}, {s}) = {s}", .{ fd, path, @tagName(err.getErrno()) }); + return err; + } + log("lstatat({f}, {s}) = 0", .{ fd, path }); + return Maybe(bun.Stat){ .result = stat_buf }; +} + pub fn mkdir(file_path: [:0]const u8, flags: mode_t) Maybe(void) { return switch (Environment.os) { .mac => Maybe(void).errnoSysP(syscall.mkdir(file_path, flags), .mkdir, file_path) orelse .success, diff --git a/test/cli/run/glob-on-fuse.test.ts b/test/cli/run/glob-on-fuse.test.ts new file mode 100644 index 00000000000000..8c631c25caa78b --- /dev/null +++ b/test/cli/run/glob-on-fuse.test.ts @@ -0,0 +1,140 @@ +/** + * Test that Bun.Glob and fs.globSync work correctly on FUSE filesystems + * where d_type returns DT_UNKNOWN. 
+ * + * Related to issue #24007 and PR #18172 + */ +import { spawn, type ReadableSubprocess } from "bun"; +import { describe, expect, test } from "bun:test"; +import { isLinux, tmpdirSync } from "harness"; +import fs from "node:fs"; +import { join } from "node:path"; + +describe.skipIf(!isLinux)("glob on a FUSE mount", () => { + async function withFuseMount<T>(fn: (mountpoint: string) => Promise<T>): Promise<T> { + // Use tmpdirSync for empty mount point (tempDir requires file tree) + const mountpoint = tmpdirSync(); + + let pythonProcess: ReadableSubprocess | undefined = undefined; + let result: T; + let originalError: Error | undefined; + let cleanupError: Error | undefined; + + try { + // setup FUSE filesystem (uses fuse-fs.py which returns DT_UNKNOWN) + pythonProcess = spawn({ + cmd: ["python3", "fuse-fs.py", "-f", mountpoint], + cwd: __dirname, + stdout: "pipe", + stderr: "pipe", + }); + + // poll (up to 250 tries, 5 ms apart) until main.js is visible; stop early if the Python process has exited + let tries = 0; + while (!fs.existsSync(join(mountpoint, "main.js")) && tries < 250 && pythonProcess.exitCode === null) { + tries++; + await Bun.sleep(5); + } + if (pythonProcess.exitCode !== null && pythonProcess.exitCode !== 0) { + throw new Error(`FUSE process exited early with code ${pythonProcess.exitCode}`); + } + expect(fs.existsSync(join(mountpoint, "main.js"))).toBeTrue(); + + result = await fn(mountpoint); + } catch (e) { + originalError = e instanceof Error ? 
e : new Error(String(e)); + } finally { + if (pythonProcess) { + try { + // unmount + const umount = spawn({ cmd: ["fusermount", "-u", mountpoint] }); + await umount.exited; + // wait up to 1s for the Python process to exit on its own + await Promise.race([pythonProcess.exited, Bun.sleep(1000)]); + expect(pythonProcess.exitCode).toBe(0); + } catch (e) { + pythonProcess.kill("SIGKILL"); + console.error("python process errored:", await new Response(pythonProcess.stderr).text()); + // Capture cleanup error but don't throw inside finally + if (!originalError) { + cleanupError = e instanceof Error ? e : new Error(String(e)); + } + } + } + } + + // Re-throw errors outside finally block + if (originalError) { + throw originalError; + } + if (cleanupError) { + throw cleanupError; + } + + return result!; + } + + // Set a long timeout so the test can clean up the filesystem mount itself + // rather than getting interrupted by timeout (matches run-file-on-fuse.test.ts) + test( + "Bun.Glob.scanSync finds files on FUSE mount", + async () => { + await withFuseMount(async (mountpoint) => { + const glob = new Bun.Glob("*.js"); + const results = Array.from(glob.scanSync({ cwd: mountpoint })); + + // fuse-fs.py provides main.js and main-symlink.js + expect(results).toContain("main.js"); + expect(results.length).toBeGreaterThanOrEqual(1); + }); + }, + 10000 + ); + + test( + "fs.globSync finds files on FUSE mount", + async () => { + await withFuseMount(async (mountpoint) => { + const results = fs.globSync("*.js", { cwd: mountpoint }); + + expect(results).toContain("main.js"); + expect(results.length).toBeGreaterThanOrEqual(1); + }); + }, + 10000 + ); + + test( + "fs.readdirSync works on FUSE mount", + async () => { + await withFuseMount(async (mountpoint) => { + const results = fs.readdirSync(mountpoint); + + expect(results).toContain("main.js"); + expect(results).toContain("main-symlink.js"); + }); + }, + 10000 + ); + + test( + "fs.readdirSync with withFileTypes returns correct types on FUSE mount", + async () => { 
+ await withFuseMount(async (mountpoint) => { + const results = fs.readdirSync(mountpoint, { withFileTypes: true }); + + const mainJs = results.find((d) => d.name === "main.js"); + expect(mainJs).toBeDefined(); + expect(mainJs!.isFile()).toBe(true); + + const symlink = results.find((d) => d.name === "main-symlink.js"); + expect(symlink).toBeDefined(); + expect(symlink!.isSymbolicLink()).toBe(true); + }); + }, + 10000 + ); +}); + + + diff --git a/test/regression/issue/24007.test.ts b/test/regression/issue/24007.test.ts new file mode 100644 index 00000000000000..e57ae265f79877 --- /dev/null +++ b/test/regression/issue/24007.test.ts @@ -0,0 +1,131 @@ +/** + * Regression test for GitHub issue #24007 + * https://github.com/oven-sh/bun/issues/24007 + * + * Issue: Bun's glob/readdir functionality failed on bind-mounted paths in Docker + * because certain filesystems (sshfs, fuse, NFS, bind mounts) don't provide d_type + * information in directory entries (returns DT_UNKNOWN). + * + * Fix: Added lstatat() fallback when d_type is unknown, following the lazy stat + * pattern from PR #18172. + * + * See also: test/cli/run/glob-on-fuse.test.ts for FUSE filesystem testing. 
+ */ +import { test, expect, describe } from "bun:test"; +import { tempDir } from "harness"; +import fs from "node:fs"; +import path from "node:path"; + +describe.concurrent("issue #24007 - glob with recursive patterns", () => { + test("recursive glob pattern **/*.ts finds nested files", () => { + using dir = tempDir("issue-24007", { + "server/api/health.get.ts": "export default () => 'ok';", + "server/api/users/list.ts": "export default () => [];", + "server/routes/index.ts": "export default {};", + "server/routes/admin/dashboard.ts": "export default {};", + "config.ts": "export default {};", + }); + + const cwd = String(dir); + + // The ** pattern must find the top-level file and every nested one (5 in total) + const results = fs.globSync("**/*.ts", { cwd }); + + expect(results).toContain("config.ts"); + expect(results).toContain(path.join("server", "api", "health.get.ts")); + expect(results).toContain(path.join("server", "api", "users", "list.ts")); + expect(results).toContain(path.join("server", "routes", "index.ts")); + expect(results).toContain(path.join("server", "routes", "admin", "dashboard.ts")); + expect(results.length).toBe(5); + }); + + test("recursive glob pattern server/**/*.ts finds files in subdirectory", () => { + using dir = tempDir("issue-24007-subdir", { + "server/api/health.get.ts": "x", + "server/routes/status.ts": "x", + "other/file.ts": "x", + }); + + const cwd = String(dir); + const results = fs.globSync("server/**/*.ts", { cwd }); + + expect(results).toContain(path.join("server", "api", "health.get.ts")); + expect(results).toContain(path.join("server", "routes", "status.ts")); + expect(results).not.toContain(path.join("other", "file.ts")); + expect(results.length).toBe(2); + }); + + test("top-level glob pattern server/*.ts finds direct children", () => { + using dir = tempDir("issue-24007-toplevel", { + "server/index.ts": "x", + "server/config.ts": "x", + "server/nested/deep.ts": "x", + }); + + const cwd = String(dir); + const results = fs.globSync("server/*.ts", { cwd }); + + 
expect(results).toContain(path.join("server", "index.ts")); + expect(results).toContain(path.join("server", "config.ts")); + expect(results).not.toContain(path.join("server", "nested", "deep.ts")); + expect(results.length).toBe(2); + }); + + test("Bun.Glob recursive scan finds nested files", () => { + using dir = tempDir("issue-24007-bun-glob", { + "api/health.get.ts": "x", + "api/users/index.ts": "x", + "routes/home.ts": "x", + }); + + const cwd = String(dir); + const glob = new Bun.Glob("**/*.ts"); + const results = Array.from(glob.scanSync({ cwd })); + + expect(results).toContain("api/health.get.ts"); + expect(results).toContain("api/users/index.ts"); + expect(results).toContain("routes/home.ts"); + expect(results.length).toBe(3); + }); + + test("fs.readdirSync with recursive option finds all files", () => { + using dir = tempDir("issue-24007-readdir", { + "a/b/c/file.txt": "content", + "a/b/file.txt": "content", + "a/file.txt": "content", + "file.txt": "content", + }); + + const cwd = String(dir); + const results = fs.readdirSync(cwd, { recursive: true }); + + expect(results).toContain("file.txt"); + expect(results).toContain(path.join("a", "file.txt")); + expect(results).toContain(path.join("a", "b", "file.txt")); + expect(results).toContain(path.join("a", "b", "c", "file.txt")); + }); + + test("fs.readdirSync with recursive and withFileTypes returns correct types", () => { + using dir = tempDir("issue-24007-dirent", { + "dir/subdir/file.txt": "content", + "dir/another.txt": "content", + }); + + const cwd = String(dir); + const results = fs.readdirSync(cwd, { recursive: true, withFileTypes: true }); + + // Find the nested file in dir/subdir/ + const expectedParent = path.join(cwd, "dir", "subdir"); + const nestedFile = results.find( + (d) => d.name === "file.txt" && d.parentPath === expectedParent + ); + expect(nestedFile).toBeDefined(); + expect(nestedFile!.isFile()).toBe(true); + + // Find the "subdir" directory entry by name + const dirEntry = results.find((d) => d.name === 
"subdir"); + expect(dirEntry).toBeDefined(); + expect(dirEntry!.isDirectory()).toBe(true); + }); +}); +