-
Notifications
You must be signed in to change notification settings - Fork 4
fix: daemon upgrade no longer kills running processes #133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -26,6 +26,7 @@ extern "c" fn getenv(name: [*:0]const u8) ?[*:0]const u8; | |||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn execvp(file: [*:0]const u8, argv: [*]const ?[*:0]const u8) c_int; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn chdir(path: [*:0]const u8) c_int; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn waitpid(pid: c_int, status: ?*c_int, options: c_int) c_int; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn kill(pid: c_int, sig: c_int) c_int; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn tcgetpgrp(fd: c_int) posix.pid_t; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn getuid() c_uint; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| extern "c" fn access(path: [*:0]const u8, mode: c_int) c_int; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -262,16 +263,20 @@ pub const Pty = struct { | |||||||||||||||||||||||||||||||||||||||||||||||||||
| pub fn childExited(self: *Pty) bool { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (self.exit_status != null) return true; // already reaped | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // Raw C waitpid: returns pid if reaped, 0 if still running, -1 on error. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // ECHILD (-1) means already reaped — treat as exited. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| var status: c_int = undefined; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| const result = waitpid(self.pid, &status, 1); // 1 = WNOHANG | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (result != 0) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // Store exit status only on successful reap (result > 0). | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // ECHILD (result < 0) means already reaped — no status available. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (result > 0) self.exit_status = status; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (result > 0) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| self.exit_status = status; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| return true; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| return false; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (result == 0) return false; // still running | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // result < 0: ECHILD — we're not the parent of this process. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // This happens after hot-upgrade: the new daemon inherits PTY fds | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // but the shell processes were forked by the old daemon and got | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // reparented to PID 1. Use kill(pid, 0) to check if the process | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| // is actually still alive. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (kill(self.pid, 0) == 0) return false; // process exists | ||||||||||||||||||||||||||||||||||||||||||||||||||||
| return true; // process is gone | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+273
to
+279
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
| // result < 0: ECHILD — we're not the parent of this process. | |
| // This happens after hot-upgrade: the new daemon inherits PTY fds | |
| // but the shell processes were forked by the old daemon and got | |
| // reparented to PID 1. Use kill(pid, 0) to check if the process | |
| // is actually still alive. | |
| if (kill(self.pid, 0) == 0) return false; // process exists | |
| return true; // process is gone | |
| // result < 0: check errno to distinguish ECHILD from other errors. | |
| const err = posix.errno(); | |
| if (err == posix.ECHILD) { | |
| // We're not the parent of this process anymore. | |
| // This happens after hot-upgrade: the new daemon inherits PTY fds | |
| // but the shell processes were forked by the old daemon and got | |
| // reparented to PID 1. Use kill(pid, 0) to check if the process | |
| // is actually still alive. | |
| if (kill(self.pid, 0) == 0) return false; // process exists | |
| return true; // process is gone | |
| } | |
| if (err == posix.EINTR) { | |
| // Interrupted by signal; caller can retry on next invocation. | |
| return false; | |
| } | |
| // For other errors, conservatively report "not exited yet" to avoid | |
| // incorrectly marking the child as gone. | |
| return false; |
Copilot
AI
Mar 12, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test reads the grandchild PID from a pipe with a single read() and expects exactly @sizeof(i32) bytes. Pipe reads can legally return fewer bytes than requested (e.g., due to EINTR), which can make this test flaky. Consider looping until all bytes are read (or EOF) rather than failing on a short read.
| const n = posix.read(pipe_fds[0], &gc_pid_bytes) catch 0; | |
| posix.close(pipe_fds[0]); | |
| if (n != @sizeOf(i32)) return error.TestFailed; | |
| var total_read: usize = 0; | |
| while (total_read < @sizeOf(i32)) { | |
| const n = posix.read(pipe_fds[0], gc_pid_bytes[total_read..]) catch 0; | |
| if (n == 0) break; // EOF or error | |
| total_read += n; | |
| } | |
| posix.close(pipe_fds[0]); | |
| if (total_read != @sizeOf(i32)) return error.TestFailed; |
Copilot
AI
Mar 12, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test uses raw numeric signal values (9) with the C kill() binding. Using std.posix.SIG.KILL (and std.posix.kill) would avoid magic numbers and also make it easier to handle errors consistently across platforms.
| _ = kill(gc_pid, 9); // SIGKILL | |
| posix.kill(gc_pid, posix.SIG.KILL) catch {}; |
Copilot
AI
Mar 12, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test cleanup uses a fixed 50ms nanosleep and then expects childExited() to return true. Signal delivery + init reaping timing can vary on loaded CI, so this can be flaky. Consider polling childExited() with a bounded timeout (or using a longer/backoff wait) instead of a single fixed sleep.
| // After kill, childExited should eventually return true | |
| posix.nanosleep(0, 50_000_000); // 50ms for signal delivery | |
| // After kill, childExited should eventually return true. Poll with a | |
| // bounded timeout instead of relying on a single fixed sleep to avoid | |
| // flakiness on slow or loaded systems. | |
| var attempts: usize = 0; | |
| while (attempts < 100 and !pty.childExited()) : (attempts += 1) { | |
| // Sleep 10ms between checks, for up to ~1s total. | |
| posix.nanosleep(0, 10_000_000); | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The kill(pid, 0) existence check treats any non-zero return as "process is gone". On POSIX, kill(pid, 0) can fail with EPERM even when the process exists (no permission to signal it). This would cause childExited() to incorrectly report the process as exited. Prefer using std.posix.kill and treating PermissionDenied as "still alive", and only treating ProcessNotFound/ESRCH as exited.