fix(shutdown): kill entire process tree to prevent orphaned Claude processes

The previous implementation used `pkill -P <pid>`, which kills only direct
children. When Claude spawned subprocesses (such as node workers), those
grandchild processes were orphaned (reparented to PID 1) when their parent
was killed, so they survived `gt shutdown -fa`.

The fix recursively finds all descendant processes and kills them in
deepest-first order, ensuring no process becomes orphaned during
shutdown.

Fixes: gt-wd3ce

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
dementus
2026-01-13 18:21:25 -08:00
committed by beads/crew/emma
parent 4ee1a4472d
commit 1043f00d06
2 changed files with 64 additions and 9 deletions

View File

@@ -139,12 +139,13 @@ func (t *Tmux) KillSession(name string) error {
//
// Process:
// 1. Get the pane's main process PID
// 2. Send SIGTERM to all child processes (pkill -TERM -P <pid>)
// 3. Wait 100ms for graceful shutdown
// 4. Send SIGKILL to any remaining children (pkill -KILL -P <pid>)
// 5. Kill the tmux session
// 2. Find all descendant processes recursively (not just direct children)
// 3. Send SIGTERM to all descendants (deepest first)
// 4. Wait 100ms for graceful shutdown
// 5. Send SIGKILL to any remaining descendants
// 6. Kill the tmux session
//
// This ensures Claude processes are properly terminated even if they ignore SIGHUP.
// This ensures Claude processes and all their children are properly terminated.
func (t *Tmux) KillSessionWithProcesses(name string) error {
// Get the pane PID
pid, err := t.GetPanePID(name)
@@ -154,20 +155,49 @@ func (t *Tmux) KillSessionWithProcesses(name string) error {
}
if pid != "" {
// Send SIGTERM to child processes
_ = exec.Command("pkill", "-TERM", "-P", pid).Run()
// Get all descendant PIDs recursively (returns deepest-first order)
descendants := getAllDescendants(pid)
// Send SIGTERM to all descendants (deepest first to avoid orphaning)
for _, dpid := range descendants {
_ = exec.Command("kill", "-TERM", dpid).Run()
}
// Wait for graceful shutdown
time.Sleep(100 * time.Millisecond)
// Send SIGKILL to any remaining children
_ = exec.Command("pkill", "-KILL", "-P", pid).Run()
// Send SIGKILL to any remaining descendants
for _, dpid := range descendants {
_ = exec.Command("kill", "-KILL", dpid).Run()
}
}
// Kill the tmux session
return t.KillSession(name)
}
// getAllDescendants walks the process tree rooted at pid and returns the PID
// of every descendant as a string.
//
// Direct children are discovered with `pgrep -P <pid>`. For each child, the
// child's own descendants are appended before the child itself, so the
// result is in deepest-first order: signalling the PIDs in slice order
// reaches grandchildren before their parents. If the pgrep invocation
// returns an error, that subtree contributes nothing to the result.
func getAllDescendants(pid string) []string {
	output, err := exec.Command("pgrep", "-P", pid).Output()
	if err != nil {
		return nil
	}

	var pids []string
	// strings.Fields ignores leading/trailing whitespace, so the raw pgrep
	// output can be split directly into one PID per field.
	for _, kid := range strings.Fields(string(output)) {
		// Post-order: the child's subtree goes in ahead of the child.
		subtree := getAllDescendants(kid)
		pids = append(pids, subtree...)
		pids = append(pids, kid)
	}
	return pids
}
// KillServer terminates the entire tmux server and all sessions.
func (t *Tmux) KillServer() error {
_, err := t.run("kill-server")

View File

@@ -527,3 +527,28 @@ func TestHasClaudeChild(t *testing.T) {
t.Error("hasClaudeChild should return false for nonexistent PID")
}
}
// TestGetAllDescendants exercises getAllDescendants with a PID that is not
// expected to exist and with PID 1, asserting only properties that do not
// depend on which processes happen to be running on the host.
func TestGetAllDescendants(t *testing.T) {
	// A nonexistent PID must yield no descendants.
	if got := getAllDescendants("999999999"); len(got) != 0 {
		t.Errorf("getAllDescendants(nonexistent) = %v, want empty slice", got)
	}

	// PID 1 (init/launchd): exact PIDs cannot be predicted, so this only
	// checks that the call completes without panicking and logs the count.
	found := getAllDescendants("1")
	t.Logf("getAllDescendants(\"1\") found %d descendants", len(found))

	// Every returned entry must consist solely of ASCII digits.
	for _, entry := range found {
		for _, r := range entry {
			if '0' <= r && r <= '9' {
				continue
			}
			t.Errorf("getAllDescendants returned non-numeric PID: %q", entry)
		}
	}
}