From f82477d6a6d37bfcf92e4c077c3d7445e9014b26 Mon Sep 17 00:00:00 2001 From: Julian Knutsen Date: Tue, 20 Jan 2026 19:34:20 -0800 Subject: [PATCH] fix(tmux): prevent gt done from killing itself during session cleanup (#821) When gt done runs inside a tmux session (e.g., after polecat task completion), calling KillSessionWithProcesses would kill the gt done process itself before it could complete cleanup operations like writing handoff state. Add KillSessionWithProcessesExcluding() function that accepts a list of PIDs to exclude from the kill sequence. Update selfKillSession to pass its own PID, ensuring gt done completes before the session is destroyed. Also fix both Kill*WithProcesses functions to ignore "session not found" errors from KillSession - when we kill all processes in a session, tmux may automatically destroy it before we explicitly call KillSession. Co-authored-by: julianknutsen Co-authored-by: Claude Opus 4.5 --- internal/cmd/done.go | 8 +- internal/tmux/tmux.go | 70 ++++++++++++++++- internal/tmux/tmux_test.go | 151 +++++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 3 deletions(-) diff --git a/internal/cmd/done.go b/internal/cmd/done.go index 13ab1ce3..a385ac07 100644 --- a/internal/cmd/done.go +++ b/internal/cmd/done.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "github.com/spf13/cobra" @@ -746,9 +747,12 @@ func selfKillSession(townRoot string, roleInfo RoleInfo) error { // Kill our own tmux session with proper process cleanup // This will terminate Claude and all child processes, completing the self-cleaning cycle. - // We use KillSessionWithProcesses to ensure no orphaned processes are left behind. + // We use KillSessionWithProcessesExcluding to ensure no orphaned processes are left behind, + // while excluding our own PID to avoid killing ourselves before cleanup completes. + // The tmux kill-session at the end will terminate us along with the session. t := tmux.NewTmux() - if err := t.KillSessionWithProcesses(sessionName); err != nil { + myPID := strconv.Itoa(os.Getpid()) + if err := t.KillSessionWithProcessesExcluding(sessionName, []string{myPID}); err != nil { return fmt.Errorf("killing session %s: %w", sessionName, err) } diff --git a/internal/tmux/tmux.go b/internal/tmux/tmux.go index aee4533f..8e5f6a49 100644 --- a/internal/tmux/tmux.go +++ b/internal/tmux/tmux.go @@ -191,7 +191,75 @@ func (t *Tmux) KillSessionWithProcesses(name string) error { } // Kill the tmux session - return t.KillSession(name) + // Ignore "session not found" - killing the pane process may have already + // caused tmux to destroy the session automatically + err = t.KillSession(name) + if err == ErrSessionNotFound { + return nil + } + return err +} + +// KillSessionWithProcessesExcluding is like KillSessionWithProcesses but excludes +// specified PIDs from being killed. This is essential for self-kill scenarios where +// the calling process (e.g., gt done) is running inside the session it's terminating. +// Without exclusion, the caller would be killed before completing the cleanup. +func (t *Tmux) KillSessionWithProcessesExcluding(name string, excludePIDs []string) error { + // Build exclusion set for O(1) lookup + exclude := make(map[string]bool) + for _, pid := range excludePIDs { + exclude[pid] = true + } + + // Get the pane PID + pid, err := t.GetPanePID(name) + if err != nil { + // Session might not exist or be in bad state, try direct kill + return t.KillSession(name) + } + + if pid != "" { + // Get all descendant PIDs recursively (returns deepest-first order) + descendants := getAllDescendants(pid) + + // Filter out excluded PIDs + var filtered []string + for _, dpid := range descendants { + if !exclude[dpid] { + filtered = append(filtered, dpid) + } + } + + // Send SIGTERM to all non-excluded descendants (deepest first to avoid orphaning) + for _, dpid := range filtered { + _ = exec.Command("kill", "-TERM", dpid).Run() + } + + // Wait for graceful shutdown + time.Sleep(100 * time.Millisecond) + + // Send SIGKILL to any remaining non-excluded descendants + for _, dpid := range filtered { + _ = exec.Command("kill", "-KILL", dpid).Run() + } + + // Kill the pane process itself (may have called setsid() and detached) + // Only if not excluded + if !exclude[pid] { + _ = exec.Command("kill", "-TERM", pid).Run() + time.Sleep(100 * time.Millisecond) + _ = exec.Command("kill", "-KILL", pid).Run() + } + } + + // Kill the tmux session - this will terminate the excluded process too + // Ignore "session not found" - if we killed all non-excluded processes, + // tmux may have already destroyed the session automatically + err = t.KillSession(name) + if err == ErrSessionNotFound { + return nil + } + return err } // getAllDescendants recursively finds all descendant PIDs of a process. diff --git a/internal/tmux/tmux_test.go b/internal/tmux/tmux_test.go index 615b7b42..6b0262fe 100644 --- a/internal/tmux/tmux_test.go +++ b/internal/tmux/tmux_test.go @@ -554,6 +554,157 @@ func TestGetAllDescendants(t *testing.T) { } } +func TestKillSessionWithProcesses(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killproc-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + + // Verify session exists + has, err := tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession: %v", err) + } + if !has { + t.Fatal("expected session to exist after creation") + } + + // Kill with processes + if err := tm.KillSessionWithProcesses(sessionName); err != nil { + t.Fatalf("KillSessionWithProcesses: %v", err) + } + + // Verify session is gone + has, err = tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession after kill: %v", err) + } + if has { + t.Error("expected session to not exist after KillSessionWithProcesses") + _ = tm.KillSession(sessionName) // cleanup + } +} + +func TestKillSessionWithProcesses_NonexistentSession(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + + // Killing nonexistent session should not panic, just return error or nil + err := tm.KillSessionWithProcesses("nonexistent-session-xyz-12345") + // We don't care about the error value, just that it doesn't panic + _ = err +} + +func TestKillSessionWithProcessesExcluding(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killexcl-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + + // Verify session exists + has, err := tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession: %v", err) + } + if !has { + t.Fatal("expected session to exist after creation") + } + + // Kill with empty excludePIDs (should behave like KillSessionWithProcesses) + if err := tm.KillSessionWithProcessesExcluding(sessionName, nil); err != nil { + t.Fatalf("KillSessionWithProcessesExcluding: %v", err) + } + + // Verify session is gone + has, err = tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession after kill: %v", err) + } + if has { + t.Error("expected session to not exist after KillSessionWithProcessesExcluding") + _ = tm.KillSession(sessionName) // cleanup + } +} + +func TestKillSessionWithProcessesExcluding_WithExcludePID(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killexcl2-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + defer func() { _ = tm.KillSession(sessionName) }() + + // Get the pane PID + panePID, err := tm.GetPanePID(sessionName) + if err != nil { + t.Fatalf("GetPanePID: %v", err) + } + if panePID == "" { + t.Skip("could not get pane PID") + } + + // Kill with the pane PID excluded - the function should still kill the session + // but should not kill the excluded PID before the session is destroyed + err = tm.KillSessionWithProcessesExcluding(sessionName, []string{panePID}) + if err != nil { + t.Fatalf("KillSessionWithProcessesExcluding: %v", err) + } + + // Session should be gone (the final KillSession always happens) + has, _ := tm.HasSession(sessionName) + if has { + t.Error("expected session to not exist after KillSessionWithProcessesExcluding") + } +} + +func TestKillSessionWithProcessesExcluding_NonexistentSession(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + + // Killing nonexistent session should not panic + err := tm.KillSessionWithProcessesExcluding("nonexistent-session-xyz-12345", []string{"12345"}) + // We don't care about the error value, just that it doesn't panic + _ = err +} + func TestSessionSet(t *testing.T) { if !hasTmux() { t.Skip("tmux not installed")