diff --git a/internal/cmd/handoff.go b/internal/cmd/handoff.go index c51cdcbc..614e4b8c 100644 --- a/internal/cmd/handoff.go +++ b/internal/cmd/handoff.go @@ -5,6 +5,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "github.com/spf13/cobra" @@ -211,15 +212,19 @@ func runHandoff(cmd *cobra.Command, args []string) error { style.PrintWarning("could not set remain-on-exit: %v", err) } - // NOTE: For self-handoff, we do NOT call KillPaneProcesses here. - // That would kill the gt handoff process itself before it can call RespawnPane, - // leaving the pane dead with no respawn. RespawnPane's -k flag handles killing - // atomically - tmux kills the old process and spawns the new one together. - // See: https://github.com/steveyegge/gastown/issues/859 (pane is dead bug) + // Kill all processes in the pane before respawning to prevent orphan leaks. + // RespawnPane's -k flag only sends SIGHUP which Claude/Node may ignore. // - // For orphan prevention, we rely on respawn-pane -k which sends SIGHUP/SIGTERM. - // If orphans still occur, the solution is to adjust the restart command to - // kill orphans at startup, not to kill ourselves before respawning. + // IMPORTANT: For self-handoff, we must exclude our own process and parent (Claude Code) + // from being killed. Otherwise gt handoff dies before reaching RespawnPane. + excludePIDs := []string{ + strconv.Itoa(os.Getpid()), // gt handoff process + strconv.Itoa(os.Getppid()), // Claude Code (parent) + } + if err := t.KillPaneProcessesExcluding(pane, excludePIDs); err != nil { + // Non-fatal but log the warning + style.PrintWarning("could not kill pane processes: %v", err) + } // Use respawn-pane -k to atomically kill current process and start new one // Note: respawn-pane automatically resets remain-on-exit to off diff --git a/internal/tmux/tmux.go b/internal/tmux/tmux.go index eb4005c6..a8a521d3 100644 --- a/internal/tmux/tmux.go +++ b/internal/tmux/tmux.go @@ -418,6 +418,65 @@ func (t *Tmux) KillPaneProcesses(pane string) error { return nil } +// KillPaneProcessesExcluding is like KillPaneProcesses but excludes specified PIDs +// from being killed. This is essential for self-handoff scenarios where the calling +// process (e.g., gt handoff running inside Claude Code) needs to survive long enough +// to call RespawnPane. Without exclusion, the caller would be killed before completing. +// +// The excluded PIDs should include the calling process and any ancestors that must +// survive. After this function returns, RespawnPane's -k flag will send SIGHUP to +// clean up the remaining processes. +func (t *Tmux) KillPaneProcessesExcluding(pane string, excludePIDs []string) error { + // Build exclusion set for O(1) lookup + exclude := make(map[string]bool) + for _, pid := range excludePIDs { + exclude[pid] = true + } + + // Get the pane PID + pid, err := t.GetPanePID(pane) + if err != nil { + return fmt.Errorf("getting pane PID: %w", err) + } + + if pid == "" { + return fmt.Errorf("pane PID is empty") + } + + // Get all descendant PIDs recursively (returns deepest-first order) + descendants := getAllDescendants(pid) + + // Filter out excluded PIDs + var filtered []string + for _, dpid := range descendants { + if !exclude[dpid] { + filtered = append(filtered, dpid) + } + } + + // Send SIGTERM to all non-excluded descendants (deepest first to avoid orphaning) + for _, dpid := range filtered { + _ = exec.Command("kill", "-TERM", dpid).Run() + } + + // Wait for graceful shutdown + time.Sleep(100 * time.Millisecond) + + // Send SIGKILL to any remaining non-excluded descendants + for _, dpid := range filtered { + _ = exec.Command("kill", "-KILL", dpid).Run() + } + + // Kill the pane process itself only if not excluded + if !exclude[pid] { + _ = exec.Command("kill", "-TERM", pid).Run() + time.Sleep(100 * time.Millisecond) + _ = exec.Command("kill", "-KILL", pid).Run() + } + + return nil +} + // KillServer terminates the entire tmux server and all sessions. func (t *Tmux) KillServer() error { _, err := t.run("kill-server") diff --git a/internal/tmux/tmux_test.go b/internal/tmux/tmux_test.go index 053d2c90..c08f6e2e 100644 --- a/internal/tmux/tmux_test.go +++ b/internal/tmux/tmux_test.go @@ -945,3 +945,142 @@ func TestCleanupOrphanedSessions_NoSessions(t *testing.T) { // May clean some existing GT sessions if they exist, but shouldn't error t.Logf("CleanupOrphanedSessions cleaned %d sessions", cleaned) } + +func TestKillPaneProcessesExcluding(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killpaneexcl-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + defer func() { _ = tm.KillSession(sessionName) }() + + // Get the pane ID + paneID, err := tm.GetPaneID(sessionName) + if err != nil { + t.Fatalf("GetPaneID: %v", err) + } + + // Kill pane processes with empty excludePIDs (should kill all processes) + if err := tm.KillPaneProcessesExcluding(paneID, nil); err != nil { + t.Fatalf("KillPaneProcessesExcluding: %v", err) + } + + // Session may still exist (pane respawns as dead), but processes should be gone + // Check that we can still get info about the session (verifies we didn't panic) + _, _ = tm.HasSession(sessionName) +} + +func TestKillPaneProcessesExcluding_WithExcludePID(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killpaneexcl2-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + defer func() { _ = tm.KillSession(sessionName) }() + + // Get the pane ID and PID + paneID, err := tm.GetPaneID(sessionName) + if err != nil { + t.Fatalf("GetPaneID: %v", err) + } + + panePID, err := tm.GetPanePID(sessionName) + if err != nil { + t.Fatalf("GetPanePID: %v", err) + } + if panePID == "" { + t.Skip("could not get pane PID") + } + + // Kill pane processes with the pane PID excluded + // The function should NOT kill the excluded PID + err = tm.KillPaneProcessesExcluding(paneID, []string{panePID}) + if err != nil { + t.Fatalf("KillPaneProcessesExcluding: %v", err) + } + + // The session/pane should still exist since we excluded the main process + has, _ := tm.HasSession(sessionName) + if !has { + t.Log("Session was destroyed - this may happen if tmux auto-cleaned after descendants died") + } +} + +func TestKillPaneProcessesExcluding_NonexistentPane(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + + // Killing nonexistent pane should return an error but not panic + err := tm.KillPaneProcessesExcluding("%99999", []string{"12345"}) + if err == nil { + t.Error("expected error for nonexistent pane") + } +} + +func TestKillPaneProcessesExcluding_FiltersPIDs(t *testing.T) { + // Unit test the PID filtering logic without needing tmux + // This tests that the exclusion set is built correctly + + excludePIDs := []string{"123", "456", "789"} + exclude := make(map[string]bool) + for _, pid := range excludePIDs { + exclude[pid] = true + } + + // Test that excluded PIDs are in the set + for _, pid := range excludePIDs { + if !exclude[pid] { + t.Errorf("exclude[%q] = false, want true", pid) + } + } + + // Test that non-excluded PIDs are not in the set + nonExcluded := []string{"111", "222", "333"} + for _, pid := range nonExcluded { + if exclude[pid] { + t.Errorf("exclude[%q] = true, want false", pid) + } + } + + // Test filtering logic + allPIDs := []string{"111", "123", "222", "456", "333", "789"} + var filtered []string + for _, pid := range allPIDs { + if !exclude[pid] { + filtered = append(filtered, pid) + } + } + + expectedFiltered := []string{"111", "222", "333"} + if len(filtered) != len(expectedFiltered) { + t.Fatalf("filtered = %v, want %v", filtered, expectedFiltered) + } + for i, pid := range filtered { + if pid != expectedFiltered[i] { + t.Errorf("filtered[%d] = %q, want %q", i, pid, expectedFiltered[i]) + } + } +}