From 8b2dc39e882273f9d873fe7cc76a65cd1574d170 Mon Sep 17 00:00:00 2001 From: diesel Date: Fri, 23 Jan 2026 15:39:00 -0800 Subject: [PATCH] fix(handoff): prevent race condition when killing pane processes KillPaneProcesses was killing ALL processes in the pane, including the gt handoff process itself. This created a race condition where the process could be killed before RespawnPane executes, causing the pane to close prematurely and requiring manual reattach. Added KillPaneProcessesExcluding() function that excludes specified PIDs from being killed. The handoff command now passes its own PID to avoid the race condition. Fixes: gt-85qd Co-Authored-By: Claude Opus 4.5 --- internal/cmd/handoff.go | 5 ++- internal/tmux/tmux.go | 72 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/internal/cmd/handoff.go b/internal/cmd/handoff.go index 2313c390..3d8cd839 100644 --- a/internal/cmd/handoff.go +++ b/internal/cmd/handoff.go @@ -213,7 +213,10 @@ func runHandoff(cmd *cobra.Command, args []string) error { // Kill all processes in the pane before respawning to prevent orphan leaks // RespawnPane's -k flag only sends SIGHUP which Claude/Node may ignore - if err := t.KillPaneProcesses(pane); err != nil { + // IMPORTANT: Exclude our own process to avoid race condition where we get killed + // before RespawnPane executes, causing the pane to close prematurely (gt-85qd) + myPID := fmt.Sprintf("%d", os.Getpid()) + if err := t.KillPaneProcessesExcluding(pane, []string{myPID}); err != nil { // Non-fatal but log the warning style.PrintWarning("could not kill pane processes: %v", err) } diff --git a/internal/tmux/tmux.go b/internal/tmux/tmux.go index eb4005c6..0ef2d419 100644 --- a/internal/tmux/tmux.go +++ b/internal/tmux/tmux.go @@ -418,6 +418,78 @@ func (t *Tmux) KillPaneProcesses(pane string) error { return nil } +// KillPaneProcessesExcluding is like KillPaneProcesses but excludes specified PIDs. 
+// Every process reported by getProcessGroupMembers/getAllDescendants for the pane PID gets
+// SIGTERM and then, after processKillGracePeriod, SIGKILL, unless it is in excludePIDs.
+// Exclusion is essential for self-handoff: a caller running inside the pane (e.g., gt handoff)
+// would otherwise be killed before it can respawn the pane. Only PID-lookup errors are returned.
+func (t *Tmux) KillPaneProcessesExcluding(pane string, excludePIDs []string) error {
+	// Index excludePIDs in a set so each candidate PID is checked with a single map lookup
+	exclude := make(map[string]bool)
+	for _, pid := range excludePIDs {
+		exclude[pid] = true
+	}
+
+	// Resolve the pane to its PID; a failed or empty lookup is the only error this function returns
+	pid, err := t.GetPanePID(pane)
+	if err != nil {
+		return fmt.Errorf("getting pane PID: %w", err)
+	}
+
+	if pid == "" {
+		return fmt.Errorf("pane PID is empty")
+	}
+
+	// Collect PIDs to kill in a set, so a PID found by both lookups below is signalled only once
+	toKill := make(map[string]bool)
+
+	// First, collect process group members (catches reparented processes)
+	pgid := getProcessGroupID(pid)
+	if pgid != "" && pgid != "0" && pgid != "1" { // presumably guards against a failed lookup or the kernel/init groups - confirm
+		for _, member := range getProcessGroupMembers(pgid) {
+			if !exclude[member] {
+				toKill[member] = true
+			}
+		}
+	}
+
+	// Also walk the process tree for any descendants that might have called setsid()
+	descendants := getAllDescendants(pid)
+	for _, dpid := range descendants {
+		if !exclude[dpid] {
+			toKill[dpid] = true
+		}
+	}
+
+	// Flatten the set into a slice so the SIGTERM and SIGKILL passes cover the same PIDs
+	var killList []string
+	for dpid := range toKill {
+		killList = append(killList, dpid)
+	}
+
+	// Send SIGTERM to all non-excluded processes; kill errors are deliberately ignored (best-effort)
+	for _, dpid := range killList {
+		_ = exec.Command("kill", "-TERM", dpid).Run()
+	}
+
+	// Wait processKillGracePeriod for graceful shutdown (2s per the original note; constant is defined elsewhere)
+	time.Sleep(processKillGracePeriod)
+
+	// Send SIGKILL to the same PIDs; errors (e.g. the process already exited) are ignored
+	for _, dpid := range killList {
+		_ = exec.Command("kill", "-KILL", dpid).Run()
+	}
+
+	// Kill the pane process itself only if not excluded; this adds a second grace-period sleep
+	if !exclude[pid] {
+		_ = exec.Command("kill", "-TERM", pid).Run()
+		time.Sleep(processKillGracePeriod)
+		_ = exec.Command("kill", "-KILL", pid).Run()
+	}
+
+	return nil
+}
+
 // KillServer terminates the entire tmux server and all sessions.
 func (t *Tmux) KillServer() error {
 	_, err := t.run("kill-server")