fix(handoff): prevent race condition when killing pane processes

KillPaneProcesses was killing ALL processes in the pane, including the
gt handoff process itself. This created a race condition where the
process could be killed before RespawnPane executes, causing the pane
to close prematurely and requiring manual reattach.

Added KillPaneProcessesExcluding() function that excludes specified PIDs
from being killed. The handoff command now passes its own PID to avoid
the race condition.

Fixes: gt-85qd

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
diesel
2026-01-23 15:39:00 -08:00
committed by John Ogle
parent fa9087c5d7
commit 8b2dc39e88
2 changed files with 76 additions and 1 deletions

View File

@@ -418,6 +418,78 @@ func (t *Tmux) KillPaneProcesses(pane string) error {
return nil
}
// KillPaneProcessesExcluding terminates the processes running in a pane, like
// KillPaneProcesses, while leaving every PID listed in excludePIDs untouched.
//
// A caller that runs inside the pane it is about to respawn (gt handoff, for
// example) passes its own PID here; otherwise it would signal itself and could
// die before the respawn happens, leaving the pane in a broken state.
//
// An error is returned only when the pane PID cannot be determined. Signal
// delivery is best-effort: failures of individual kill commands are ignored.
func (t *Tmux) KillPaneProcessesExcluding(pane string, excludePIDs []string) error {
	// PIDs that must never be signalled, kept as a set for constant-time checks.
	skip := make(map[string]struct{})
	for _, p := range excludePIDs {
		skip[p] = struct{}{}
	}

	panePID, err := t.GetPanePID(pane)
	if err != nil {
		return fmt.Errorf("getting pane PID: %w", err)
	}
	if panePID == "" {
		return fmt.Errorf("pane PID is empty")
	}

	// victims is a set so a PID reported by both lookups below is signalled once.
	victims := make(map[string]struct{})
	mark := func(p string) {
		if _, excluded := skip[p]; !excluded {
			victims[p] = struct{}{}
		}
	}

	// Process-group members first (intended to catch reparented processes).
	// An empty, "0" or "1" group ID is treated as unusable and skipped.
	if pgid := getProcessGroupID(panePID); pgid != "" && pgid != "0" && pgid != "1" {
		for _, member := range getProcessGroupMembers(pgid) {
			mark(member)
		}
	}

	// Then the process tree, for descendants that may have called setsid().
	for _, child := range getAllDescendants(panePID) {
		mark(child)
	}

	targets := make([]string, 0, len(victims))
	for p := range victims {
		targets = append(targets, p)
	}

	// sendAll runs `kill <sig> <pid>` for each PID. Errors are deliberately
	// dropped: a target may already have exited.
	sendAll := func(sig string, pids ...string) {
		for _, p := range pids {
			_ = exec.Command("kill", sig, p).Run()
		}
	}

	// Ask politely, wait out the grace period, then force whatever is left.
	sendAll("-TERM", targets...)
	time.Sleep(processKillGracePeriod)
	sendAll("-KILL", targets...)

	// The pane process itself goes last, and only when the caller did not exclude it.
	if _, excluded := skip[panePID]; excluded {
		return nil
	}
	sendAll("-TERM", panePID)
	time.Sleep(processKillGracePeriod)
	sendAll("-KILL", panePID)
	return nil
}
// KillServer terminates the entire tmux server and all sessions.
func (t *Tmux) KillServer() error {
_, err := t.run("kill-server")