fix(tmux): prevent gt done from killing itself during session cleanup (#821)

When gt done runs inside a tmux session (e.g., after polecat task
completion), calling KillSessionWithProcesses would kill the gt done
process itself before it could complete cleanup operations like writing
handoff state.

Add KillSessionWithProcessesExcluding() function that accepts a list of
PIDs to exclude from the kill sequence. Update selfKillSession to pass
its own PID, ensuring gt done completes before the session is destroyed.

Also fix both Kill*WithProcesses functions to ignore "session not found"
errors from KillSession - when we kill all processes in a session, tmux
may automatically destroy it before we explicitly call KillSession.

Co-authored-by: julianknutsen <julianknutsen@users.noreply.github>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Julian Knutsen
2026-01-20 19:34:20 -08:00
committed by GitHub
parent 4dd11d4ffa
commit f82477d6a6
3 changed files with 226 additions and 3 deletions

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/spf13/cobra"
@@ -746,9 +747,12 @@ func selfKillSession(townRoot string, roleInfo RoleInfo) error {
// Kill our own tmux session with proper process cleanup
// This will terminate Claude and all child processes, completing the self-cleaning cycle.
// We use KillSessionWithProcesses to ensure no orphaned processes are left behind.
// We use KillSessionWithProcessesExcluding to ensure no orphaned processes are left behind,
// while excluding our own PID to avoid killing ourselves before cleanup completes.
// The tmux kill-session at the end will terminate us along with the session.
t := tmux.NewTmux()
if err := t.KillSessionWithProcesses(sessionName); err != nil {
myPID := strconv.Itoa(os.Getpid())
if err := t.KillSessionWithProcessesExcluding(sessionName, []string{myPID}); err != nil {
return fmt.Errorf("killing session %s: %w", sessionName, err)
}

View File

@@ -191,7 +191,75 @@ func (t *Tmux) KillSessionWithProcesses(name string) error {
}
// Kill the tmux session
return t.KillSession(name)
// Ignore "session not found" - killing the pane process may have already
// caused tmux to destroy the session automatically
err = t.KillSession(name)
if err == ErrSessionNotFound {
return nil
}
return err
}
// KillSessionWithProcessesExcluding is like KillSessionWithProcesses but excludes
// specified PIDs from being killed. This is essential for self-kill scenarios where
// the calling process (e.g., gt done) is running inside the session it's terminating.
// Without exclusion, the caller would be killed before completing the cleanup.
func (t *Tmux) KillSessionWithProcessesExcluding(name string, excludePIDs []string) error {
// Build exclusion set for O(1) lookup
exclude := make(map[string]bool)
for _, pid := range excludePIDs {
exclude[pid] = true
}
// Get the pane PID
pid, err := t.GetPanePID(name)
if err != nil {
// Session might not exist or be in bad state, try direct kill
return t.KillSession(name)
}
if pid != "" {
// Get all descendant PIDs recursively (returns deepest-first order)
descendants := getAllDescendants(pid)
// Filter out excluded PIDs
var filtered []string
for _, dpid := range descendants {
if !exclude[dpid] {
filtered = append(filtered, dpid)
}
}
// Send SIGTERM to all non-excluded descendants (deepest first to avoid orphaning)
for _, dpid := range filtered {
_ = exec.Command("kill", "-TERM", dpid).Run()
}
// Wait for graceful shutdown
time.Sleep(100 * time.Millisecond)
// Send SIGKILL to any remaining non-excluded descendants
for _, dpid := range filtered {
_ = exec.Command("kill", "-KILL", dpid).Run()
}
// Kill the pane process itself (may have called setsid() and detached)
// Only if not excluded
if !exclude[pid] {
_ = exec.Command("kill", "-TERM", pid).Run()
time.Sleep(100 * time.Millisecond)
_ = exec.Command("kill", "-KILL", pid).Run()
}
}
// Kill the tmux session - this will terminate the excluded process too
// Ignore "session not found" - if we killed all non-excluded processes,
// tmux may have already destroyed the session automatically
err = t.KillSession(name)
if err == ErrSessionNotFound {
return nil
}
return err
}
// getAllDescendants recursively finds all descendant PIDs of a process.

View File

@@ -554,6 +554,157 @@ func TestGetAllDescendants(t *testing.T) {
}
}
func TestKillSessionWithProcesses(t *testing.T) {
if !hasTmux() {
t.Skip("tmux not installed")
}
tm := NewTmux()
sessionName := "gt-test-killproc-" + t.Name()
// Clean up any existing session
_ = tm.KillSession(sessionName)
// Create session with a long-running process
cmd := `sleep 300`
if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil {
t.Fatalf("NewSessionWithCommand: %v", err)
}
// Verify session exists
has, err := tm.HasSession(sessionName)
if err != nil {
t.Fatalf("HasSession: %v", err)
}
if !has {
t.Fatal("expected session to exist after creation")
}
// Kill with processes
if err := tm.KillSessionWithProcesses(sessionName); err != nil {
t.Fatalf("KillSessionWithProcesses: %v", err)
}
// Verify session is gone
has, err = tm.HasSession(sessionName)
if err != nil {
t.Fatalf("HasSession after kill: %v", err)
}
if has {
t.Error("expected session to not exist after KillSessionWithProcesses")
_ = tm.KillSession(sessionName) // cleanup
}
}
func TestKillSessionWithProcesses_NonexistentSession(t *testing.T) {
if !hasTmux() {
t.Skip("tmux not installed")
}
tm := NewTmux()
// Killing nonexistent session should not panic, just return error or nil
err := tm.KillSessionWithProcesses("nonexistent-session-xyz-12345")
// We don't care about the error value, just that it doesn't panic
_ = err
}
func TestKillSessionWithProcessesExcluding(t *testing.T) {
if !hasTmux() {
t.Skip("tmux not installed")
}
tm := NewTmux()
sessionName := "gt-test-killexcl-" + t.Name()
// Clean up any existing session
_ = tm.KillSession(sessionName)
// Create session with a long-running process
cmd := `sleep 300`
if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil {
t.Fatalf("NewSessionWithCommand: %v", err)
}
// Verify session exists
has, err := tm.HasSession(sessionName)
if err != nil {
t.Fatalf("HasSession: %v", err)
}
if !has {
t.Fatal("expected session to exist after creation")
}
// Kill with empty excludePIDs (should behave like KillSessionWithProcesses)
if err := tm.KillSessionWithProcessesExcluding(sessionName, nil); err != nil {
t.Fatalf("KillSessionWithProcessesExcluding: %v", err)
}
// Verify session is gone
has, err = tm.HasSession(sessionName)
if err != nil {
t.Fatalf("HasSession after kill: %v", err)
}
if has {
t.Error("expected session to not exist after KillSessionWithProcessesExcluding")
_ = tm.KillSession(sessionName) // cleanup
}
}
func TestKillSessionWithProcessesExcluding_WithExcludePID(t *testing.T) {
if !hasTmux() {
t.Skip("tmux not installed")
}
tm := NewTmux()
sessionName := "gt-test-killexcl2-" + t.Name()
// Clean up any existing session
_ = tm.KillSession(sessionName)
// Create session with a long-running process
cmd := `sleep 300`
if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil {
t.Fatalf("NewSessionWithCommand: %v", err)
}
defer func() { _ = tm.KillSession(sessionName) }()
// Get the pane PID
panePID, err := tm.GetPanePID(sessionName)
if err != nil {
t.Fatalf("GetPanePID: %v", err)
}
if panePID == "" {
t.Skip("could not get pane PID")
}
// Kill with the pane PID excluded - the function should still kill the session
// but should not kill the excluded PID before the session is destroyed
err = tm.KillSessionWithProcessesExcluding(sessionName, []string{panePID})
if err != nil {
t.Fatalf("KillSessionWithProcessesExcluding: %v", err)
}
// Session should be gone (the final KillSession always happens)
has, _ := tm.HasSession(sessionName)
if has {
t.Error("expected session to not exist after KillSessionWithProcessesExcluding")
}
}
func TestKillSessionWithProcessesExcluding_NonexistentSession(t *testing.T) {
if !hasTmux() {
t.Skip("tmux not installed")
}
tm := NewTmux()
// Killing nonexistent session should not panic
err := tm.KillSessionWithProcessesExcluding("nonexistent-session-xyz-12345", []string{"12345"})
// We don't care about the error value, just that it doesn't panic
_ = err
}
func TestSessionSet(t *testing.T) {
if !hasTmux() {
t.Skip("tmux not installed")