fix: clean up orphaned tmux sessions at gt start time

Add CleanupOrphanedSessions() function that runs at `gt start` time to
detect and kill zombie tmux sessions (sessions where tmux is alive but
the Claude process has died).

This prevents:
- Session name conflicts when restarting agents
- Resource accumulation from orphaned sessions
- Process accumulation that can overwhelm the system

The function scans for sessions with `gt-*` and `hq-*` prefixes, checks
if Claude is running using IsClaudeRunning(), and kills zombie sessions
using KillSessionWithProcesses() for proper cleanup.

Fixes #700

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
toast
2026-01-22 21:40:35 -08:00
committed by beads/crew/emma
parent 73d577e3c3
commit ee5221889f
3 changed files with 137 additions and 0 deletions

View File

@@ -174,6 +174,15 @@ func runStart(cmd *cobra.Command, args []string) error {
t := tmux.NewTmux()
// Clean up orphaned tmux sessions before starting new agents.
// This prevents session name conflicts and resource accumulation from
// zombie sessions (tmux alive but Claude dead).
if cleaned, err := t.CleanupOrphanedSessions(); err != nil {
fmt.Printf(" %s Could not clean orphaned sessions: %v\n", style.Dim.Render("○"), err)
} else if cleaned > 0 {
fmt.Printf(" %s Cleaned up %d orphaned session(s)\n", style.Bold.Render("✓"), cleaned)
}
fmt.Printf("Starting Gas Town from %s\n\n", style.Dim.Render(townRoot))
fmt.Println("Starting all agents in parallel...")
fmt.Println()

View File

@@ -1374,6 +1374,40 @@ func (t *Tmux) SetFeedBinding(session string) error {
return err
}
// CleanupOrphanedSessions kills Gas Town tmux sessions that no longer host a
// running Claude process ("zombie" sessions: tmux alive, Claude dead).
//
// Only sessions whose names start with "gt-" or "hq-" are considered; every
// other tmux session is left untouched. A candidate is treated as a zombie
// when IsClaudeRunning reports false for it, and is then removed with
// KillSessionWithProcesses.
//
// Returns:
//   - cleaned: the number of zombie sessions that were successfully killed
//   - err: non-nil only when the session list could not be obtained; a failure
//     to kill an individual session is printed as a warning and skipped so the
//     remaining sessions are still processed
func (t *Tmux) CleanupOrphanedSessions() (cleaned int, err error) {
	names, listErr := t.ListSessions()
	if listErr != nil {
		return 0, fmt.Errorf("listing sessions: %w", listErr)
	}

	for _, name := range names {
		// Restrict the sweep to Gas Town-owned sessions.
		ownedByGasTown := strings.HasPrefix(name, "gt-") || strings.HasPrefix(name, "hq-")
		if !ownedByGasTown {
			continue
		}

		// A session with a live Claude process is healthy; leave it alone.
		if t.IsClaudeRunning(name) {
			continue
		}

		// Zombie: tear it down. A failure here must not stop the sweep.
		if killErr := t.KillSessionWithProcesses(name); killErr != nil {
			fmt.Printf(" warning: failed to kill orphaned session %s: %v\n", name, killErr)
			continue
		}
		cleaned++
	}

	return cleaned, nil
}
// SetPaneDiedHook sets a pane-died hook on a session to detect crashes.
// When the pane exits, tmux runs the hook command with exit status info.
// The agentID is used to identify the agent in crash logs (e.g., "gastown/Toast").

View File

@@ -757,3 +757,97 @@ func TestSessionSet(t *testing.T) {
t.Errorf("SessionSet.Names() doesn't contain %q", sessionName)
}
}
// TestCleanupOrphanedSessions creates two Gas Town-prefixed sessions (gt-, hq-)
// and one unrelated session, none of which run Claude, and then verifies that
// CleanupOrphanedSessions removes the two prefixed sessions while leaving the
// unrelated one in place. The test is skipped when tmux is not installed.
func TestCleanupOrphanedSessions(t *testing.T) {
	if !hasTmux() {
		t.Skip("tmux not installed")
	}

	const (
		gtSession    = "gt-test-cleanup-rig" // rig-level prefix, expected to be cleaned
		hqSession    = "hq-test-cleanup"     // town-level prefix, expected to be cleaned
		nonGtSession = "other-test-session"  // no Gas Town prefix, must survive
	)

	tm := NewTmux()

	// Remove leftovers from a previous run; errors are irrelevant here because
	// the sessions usually do not exist.
	for _, stale := range []string{gtSession, hqSession, nonGtSession} {
		_ = tm.KillSession(stale)
	}

	// Each session is a plain shell, so from the cleanup's point of view the
	// prefixed ones are zombies (tmux alive, no Claude).
	if err := tm.NewSession(gtSession, ""); err != nil {
		t.Fatalf("NewSession(gt): %v", err)
	}
	defer func() { _ = tm.KillSession(gtSession) }()

	if err := tm.NewSession(hqSession, ""); err != nil {
		t.Fatalf("NewSession(hq): %v", err)
	}
	defer func() { _ = tm.KillSession(hqSession) }()

	// Control session: lacks the Gas Town prefix and must not be touched.
	if err := tm.NewSession(nonGtSession, ""); err != nil {
		t.Fatalf("NewSession(other): %v", err)
	}
	defer func() { _ = tm.KillSession(nonGtSession) }()

	// Precondition: all three sessions are present before the cleanup runs.
	for _, name := range []string{gtSession, hqSession, nonGtSession} {
		present, err := tm.HasSession(name)
		switch {
		case err != nil:
			t.Fatalf("HasSession(%q): %v", name, err)
		case !present:
			t.Fatalf("expected session %q to exist", name)
		}
	}

	cleaned, err := tm.CleanupOrphanedSessions()
	if err != nil {
		t.Fatalf("CleanupOrphanedSessions: %v", err)
	}

	// The lower bound is 2 rather than an exact count because other zombie
	// Gas Town sessions may exist on the machine running the test.
	if cleaned < 2 {
		t.Errorf("CleanupOrphanedSessions cleaned %d sessions, want >= 2", cleaned)
	}

	// Both prefixed sessions must be gone.
	for _, name := range []string{gtSession, hqSession} {
		present, err := tm.HasSession(name)
		switch {
		case err != nil:
			t.Fatalf("HasSession(%q) after cleanup: %v", name, err)
		case present:
			t.Errorf("expected session %q to be cleaned up", name)
		}
	}

	// The unrelated session must still be there.
	survived, err := tm.HasSession(nonGtSession)
	if err != nil {
		t.Fatalf("HasSession(%q) after cleanup: %v", nonGtSession, err)
	}
	if !survived {
		t.Error("non-GT session should NOT have been cleaned up")
	}
}
// TestCleanupOrphanedSessions_NoSessions is a smoke test: without creating any
// sessions of its own, it checks only that CleanupOrphanedSessions returns no
// error. The cleaned count is logged rather than asserted, since pre-existing
// Gas Town sessions on the host may legitimately be removed by the call.
func TestCleanupOrphanedSessions_NoSessions(t *testing.T) {
	if !hasTmux() {
		t.Skip("tmux not installed")
	}

	count, err := NewTmux().CleanupOrphanedSessions()
	if err != nil {
		t.Fatalf("CleanupOrphanedSessions: %v", err)
	}

	// Informational only; any non-negative count is acceptable.
	t.Logf("CleanupOrphanedSessions cleaned %d sessions", count)
}