feat: Add 'ensure' semantics to witness/refinery start commands

`gt witness start` and `gt refinery start` now detect zombie sessions
(tmux session alive but the Claude process dead) and automatically kill and recreate them.

This makes the start commands idempotent:
- If no session exists → create a new session
- If a session exists and is healthy → do nothing (already running)
- If a session exists but is a zombie → kill and recreate it

Previously, users had to manually run `stop` and then `start`, or use `restart`, to recover from a zombie session.

Closes: gt-ekc5u

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
keeper
2026-01-02 18:51:41 -08:00
committed by Steve Yegge
parent dd870bb3e4
commit 354219033a
2 changed files with 22 additions and 3 deletions

View File

@@ -284,7 +284,8 @@ func witnessSessionName(rigName string) string {
}
// ensureWitnessSession creates a witness tmux session if it doesn't exist.
// Returns true if a new session was created, false if it already existed.
// Returns true if a new session was created, false if it already existed (and is healthy).
// Implements 'ensure' semantics: if session exists but Claude is dead (zombie), kills and recreates.
func ensureWitnessSession(rigName string, r *rig.Rig) (bool, error) {
t := tmux.NewTmux()
sessionName := witnessSessionName(rigName)
@@ -296,7 +297,16 @@ func ensureWitnessSession(rigName string, r *rig.Rig) (bool, error) {
}
if running {
return false, nil
// Session exists - check if Claude is actually running (healthy vs zombie)
if t.IsClaudeRunning(sessionName) {
// Healthy - Claude is running
return false, nil
}
// Zombie - tmux alive but Claude dead. Kill and recreate.
fmt.Printf("%s Detected zombie session (tmux alive, Claude dead). Recreating...\n", style.Dim.Render("⚠"))
if err := t.KillSession(sessionName); err != nil {
return false, fmt.Errorf("killing zombie session: %w", err)
}
}
// Working directory is the witness's rig clone (if it exists) or witness dir

View File

@@ -137,7 +137,16 @@ func (m *Manager) Start(foreground bool) error {
// Background mode: check if session already exists
running, _ := t.HasSession(sessionID)
if running {
return ErrAlreadyRunning
// Session exists - check if Claude is actually running (healthy vs zombie)
if t.IsClaudeRunning(sessionID) {
// Healthy - Claude is running
return ErrAlreadyRunning
}
// Zombie - tmux alive but Claude dead. Kill and recreate.
fmt.Fprintln(m.output, "⚠ Detected zombie session (tmux alive, Claude dead). Recreating...")
if err := t.KillSession(sessionID); err != nil {
return fmt.Errorf("killing zombie session: %w", err)
}
}
// Also check via PID for backwards compatibility