diff --git a/internal/cmd/costs.go b/internal/cmd/costs.go index 906aa90b..a8cd4e18 100644 --- a/internal/cmd/costs.go +++ b/internal/cmd/costs.go @@ -150,8 +150,8 @@ func runLiveCosts() error { // Extract cost from content cost := extractCost(content) - // Check if Claude is running - running := t.IsClaudeRunning(session) + // Check if an agent appears to be running + running := t.IsAgentRunning(session) costs = append(costs, SessionCost{ Session: session, @@ -428,7 +428,6 @@ func extractCost(content string) float64 { return cost } - func outputCostsJSON(output CostsOutput) error { enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") diff --git a/internal/cmd/crew_at.go b/internal/cmd/crew_at.go index bea11b2f..29fa3385 100644 --- a/internal/cmd/crew_at.go +++ b/internal/cmd/crew_at.go @@ -89,9 +89,9 @@ func runCrewAt(cmd *cobra.Command, args []string) error { if !hasSession { existingSessions, err := t.FindSessionByWorkDir(worker.ClonePath, true) if err == nil && len(existingSessions) > 0 { - // Found an existing session with Claude running in this directory + // Found an existing session with an agent running in this directory existingSession := existingSessions[0] - fmt.Printf("%s Found existing Claude session '%s' in crew directory\n", + fmt.Printf("%s Found existing agent session '%s' in crew directory\n", style.Warning.Render("⚠"), existingSession) fmt.Printf(" Attaching to existing session instead of creating a new one\n") @@ -164,7 +164,11 @@ func runCrewAt(cmd *cobra.Command, args []string) error { // Session exists - check if Claude is still running // Uses both pane command check and UI marker detection to avoid // restarting when user is in a subshell spawned from Claude - if !t.IsClaudeRunning(sessionID) { + agentCfg, _, err := config.ResolveAgentConfigWithOverride(townRoot, r.Path, crewAgentOverride) + if err != nil { + return fmt.Errorf("resolving agent: %w", err) + } + if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) { // Claude has exited, restart it using respawn-pane fmt.Printf("Claude exited, restarting...\n") diff --git a/internal/cmd/sling.go b/internal/cmd/sling.go index 07575a9f..56b6d6f1 100644 --- a/internal/cmd/sling.go +++ b/internal/cmd/sling.go @@ -447,13 +447,13 @@ func runSling(cmd *cobra.Command, args []string) error { if targetPane == "" { fmt.Printf("%s No pane to nudge (agent will discover work via gt prime)\n", style.Dim.Render("○")) } else { - // Ensure Claude is ready before nudging (prevents race condition where + // Ensure agent is ready before nudging (prevents race condition where // message arrives before Claude has fully started - see issue #115) sessionName := getSessionFromPane(targetPane) if sessionName != "" { - if err := ensureClaudeReady(sessionName); err != nil { + if err := ensureAgentReady(sessionName); err != nil { // Non-fatal: warn and continue, agent will discover work via gt prime - fmt.Printf("%s Could not verify Claude ready: %v\n", style.Dim.Render("○"), err) + fmt.Printf("%s Could not verify agent ready: %v\n", style.Dim.Render("○"), err) } } @@ -605,30 +605,32 @@ func getSessionFromPane(pane string) string { return pane } -// ensureClaudeReady waits for Claude to be ready before nudging an existing session. -// Uses the same pragmatic approach as session.Start(): poll for node process, -// accept bypass dialog if present, then wait for full initialization. -// Returns early if Claude is already running and ready. -func ensureClaudeReady(sessionName string) error { +// ensureAgentReady waits for an agent to be ready before nudging an existing session. +// Uses a pragmatic approach: wait for the pane to leave a shell, then (Claude-only) +// accept the bypass permissions warning and give it a moment to finish initializing. +func ensureAgentReady(sessionName string) error { t := tmux.NewTmux() - // If Claude is already running, assume it's ready (session was started earlier) - if t.IsClaudeRunning(sessionName) { + // If an agent is already running, assume it's ready (session was started earlier) + if t.IsAgentRunning(sessionName) { return nil } - // Claude not running yet - wait for it to start (shell → node transition) + // Agent not running yet - wait for it to start (shell → program transition) if err := t.WaitForCommand(sessionName, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil { - return fmt.Errorf("waiting for Claude to start: %w", err) + return fmt.Errorf("waiting for agent to start: %w", err) } - // Accept bypass permissions warning if present - _ = t.AcceptBypassPermissionsWarning(sessionName) + // Claude-only: accept bypass permissions warning if present + if t.IsClaudeRunning(sessionName) { + _ = t.AcceptBypassPermissionsWarning(sessionName) - // Wait for Claude to be fully ready at the prompt - // PRAGMATIC APPROACH: Use fixed delay rather than detection. - // Claude startup takes ~5-8 seconds on typical machines. - time.Sleep(8 * time.Second) + // PRAGMATIC APPROACH: fixed delay rather than prompt detection. + // Claude startup takes ~5-8 seconds on typical machines. + time.Sleep(8 * time.Second) + } else { + time.Sleep(1 * time.Second) + } return nil } diff --git a/internal/cmd/start.go b/internal/cmd/start.go index 59da6ef3..0a431e5d 100644 --- a/internal/cmd/start.go +++ b/internal/cmd/start.go @@ -277,7 +277,8 @@ func startConfiguredCrew(t *tmux.Tmux, townRoot string) { sessionID := crewSessionName(r.Name, crewName) if running, _ := t.HasSession(sessionID); running { // Session exists - check if Claude is still running - if !t.IsClaudeRunning(sessionID) { + agentCfg := config.ResolveAgentConfig(townRoot, r.Path) + if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) { // Claude has exited, restart it fmt.Printf(" %s %s/%s session exists, restarting Claude...\n", style.Dim.Render("○"), r.Name, crewName) claudeCmd := config.BuildCrewStartupCommand(r.Name, crewName, r.Path, "gt prime") @@ -800,7 +801,11 @@ func runStartCrew(cmd *cobra.Command, args []string) error { if hasSession { // Session exists - check if Claude is still running - if !t.IsClaudeRunning(sessionID) { + agentCfg, _, err := config.ResolveAgentConfigWithOverride(townRoot, r.Path, startCrewAgentOverride) + if err != nil { + return fmt.Errorf("resolving agent: %w", err) + } + if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) { // Claude has exited, restart it with "gt prime" as initial prompt fmt.Printf("Session exists, restarting Claude...\n") startupCmd, err := config.BuildCrewStartupCommandWithAgentOverride(rigName, name, r.Path, "gt prime", startCrewAgentOverride) diff --git a/internal/config/loader.go b/internal/config/loader.go index 62b8de68..16d2054d 100644 --- a/internal/config/loader.go +++ b/internal/config/loader.go @@ -1204,6 +1204,18 @@ func BuildCrewStartupCommandWithAgentOverride(rigName, crewName, rigPath, prompt return BuildStartupCommandWithAgentOverride(envVars, rigPath, prompt, agentOverride) } +// ExpectedPaneCommands returns tmux pane command names that indicate the runtime is running. +// For example, Claude runs as "node", while most other runtimes report their executable name. +func ExpectedPaneCommands(rc *RuntimeConfig) []string { + if rc == nil || rc.Command == "" { + return nil + } + if filepath.Base(rc.Command) == "claude" { + return []string{"node"} + } + return []string{filepath.Base(rc.Command)} +} + // GetRigPrefix returns the beads prefix for a rig from rigs.json. // Falls back to "gt" if the rig isn't found or has no prefix configured. // townRoot is the path to the town directory (e.g., ~/gt). diff --git a/internal/config/loader_test.go b/internal/config/loader_test.go index 1bc30cf2..df5b9d77 100644 --- a/internal/config/loader_test.go +++ b/internal/config/loader_test.go @@ -1165,6 +1165,22 @@ func TestGetRuntimeCommand_UsesRigAgentWhenRigPathProvided(t *testing.T) { } } +func TestExpectedPaneCommands(t *testing.T) { + t.Run("claude maps to node", func(t *testing.T) { + got := ExpectedPaneCommands(&RuntimeConfig{Command: "claude"}) + if len(got) != 1 || got[0] != "node" { + t.Fatalf("ExpectedPaneCommands(claude) = %v, want %v", got, []string{"node"}) + } + }) + + t.Run("codex maps to executable", func(t *testing.T) { + got := ExpectedPaneCommands(&RuntimeConfig{Command: "codex"}) + if len(got) != 1 || got[0] != "codex" { + t.Fatalf("ExpectedPaneCommands(codex) = %v, want %v", got, []string{"codex"}) + } + }) +} + func TestLoadRuntimeConfigFromSettings(t *testing.T) { // Create temp rig with custom runtime config dir := t.TempDir() diff --git a/internal/refinery/manager.go b/internal/refinery/manager.go index 2b8cd48d..f4faa8c6 100644 --- a/internal/refinery/manager.go +++ b/internal/refinery/manager.go @@ -138,7 +138,9 @@ func (m *Manager) Start(foreground bool) error { running, _ := t.HasSession(sessionID) if running { // Session exists - check if Claude is actually running (healthy vs zombie) - if t.IsClaudeRunning(sessionID) { + townRoot := filepath.Dir(m.rig.Path) + agentCfg := config.ResolveAgentConfig(townRoot, m.rig.Path) + if t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) { // Healthy - Claude is running return ErrAlreadyRunning } diff --git a/internal/tmux/tmux.go b/internal/tmux/tmux.go index 844af816..51c36d89 100644 --- a/internal/tmux/tmux.go +++ b/internal/tmux/tmux.go @@ -15,8 +15,8 @@ import ( // Common errors var ( - ErrNoServer = errors.New("no tmux server running") - ErrSessionExists = errors.New("session already exists") + ErrNoServer = errors.New("no tmux server running") + ErrSessionExists = errors.New("session already exists") ErrSessionNotFound = errors.New("session not found") ) @@ -94,7 +94,7 @@ func (t *Tmux) EnsureSessionFresh(name, workDir string) error { if exists { // Session exists - check if it's a zombie - if !t.IsClaudeRunning(name) { + if !t.IsAgentRunning(name) { // Zombie session: tmux alive but Claude dead // Kill it so we can create a fresh one if err := t.KillSession(name); err != nil { @@ -390,8 +390,8 @@ func (t *Tmux) GetPaneWorkDir(session string) (string, error) { // FindSessionByWorkDir finds tmux sessions where the pane's current working directory // matches or is under the target directory. Returns session names that match. -// If checkClaude is true, only returns sessions that have Claude (node) running. -func (t *Tmux) FindSessionByWorkDir(targetDir string, checkClaude bool) ([]string, error) { +// If requireAgentRunning is true, only returns sessions that have some non-shell command running. +func (t *Tmux) FindSessionByWorkDir(targetDir string, requireAgentRunning bool) ([]string, error) { sessions, err := t.ListSessions() if err != nil { return nil, err @@ -410,9 +410,9 @@ func (t *Tmux) FindSessionByWorkDir(targetDir string, checkClaude bool) ([]strin // Check if workdir matches target (exact match or subdir) if workDir == targetDir || strings.HasPrefix(workDir, targetDir+"/") { - if checkClaude { - // Only include if Claude is running - if t.IsClaudeRunning(session) { + if requireAgentRunning { + // Only include if an agent appears to be running + if t.IsAgentRunning(session) { matches = append(matches, session) } } else { @@ -526,15 +526,39 @@ Run: gt mail inbox return t.SendKeys(session, banner) } -// IsClaudeRunning checks if Claude appears to be running in the session. -// Only trusts the pane command - UI markers in scrollback cause false positives. -func (t *Tmux) IsClaudeRunning(session string) bool { - // Check pane command - Claude runs as node +// IsAgentRunning checks if an agent appears to be running in the session. +// +// If expectedPaneCommands is non-empty, the pane's current command must match one of them. +// If expectedPaneCommands is empty, any non-shell command counts as "agent running". +func (t *Tmux) IsAgentRunning(session string, expectedPaneCommands ...string) bool { cmd, err := t.GetPaneCommand(session) if err != nil { return false } - return cmd == "node" + + if len(expectedPaneCommands) > 0 { + for _, expected := range expectedPaneCommands { + if expected != "" && cmd == expected { + return true + } + } + return false + } + + // Fallback: any non-shell command counts as running. + for _, shell := range constants.SupportedShells { + if cmd == shell { + return false + } + } + return cmd != "" +} + +// IsClaudeRunning checks if Claude appears to be running in the session. +// Only trusts the pane command - UI markers in scrollback cause false positives. +func (t *Tmux) IsClaudeRunning(session string) bool { + // Claude runs as node + return t.IsAgentRunning(session, "node") } // WaitForCommand polls until the pane is NOT running one of the excluded commands. @@ -595,14 +619,16 @@ func (t *Tmux) WaitForShellReady(session string, timeout time.Duration) error { // ZFC (Zero False Commands) principle: AI should observe AI, not regex. // // Bootstrap (acceptable): -// During cold startup when no AI agent is running, the daemon uses this -// function to get the Deacon online. Regex is acceptable here. +// +// During cold startup when no AI agent is running, the daemon uses this +// function to get the Deacon online. Regex is acceptable here. // // Steady-State (use AI observation instead): -// Once any AI agent is running, observation should be AI-to-AI: -// - Deacon starting polecats → use 'gt deacon pending' + AI analysis -// - Deacon restarting → Mayor watches via 'gt peek' -// - Mayor restarting → Deacon watches via 'gt peek' +// +// Once any AI agent is running, observation should be AI-to-AI: +// - Deacon starting polecats → use 'gt deacon pending' + AI analysis +// - Deacon restarting → Mayor watches via 'gt peek' +// - Mayor restarting → Deacon watches via 'gt peek' // // See: gt deacon pending (ZFC-compliant AI observation) // See: gt deacon trigger-pending (bootstrap mode, regex-based) diff --git a/internal/tmux/tmux_test.go b/internal/tmux/tmux_test.go index 8aeee7f5..270ca767 100644 --- a/internal/tmux/tmux_test.go +++ b/internal/tmux/tmux_test.go @@ -260,6 +260,11 @@ func TestEnsureSessionFresh_ZombieSession(t *testing.T) { t.Skip("session unexpectedly has Claude running - can't test zombie case") } + // Verify generic agent check also treats it as not running (shell session) + if tm.IsAgentRunning(sessionName) { + t.Fatalf("expected IsAgentRunning(%q) to be false for a fresh shell session", sessionName) + } + // EnsureSessionFresh should kill the zombie and create fresh session // This should NOT error with "session already exists" if err := tm.EnsureSessionFresh(sessionName, ""); err != nil {