feat: runtime-aware tmux agent checks

This commit is contained in:
jv
2026-01-07 12:56:00 +13:00
committed by Steve Yegge
parent 02ca9e43fa
commit 22693c1dcc
9 changed files with 117 additions and 46 deletions
+2 -3
View File
@@ -150,8 +150,8 @@ func runLiveCosts() error {
// Extract cost from content
cost := extractCost(content)
// Check if Claude is running
running := t.IsClaudeRunning(session)
// Check if an agent appears to be running
running := t.IsAgentRunning(session)
costs = append(costs, SessionCost{
Session: session,
@@ -428,7 +428,6 @@ func extractCost(content string) float64 {
return cost
}
func outputCostsJSON(output CostsOutput) error {
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
+7 -3
View File
@@ -89,9 +89,9 @@ func runCrewAt(cmd *cobra.Command, args []string) error {
if !hasSession {
existingSessions, err := t.FindSessionByWorkDir(worker.ClonePath, true)
if err == nil && len(existingSessions) > 0 {
// Found an existing session with Claude running in this directory
// Found an existing session with an agent running in this directory
existingSession := existingSessions[0]
fmt.Printf("%s Found existing Claude session '%s' in crew directory\n",
fmt.Printf("%s Found existing agent session '%s' in crew directory\n",
style.Warning.Render("⚠"),
existingSession)
fmt.Printf(" Attaching to existing session instead of creating a new one\n")
@@ -164,7 +164,11 @@ func runCrewAt(cmd *cobra.Command, args []string) error {
// Session exists - check if Claude is still running
// Uses both pane command check and UI marker detection to avoid
// restarting when user is in a subshell spawned from Claude
if !t.IsClaudeRunning(sessionID) {
agentCfg, _, err := config.ResolveAgentConfigWithOverride(townRoot, r.Path, crewAgentOverride)
if err != nil {
return fmt.Errorf("resolving agent: %w", err)
}
if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
// Claude has exited, restart it using respawn-pane
fmt.Printf("Claude exited, restarting...\n")
+20 -18
View File
@@ -447,13 +447,13 @@ func runSling(cmd *cobra.Command, args []string) error {
if targetPane == "" {
fmt.Printf("%s No pane to nudge (agent will discover work via gt prime)\n", style.Dim.Render("○"))
} else {
// Ensure Claude is ready before nudging (prevents race condition where
// Ensure agent is ready before nudging (prevents race condition where
// message arrives before Claude has fully started - see issue #115)
sessionName := getSessionFromPane(targetPane)
if sessionName != "" {
if err := ensureClaudeReady(sessionName); err != nil {
if err := ensureAgentReady(sessionName); err != nil {
// Non-fatal: warn and continue, agent will discover work via gt prime
fmt.Printf("%s Could not verify Claude ready: %v\n", style.Dim.Render("○"), err)
fmt.Printf("%s Could not verify agent ready: %v\n", style.Dim.Render("○"), err)
}
}
@@ -605,30 +605,32 @@ func getSessionFromPane(pane string) string {
return pane
}
// ensureClaudeReady waits for Claude to be ready before nudging an existing session.
// Uses the same pragmatic approach as session.Start(): poll for node process,
// accept bypass dialog if present, then wait for full initialization.
// Returns early if Claude is already running and ready.
func ensureClaudeReady(sessionName string) error {
// ensureAgentReady waits for an agent to be ready before nudging an existing session.
// Uses a pragmatic approach: wait for the pane to leave a shell, then (Claude-only)
// accept the bypass permissions warning and give it a moment to finish initializing.
func ensureAgentReady(sessionName string) error {
t := tmux.NewTmux()
// If Claude is already running, assume it's ready (session was started earlier)
if t.IsClaudeRunning(sessionName) {
// If an agent is already running, assume it's ready (session was started earlier)
if t.IsAgentRunning(sessionName) {
return nil
}
// Claude not running yet - wait for it to start (shell → node transition)
// Agent not running yet - wait for it to start (shell → program transition)
if err := t.WaitForCommand(sessionName, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
return fmt.Errorf("waiting for Claude to start: %w", err)
return fmt.Errorf("waiting for agent to start: %w", err)
}
// Accept bypass permissions warning if present
_ = t.AcceptBypassPermissionsWarning(sessionName)
// Claude-only: accept bypass permissions warning if present
if t.IsClaudeRunning(sessionName) {
_ = t.AcceptBypassPermissionsWarning(sessionName)
// Wait for Claude to be fully ready at the prompt
// PRAGMATIC APPROACH: Use fixed delay rather than detection.
// Claude startup takes ~5-8 seconds on typical machines.
time.Sleep(8 * time.Second)
// PRAGMATIC APPROACH: fixed delay rather than prompt detection.
// Claude startup takes ~5-8 seconds on typical machines.
time.Sleep(8 * time.Second)
} else {
time.Sleep(1 * time.Second)
}
return nil
}
+7 -2
View File
@@ -277,7 +277,8 @@ func startConfiguredCrew(t *tmux.Tmux, townRoot string) {
sessionID := crewSessionName(r.Name, crewName)
if running, _ := t.HasSession(sessionID); running {
// Session exists - check if Claude is still running
if !t.IsClaudeRunning(sessionID) {
agentCfg := config.ResolveAgentConfig(townRoot, r.Path)
if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
// Claude has exited, restart it
fmt.Printf(" %s %s/%s session exists, restarting Claude...\n", style.Dim.Render("○"), r.Name, crewName)
claudeCmd := config.BuildCrewStartupCommand(r.Name, crewName, r.Path, "gt prime")
@@ -800,7 +801,11 @@ func runStartCrew(cmd *cobra.Command, args []string) error {
if hasSession {
// Session exists - check if Claude is still running
if !t.IsClaudeRunning(sessionID) {
agentCfg, _, err := config.ResolveAgentConfigWithOverride(townRoot, r.Path, startCrewAgentOverride)
if err != nil {
return fmt.Errorf("resolving agent: %w", err)
}
if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
// Claude has exited, restart it with "gt prime" as initial prompt
fmt.Printf("Session exists, restarting Claude...\n")
startupCmd, err := config.BuildCrewStartupCommandWithAgentOverride(rigName, name, r.Path, "gt prime", startCrewAgentOverride)
+12
View File
@@ -1204,6 +1204,18 @@ func BuildCrewStartupCommandWithAgentOverride(rigName, crewName, rigPath, prompt
return BuildStartupCommandWithAgentOverride(envVars, rigPath, prompt, agentOverride)
}
// ExpectedPaneCommands reports the tmux pane command names that signal the
// given runtime is alive in a pane.
//
// A nil config or an empty Command yields nil. Claude shows up in tmux as
// "node", so a command whose base name is "claude" maps to "node"; any other
// runtime maps to the base name of its command.
func ExpectedPaneCommands(rc *RuntimeConfig) []string {
	if rc == nil || len(rc.Command) == 0 {
		return nil
	}

	executable := filepath.Base(rc.Command)
	if executable != "claude" {
		return []string{executable}
	}
	return []string{"node"}
}
// GetRigPrefix returns the beads prefix for a rig from rigs.json.
// Falls back to "gt" if the rig isn't found or has no prefix configured.
// townRoot is the path to the town directory (e.g., ~/gt).
+16
View File
@@ -1165,6 +1165,22 @@ func TestGetRuntimeCommand_UsesRigAgentWhenRigPathProvided(t *testing.T) {
}
}
// TestExpectedPaneCommands checks the runtime-to-pane-command mapping:
// "claude" must resolve to the single pane command "node", while "codex"
// must resolve to its own executable name.
func TestExpectedPaneCommands(t *testing.T) {
	cases := []struct {
		name    string
		command string
		want    string
	}{
		{name: "claude maps to node", command: "claude", want: "node"},
		{name: "codex maps to executable", command: "codex", want: "codex"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := ExpectedPaneCommands(&RuntimeConfig{Command: tc.command})
			if len(got) == 1 && got[0] == tc.want {
				return
			}
			t.Fatalf("ExpectedPaneCommands(%s) = %v, want %v", tc.command, got, []string{tc.want})
		})
	}
}
func TestLoadRuntimeConfigFromSettings(t *testing.T) {
// Create temp rig with custom runtime config
dir := t.TempDir()
+3 -1
View File
@@ -138,7 +138,9 @@ func (m *Manager) Start(foreground bool) error {
running, _ := t.HasSession(sessionID)
if running {
// Session exists - check if Claude is actually running (healthy vs zombie)
if t.IsClaudeRunning(sessionID) {
townRoot := filepath.Dir(m.rig.Path)
agentCfg := config.ResolveAgentConfig(townRoot, m.rig.Path)
if t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
// Healthy - Claude is running
return ErrAlreadyRunning
}
+45 -19
View File
@@ -15,8 +15,8 @@ import (
// Common errors
//
// Each sentinel is declared exactly once; compare with errors.Is.
var (
	// ErrNoServer indicates that no tmux server is running.
	ErrNoServer = errors.New("no tmux server running")
	// ErrSessionExists indicates that the session already exists.
	ErrSessionExists = errors.New("session already exists")
	// ErrSessionNotFound indicates that the session could not be found.
	ErrSessionNotFound = errors.New("session not found")
)
@@ -94,7 +94,7 @@ func (t *Tmux) EnsureSessionFresh(name, workDir string) error {
if exists {
// Session exists - check if it's a zombie
if !t.IsClaudeRunning(name) {
if !t.IsAgentRunning(name) {
// Zombie session: tmux alive but Claude dead
// Kill it so we can create a fresh one
if err := t.KillSession(name); err != nil {
@@ -390,8 +390,8 @@ func (t *Tmux) GetPaneWorkDir(session string) (string, error) {
// FindSessionByWorkDir finds tmux sessions where the pane's current working directory
// matches or is under the target directory. Returns session names that match.
// If checkClaude is true, only returns sessions that have Claude (node) running.
func (t *Tmux) FindSessionByWorkDir(targetDir string, checkClaude bool) ([]string, error) {
// If requireAgentRunning is true, only returns sessions that have some non-shell command running.
func (t *Tmux) FindSessionByWorkDir(targetDir string, requireAgentRunning bool) ([]string, error) {
sessions, err := t.ListSessions()
if err != nil {
return nil, err
@@ -410,9 +410,9 @@ func (t *Tmux) FindSessionByWorkDir(targetDir string, checkClaude bool) ([]strin
// Check if workdir matches target (exact match or subdir)
if workDir == targetDir || strings.HasPrefix(workDir, targetDir+"/") {
if checkClaude {
// Only include if Claude is running
if t.IsClaudeRunning(session) {
if requireAgentRunning {
// Only include if an agent appears to be running
if t.IsAgentRunning(session) {
matches = append(matches, session)
}
} else {
@@ -526,15 +526,39 @@ Run: gt mail inbox
return t.SendKeys(session, banner)
}
// IsClaudeRunning checks if Claude appears to be running in the session.
// Only trusts the pane command - UI markers in scrollback cause false positives.
func (t *Tmux) IsClaudeRunning(session string) bool {
// Check pane command - Claude runs as node
// IsAgentRunning checks if an agent appears to be running in the session.
//
// The decision is based on the pane's current command as reported by
// GetPaneCommand; if that lookup fails, the agent is treated as not running.
//
// If expectedPaneCommands is non-empty, the pane's current command must match one of them
// (empty entries are ignored and never match).
// If expectedPaneCommands is empty, any non-empty command that is not one of
// constants.SupportedShells counts as "agent running".
func (t *Tmux) IsAgentRunning(session string, expectedPaneCommands ...string) bool {
	cmd, err := t.GetPaneCommand(session)
	if err != nil {
		return false
	}

	if len(expectedPaneCommands) > 0 {
		for _, expected := range expectedPaneCommands {
			if expected != "" && cmd == expected {
				return true
			}
		}
		return false
	}

	// Fallback: any non-shell command counts as running.
	if cmd == "" {
		return false
	}
	for _, shell := range constants.SupportedShells {
		if cmd == shell {
			return false
		}
	}
	return true
}
// IsClaudeRunning reports whether Claude appears to be running in the session.
// The verdict rests solely on the pane command, because UI markers left in
// scrollback cause false positives.
func (t *Tmux) IsClaudeRunning(session string) bool {
	// Claude runs as node, so that is the pane command to look for.
	const claudePaneCommand = "node"
	return t.IsAgentRunning(session, claudePaneCommand)
}
// WaitForCommand polls until the pane is NOT running one of the excluded commands.
@@ -595,14 +619,16 @@ func (t *Tmux) WaitForShellReady(session string, timeout time.Duration) error {
// ZFC (Zero False Commands) principle: AI should observe AI, not regex.
//
// Bootstrap (acceptable):
// During cold startup when no AI agent is running, the daemon uses this
// function to get the Deacon online. Regex is acceptable here.
//
// During cold startup when no AI agent is running, the daemon uses this
// function to get the Deacon online. Regex is acceptable here.
//
// Steady-State (use AI observation instead):
// Once any AI agent is running, observation should be AI-to-AI:
// - Deacon starting polecats → use 'gt deacon pending' + AI analysis
// - Deacon restarting → Mayor watches via 'gt peek'
// - Mayor restarting → Deacon watches via 'gt peek'
//
// Once any AI agent is running, observation should be AI-to-AI:
// - Deacon starting polecats → use 'gt deacon pending' + AI analysis
// - Deacon restarting → Mayor watches via 'gt peek'
// - Mayor restarting → Deacon watches via 'gt peek'
//
// See: gt deacon pending (ZFC-compliant AI observation)
// See: gt deacon trigger-pending (bootstrap mode, regex-based)
+5
View File
@@ -260,6 +260,11 @@ func TestEnsureSessionFresh_ZombieSession(t *testing.T) {
t.Skip("session unexpectedly has Claude running - can't test zombie case")
}
// Verify generic agent check also treats it as not running (shell session)
if tm.IsAgentRunning(sessionName) {
t.Fatalf("expected IsAgentRunning(%q) to be false for a fresh shell session", sessionName)
}
// EnsureSessionFresh should kill the zombie and create fresh session
// This should NOT error with "session already exists"
if err := tm.EnsureSessionFresh(sessionName, ""); err != nil {