feat: runtime-aware tmux agent checks
This commit is contained in:
@@ -150,8 +150,8 @@ func runLiveCosts() error {
|
||||
// Extract cost from content
|
||||
cost := extractCost(content)
|
||||
|
||||
// Check if Claude is running
|
||||
running := t.IsClaudeRunning(session)
|
||||
// Check if an agent appears to be running
|
||||
running := t.IsAgentRunning(session)
|
||||
|
||||
costs = append(costs, SessionCost{
|
||||
Session: session,
|
||||
@@ -428,7 +428,6 @@ func extractCost(content string) float64 {
|
||||
return cost
|
||||
}
|
||||
|
||||
|
||||
func outputCostsJSON(output CostsOutput) error {
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
@@ -89,9 +89,9 @@ func runCrewAt(cmd *cobra.Command, args []string) error {
|
||||
if !hasSession {
|
||||
existingSessions, err := t.FindSessionByWorkDir(worker.ClonePath, true)
|
||||
if err == nil && len(existingSessions) > 0 {
|
||||
// Found an existing session with Claude running in this directory
|
||||
// Found an existing session with an agent running in this directory
|
||||
existingSession := existingSessions[0]
|
||||
fmt.Printf("%s Found existing Claude session '%s' in crew directory\n",
|
||||
fmt.Printf("%s Found existing agent session '%s' in crew directory\n",
|
||||
style.Warning.Render("⚠"),
|
||||
existingSession)
|
||||
fmt.Printf(" Attaching to existing session instead of creating a new one\n")
|
||||
@@ -164,7 +164,11 @@ func runCrewAt(cmd *cobra.Command, args []string) error {
|
||||
// Session exists - check if Claude is still running
|
||||
// Uses both pane command check and UI marker detection to avoid
|
||||
// restarting when user is in a subshell spawned from Claude
|
||||
if !t.IsClaudeRunning(sessionID) {
|
||||
agentCfg, _, err := config.ResolveAgentConfigWithOverride(townRoot, r.Path, crewAgentOverride)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving agent: %w", err)
|
||||
}
|
||||
if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
|
||||
// Claude has exited, restart it using respawn-pane
|
||||
fmt.Printf("Claude exited, restarting...\n")
|
||||
|
||||
|
||||
+20
-18
@@ -447,13 +447,13 @@ func runSling(cmd *cobra.Command, args []string) error {
|
||||
if targetPane == "" {
|
||||
fmt.Printf("%s No pane to nudge (agent will discover work via gt prime)\n", style.Dim.Render("○"))
|
||||
} else {
|
||||
// Ensure Claude is ready before nudging (prevents race condition where
|
||||
// Ensure agent is ready before nudging (prevents race condition where
|
||||
// message arrives before Claude has fully started - see issue #115)
|
||||
sessionName := getSessionFromPane(targetPane)
|
||||
if sessionName != "" {
|
||||
if err := ensureClaudeReady(sessionName); err != nil {
|
||||
if err := ensureAgentReady(sessionName); err != nil {
|
||||
// Non-fatal: warn and continue, agent will discover work via gt prime
|
||||
fmt.Printf("%s Could not verify Claude ready: %v\n", style.Dim.Render("○"), err)
|
||||
fmt.Printf("%s Could not verify agent ready: %v\n", style.Dim.Render("○"), err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -605,30 +605,32 @@ func getSessionFromPane(pane string) string {
|
||||
return pane
|
||||
}
|
||||
|
||||
// ensureClaudeReady waits for Claude to be ready before nudging an existing session.
|
||||
// Uses the same pragmatic approach as session.Start(): poll for node process,
|
||||
// accept bypass dialog if present, then wait for full initialization.
|
||||
// Returns early if Claude is already running and ready.
|
||||
func ensureClaudeReady(sessionName string) error {
|
||||
// ensureAgentReady waits for an agent to be ready before nudging an existing session.
|
||||
// Uses a pragmatic approach: wait for the pane to leave a shell, then (Claude-only)
|
||||
// accept the bypass permissions warning and give it a moment to finish initializing.
|
||||
func ensureAgentReady(sessionName string) error {
|
||||
t := tmux.NewTmux()
|
||||
|
||||
// If Claude is already running, assume it's ready (session was started earlier)
|
||||
if t.IsClaudeRunning(sessionName) {
|
||||
// If an agent is already running, assume it's ready (session was started earlier)
|
||||
if t.IsAgentRunning(sessionName) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Claude not running yet - wait for it to start (shell → node transition)
|
||||
// Agent not running yet - wait for it to start (shell → program transition)
|
||||
if err := t.WaitForCommand(sessionName, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
|
||||
return fmt.Errorf("waiting for Claude to start: %w", err)
|
||||
return fmt.Errorf("waiting for agent to start: %w", err)
|
||||
}
|
||||
|
||||
// Accept bypass permissions warning if present
|
||||
_ = t.AcceptBypassPermissionsWarning(sessionName)
|
||||
// Claude-only: accept bypass permissions warning if present
|
||||
if t.IsClaudeRunning(sessionName) {
|
||||
_ = t.AcceptBypassPermissionsWarning(sessionName)
|
||||
|
||||
// Wait for Claude to be fully ready at the prompt
|
||||
// PRAGMATIC APPROACH: Use fixed delay rather than detection.
|
||||
// Claude startup takes ~5-8 seconds on typical machines.
|
||||
time.Sleep(8 * time.Second)
|
||||
// PRAGMATIC APPROACH: fixed delay rather than prompt detection.
|
||||
// Claude startup takes ~5-8 seconds on typical machines.
|
||||
time.Sleep(8 * time.Second)
|
||||
} else {
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -277,7 +277,8 @@ func startConfiguredCrew(t *tmux.Tmux, townRoot string) {
|
||||
sessionID := crewSessionName(r.Name, crewName)
|
||||
if running, _ := t.HasSession(sessionID); running {
|
||||
// Session exists - check if Claude is still running
|
||||
if !t.IsClaudeRunning(sessionID) {
|
||||
agentCfg := config.ResolveAgentConfig(townRoot, r.Path)
|
||||
if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
|
||||
// Claude has exited, restart it
|
||||
fmt.Printf(" %s %s/%s session exists, restarting Claude...\n", style.Dim.Render("○"), r.Name, crewName)
|
||||
claudeCmd := config.BuildCrewStartupCommand(r.Name, crewName, r.Path, "gt prime")
|
||||
@@ -800,7 +801,11 @@ func runStartCrew(cmd *cobra.Command, args []string) error {
|
||||
|
||||
if hasSession {
|
||||
// Session exists - check if Claude is still running
|
||||
if !t.IsClaudeRunning(sessionID) {
|
||||
agentCfg, _, err := config.ResolveAgentConfigWithOverride(townRoot, r.Path, startCrewAgentOverride)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving agent: %w", err)
|
||||
}
|
||||
if !t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
|
||||
// Claude has exited, restart it with "gt prime" as initial prompt
|
||||
fmt.Printf("Session exists, restarting Claude...\n")
|
||||
startupCmd, err := config.BuildCrewStartupCommandWithAgentOverride(rigName, name, r.Path, "gt prime", startCrewAgentOverride)
|
||||
|
||||
@@ -1204,6 +1204,18 @@ func BuildCrewStartupCommandWithAgentOverride(rigName, crewName, rigPath, prompt
|
||||
return BuildStartupCommandWithAgentOverride(envVars, rigPath, prompt, agentOverride)
|
||||
}
|
||||
|
||||
// ExpectedPaneCommands returns tmux pane command names that indicate the runtime is running.
|
||||
// For example, Claude runs as "node", while most other runtimes report their executable name.
|
||||
func ExpectedPaneCommands(rc *RuntimeConfig) []string {
|
||||
if rc == nil || rc.Command == "" {
|
||||
return nil
|
||||
}
|
||||
if filepath.Base(rc.Command) == "claude" {
|
||||
return []string{"node"}
|
||||
}
|
||||
return []string{filepath.Base(rc.Command)}
|
||||
}
|
||||
|
||||
// GetRigPrefix returns the beads prefix for a rig from rigs.json.
|
||||
// Falls back to "gt" if the rig isn't found or has no prefix configured.
|
||||
// townRoot is the path to the town directory (e.g., ~/gt).
|
||||
|
||||
@@ -1165,6 +1165,22 @@ func TestGetRuntimeCommand_UsesRigAgentWhenRigPathProvided(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpectedPaneCommands(t *testing.T) {
|
||||
t.Run("claude maps to node", func(t *testing.T) {
|
||||
got := ExpectedPaneCommands(&RuntimeConfig{Command: "claude"})
|
||||
if len(got) != 1 || got[0] != "node" {
|
||||
t.Fatalf("ExpectedPaneCommands(claude) = %v, want %v", got, []string{"node"})
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("codex maps to executable", func(t *testing.T) {
|
||||
got := ExpectedPaneCommands(&RuntimeConfig{Command: "codex"})
|
||||
if len(got) != 1 || got[0] != "codex" {
|
||||
t.Fatalf("ExpectedPaneCommands(codex) = %v, want %v", got, []string{"codex"})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestLoadRuntimeConfigFromSettings(t *testing.T) {
|
||||
// Create temp rig with custom runtime config
|
||||
dir := t.TempDir()
|
||||
|
||||
@@ -138,7 +138,9 @@ func (m *Manager) Start(foreground bool) error {
|
||||
running, _ := t.HasSession(sessionID)
|
||||
if running {
|
||||
// Session exists - check if Claude is actually running (healthy vs zombie)
|
||||
if t.IsClaudeRunning(sessionID) {
|
||||
townRoot := filepath.Dir(m.rig.Path)
|
||||
agentCfg := config.ResolveAgentConfig(townRoot, m.rig.Path)
|
||||
if t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
|
||||
// Healthy - Claude is running
|
||||
return ErrAlreadyRunning
|
||||
}
|
||||
|
||||
+45
-19
@@ -15,8 +15,8 @@ import (
|
||||
|
||||
// Common errors
|
||||
var (
|
||||
ErrNoServer = errors.New("no tmux server running")
|
||||
ErrSessionExists = errors.New("session already exists")
|
||||
ErrNoServer = errors.New("no tmux server running")
|
||||
ErrSessionExists = errors.New("session already exists")
|
||||
ErrSessionNotFound = errors.New("session not found")
|
||||
)
|
||||
|
||||
@@ -94,7 +94,7 @@ func (t *Tmux) EnsureSessionFresh(name, workDir string) error {
|
||||
|
||||
if exists {
|
||||
// Session exists - check if it's a zombie
|
||||
if !t.IsClaudeRunning(name) {
|
||||
if !t.IsAgentRunning(name) {
|
||||
// Zombie session: tmux alive but Claude dead
|
||||
// Kill it so we can create a fresh one
|
||||
if err := t.KillSession(name); err != nil {
|
||||
@@ -390,8 +390,8 @@ func (t *Tmux) GetPaneWorkDir(session string) (string, error) {
|
||||
|
||||
// FindSessionByWorkDir finds tmux sessions where the pane's current working directory
|
||||
// matches or is under the target directory. Returns session names that match.
|
||||
// If checkClaude is true, only returns sessions that have Claude (node) running.
|
||||
func (t *Tmux) FindSessionByWorkDir(targetDir string, checkClaude bool) ([]string, error) {
|
||||
// If requireAgentRunning is true, only returns sessions that have some non-shell command running.
|
||||
func (t *Tmux) FindSessionByWorkDir(targetDir string, requireAgentRunning bool) ([]string, error) {
|
||||
sessions, err := t.ListSessions()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -410,9 +410,9 @@ func (t *Tmux) FindSessionByWorkDir(targetDir string, checkClaude bool) ([]strin
|
||||
|
||||
// Check if workdir matches target (exact match or subdir)
|
||||
if workDir == targetDir || strings.HasPrefix(workDir, targetDir+"/") {
|
||||
if checkClaude {
|
||||
// Only include if Claude is running
|
||||
if t.IsClaudeRunning(session) {
|
||||
if requireAgentRunning {
|
||||
// Only include if an agent appears to be running
|
||||
if t.IsAgentRunning(session) {
|
||||
matches = append(matches, session)
|
||||
}
|
||||
} else {
|
||||
@@ -526,15 +526,39 @@ Run: gt mail inbox
|
||||
return t.SendKeys(session, banner)
|
||||
}
|
||||
|
||||
// IsClaudeRunning checks if Claude appears to be running in the session.
|
||||
// Only trusts the pane command - UI markers in scrollback cause false positives.
|
||||
func (t *Tmux) IsClaudeRunning(session string) bool {
|
||||
// Check pane command - Claude runs as node
|
||||
// IsAgentRunning checks if an agent appears to be running in the session.
|
||||
//
|
||||
// If expectedPaneCommands is non-empty, the pane's current command must match one of them.
|
||||
// If expectedPaneCommands is empty, any non-shell command counts as "agent running".
|
||||
func (t *Tmux) IsAgentRunning(session string, expectedPaneCommands ...string) bool {
|
||||
cmd, err := t.GetPaneCommand(session)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return cmd == "node"
|
||||
|
||||
if len(expectedPaneCommands) > 0 {
|
||||
for _, expected := range expectedPaneCommands {
|
||||
if expected != "" && cmd == expected {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Fallback: any non-shell command counts as running.
|
||||
for _, shell := range constants.SupportedShells {
|
||||
if cmd == shell {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return cmd != ""
|
||||
}
|
||||
|
||||
// IsClaudeRunning checks if Claude appears to be running in the session.
|
||||
// Only trusts the pane command - UI markers in scrollback cause false positives.
|
||||
func (t *Tmux) IsClaudeRunning(session string) bool {
|
||||
// Claude runs as node
|
||||
return t.IsAgentRunning(session, "node")
|
||||
}
|
||||
|
||||
// WaitForCommand polls until the pane is NOT running one of the excluded commands.
|
||||
@@ -595,14 +619,16 @@ func (t *Tmux) WaitForShellReady(session string, timeout time.Duration) error {
|
||||
// ZFC (Zero False Commands) principle: AI should observe AI, not regex.
|
||||
//
|
||||
// Bootstrap (acceptable):
|
||||
// During cold startup when no AI agent is running, the daemon uses this
|
||||
// function to get the Deacon online. Regex is acceptable here.
|
||||
//
|
||||
// During cold startup when no AI agent is running, the daemon uses this
|
||||
// function to get the Deacon online. Regex is acceptable here.
|
||||
//
|
||||
// Steady-State (use AI observation instead):
|
||||
// Once any AI agent is running, observation should be AI-to-AI:
|
||||
// - Deacon starting polecats → use 'gt deacon pending' + AI analysis
|
||||
// - Deacon restarting → Mayor watches via 'gt peek'
|
||||
// - Mayor restarting → Deacon watches via 'gt peek'
|
||||
//
|
||||
// Once any AI agent is running, observation should be AI-to-AI:
|
||||
// - Deacon starting polecats → use 'gt deacon pending' + AI analysis
|
||||
// - Deacon restarting → Mayor watches via 'gt peek'
|
||||
// - Mayor restarting → Deacon watches via 'gt peek'
|
||||
//
|
||||
// See: gt deacon pending (ZFC-compliant AI observation)
|
||||
// See: gt deacon trigger-pending (bootstrap mode, regex-based)
|
||||
|
||||
@@ -260,6 +260,11 @@ func TestEnsureSessionFresh_ZombieSession(t *testing.T) {
|
||||
t.Skip("session unexpectedly has Claude running - can't test zombie case")
|
||||
}
|
||||
|
||||
// Verify generic agent check also treats it as not running (shell session)
|
||||
if tm.IsAgentRunning(sessionName) {
|
||||
t.Fatalf("expected IsAgentRunning(%q) to be false for a fresh shell session", sessionName)
|
||||
}
|
||||
|
||||
// EnsureSessionFresh should kill the zombie and create fresh session
|
||||
// This should NOT error with "session already exists"
|
||||
if err := tm.EnsureSessionFresh(sessionName, ""); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user