fix: remove observable states from agent_state (discover, don't track)
The agent_state field was recording observable states such as "running", "dead", and "idle", which violated the "Discover, Don't Track" principle. This caused stale-state bugs where agents were marked "dead" in beads but were actually running in tmux.

Changes:
- Remove the daemon's checkStaleAgents(), which marked agents "dead"
- Simplify ensureXxxRunning() to use tmux.IsClaudeRunning() directly
- Remove reportAgentState() calls from gt prime and gt handoff
- Add SetHookBead/ClearHookBead helpers that don't update agent_state
- Use ClearHookBead in gt done and gt unsling
- Simplify gt status to derive state from tmux, not from the bead

Non-observable states (stuck, awaiting-gate, muted, paused) are still set because they represent intentional agent decisions that can't be discovered from tmux state.

Fixes: gt-zecmc

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Steve Yegge
parent
950e35317e
commit
1f44482ad0
@@ -356,12 +356,10 @@ func runDone(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// updateAgentStateOnDone updates the agent bead state when work is complete.
|
||||
// Maps exit type to agent state:
|
||||
// - COMPLETED → "done"
|
||||
// - ESCALATED → "stuck"
|
||||
// - DEFERRED → "idle"
|
||||
// - PHASE_COMPLETE → "awaiting-gate"
|
||||
// updateAgentStateOnDone clears the agent's hook and reports cleanup status.
|
||||
// Per gt-zecmc: observable states ("done", "idle") removed - use tmux to discover.
|
||||
// Non-observable states ("stuck", "awaiting-gate") are still set since they represent
|
||||
// intentional agent decisions that can't be observed from tmux.
|
||||
//
|
||||
// Also self-reports cleanup_status for ZFC compliance (#10).
|
||||
func updateAgentStateOnDone(cwd, townRoot, exitType, _ string) { // issueID unused but kept for future audit logging
|
||||
@@ -384,22 +382,6 @@ func updateAgentStateOnDone(cwd, townRoot, exitType, _ string) { // issueID unus
|
||||
return
|
||||
}
|
||||
|
||||
// Map exit type to agent state
|
||||
var newState string
|
||||
switch exitType {
|
||||
case ExitCompleted:
|
||||
newState = "done"
|
||||
case ExitEscalated:
|
||||
newState = "stuck"
|
||||
case ExitDeferred:
|
||||
newState = "idle"
|
||||
case ExitPhaseComplete:
|
||||
newState = "awaiting-gate"
|
||||
default:
|
||||
return
|
||||
}
|
||||
|
||||
// Update agent bead with new state and clear hook_bead (work is done)
|
||||
// Use rig path for slot commands - bd slot doesn't route from town root
|
||||
var beadsPath string
|
||||
switch ctx.Role {
|
||||
@@ -423,11 +405,26 @@ func updateAgentStateOnDone(cwd, townRoot, exitType, _ string) { // issueID unus
|
||||
}
|
||||
}
|
||||
|
||||
emptyHook := ""
|
||||
if err := bd.UpdateAgentState(agentBeadID, newState, &emptyHook); err != nil {
|
||||
// Log warning instead of silent ignore - helps debug cross-beads issues
|
||||
fmt.Fprintf(os.Stderr, "Warning: couldn't update agent %s state on done: %v\n", agentBeadID, err)
|
||||
return
|
||||
// Clear the hook (work is done) - gt-zecmc
|
||||
if err := bd.ClearHookBead(agentBeadID); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Warning: couldn't clear agent %s hook: %v\n", agentBeadID, err)
|
||||
}
|
||||
|
||||
// Only set non-observable states - "stuck" and "awaiting-gate" are intentional
|
||||
// agent decisions that can't be discovered from tmux. Skip "done" and "idle"
|
||||
// since those are observable (no session = done, session + no hook = idle).
|
||||
switch exitType {
|
||||
case ExitEscalated:
|
||||
// "stuck" = agent is requesting help - not observable from tmux
|
||||
if _, err := bd.Run("agent", "state", agentBeadID, "stuck"); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Warning: couldn't set agent %s to stuck: %v\n", agentBeadID, err)
|
||||
}
|
||||
case ExitPhaseComplete:
|
||||
// "awaiting-gate" = agent is waiting for external trigger - not observable
|
||||
if _, err := bd.Run("agent", "state", agentBeadID, "awaiting-gate"); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Warning: couldn't set agent %s to awaiting-gate: %v\n", agentBeadID, err)
|
||||
}
|
||||
// ExitCompleted and ExitDeferred don't set state - observable from tmux
|
||||
}
|
||||
|
||||
// ZFC #10: Self-report cleanup status
|
||||
|
||||
@@ -182,19 +182,9 @@ func runHandoff(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Report agent state as stopped (ZFC: agents self-report state)
|
||||
cwd, _ := os.Getwd()
|
||||
if townRoot, _ := workspace.FindFromCwd(); townRoot != "" {
|
||||
if roleInfo, err := GetRoleWithContext(cwd, townRoot); err == nil {
|
||||
reportAgentState(RoleContext{
|
||||
Role: roleInfo.Role,
|
||||
Rig: roleInfo.Rig,
|
||||
Polecat: roleInfo.Polecat,
|
||||
TownRoot: townRoot,
|
||||
WorkDir: cwd,
|
||||
}, "stopped")
|
||||
}
|
||||
}
|
||||
// NOTE: reportAgentState("stopped") removed (gt-zecmc)
|
||||
// Agent liveness is observable from tmux - no need to record it in bead.
|
||||
// "Discover, don't track" principle: reality is truth, state is derived.
|
||||
|
||||
// Clear scrollback history before respawn (resets copy-mode from [0/N] to [0/0])
|
||||
if err := t.ClearHistory(pane); err != nil {
|
||||
|
||||
@@ -149,8 +149,9 @@ func runPrime(cmd *cobra.Command, args []string) error {
|
||||
// Ensure beads redirect exists for worktree-based roles
|
||||
ensureBeadsRedirect(ctx)
|
||||
|
||||
// Report agent state as running (ZFC: agents self-report state)
|
||||
reportAgentState(ctx, "running")
|
||||
// NOTE: reportAgentState("running") removed (gt-zecmc)
|
||||
// Agent liveness is observable from tmux - no need to record it in bead.
|
||||
// "Discover, don't track" principle: reality is truth, state is derived.
|
||||
|
||||
// Emit session_start event for seance discovery
|
||||
emitSessionEvent(ctx)
|
||||
|
||||
@@ -1033,13 +1033,13 @@ func updateAgentHookBead(agentID, beadID, workDir, townBeadsDir string) {
|
||||
}
|
||||
|
||||
// Run from workDir WITHOUT BEADS_DIR to enable redirect-based routing.
|
||||
// Update agent_state to "running" and set hook_bead to the slung work.
|
||||
// For same-database beads, the hook slot is set via `bd slot set`.
|
||||
// Set hook_bead to the slung work (gt-zecmc: removed agent_state update).
|
||||
// Agent liveness is observable from tmux - no need to record it in bead.
|
||||
// For cross-database scenarios, slot set may fail gracefully (warning only).
|
||||
bd := beads.New(bdWorkDir)
|
||||
if err := bd.UpdateAgentState(agentBeadID, "running", &beadID); err != nil {
|
||||
if err := bd.SetHookBead(agentBeadID, beadID); err != nil {
|
||||
// Log warning instead of silent ignore - helps debug cross-beads issues
|
||||
fmt.Fprintf(os.Stderr, "Warning: couldn't update agent %s state: %v\n", agentBeadID, err)
|
||||
fmt.Fprintf(os.Stderr, "Warning: couldn't set agent %s hook: %v\n", agentBeadID, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -586,40 +586,34 @@ func outputStatusText(status TownStatus) error {
|
||||
// renderAgentDetails renders full agent bead details
|
||||
func renderAgentDetails(agent AgentRuntime, indent string, hooks []AgentHookInfo, townRoot string) { //nolint:unparam // indent kept for future customization
|
||||
// Line 1: Agent bead ID + status
|
||||
// Reconcile bead state with tmux session state to surface mismatches
|
||||
// States: "running" (active), "idle" (waiting), "stopped", "dead", etc.
|
||||
beadState := agent.State
|
||||
// Per gt-zecmc: derive status from tmux (observable reality), not bead state.
|
||||
// "Discover, don't track" - agent liveness is observable from tmux session.
|
||||
sessionExists := agent.Running
|
||||
|
||||
// "idle" is a normal operational state (running but waiting for work)
|
||||
// Treat it the same as "running" for reconciliation purposes
|
||||
beadSaysRunning := beadState == "running" || beadState == "idle" || beadState == ""
|
||||
|
||||
var statusStr string
|
||||
var stateInfo string
|
||||
|
||||
switch {
|
||||
case beadSaysRunning && sessionExists:
|
||||
// Normal running state - session exists and bead agrees
|
||||
if sessionExists {
|
||||
statusStr = style.Success.Render("running")
|
||||
case beadSaysRunning && !sessionExists:
|
||||
// Bead thinks running but session is gone - stale bead state
|
||||
statusStr = style.Error.Render("running")
|
||||
stateInfo = style.Warning.Render(" [dead]")
|
||||
case !beadSaysRunning && sessionExists:
|
||||
// Session exists but bead says stopped/dead - mismatch!
|
||||
// This is the key case: tmux says alive, bead says dead/stopped
|
||||
statusStr = style.Success.Render("running")
|
||||
stateInfo = style.Warning.Render(" [bead: " + beadState + "]")
|
||||
default:
|
||||
// Both agree: stopped
|
||||
} else {
|
||||
statusStr = style.Error.Render("stopped")
|
||||
}
|
||||
|
||||
// Add agent state info if not already shown and state is interesting
|
||||
// Skip "idle" and "running" as they're normal operational states
|
||||
if stateInfo == "" && beadState != "" && beadState != "idle" && beadState != "running" {
|
||||
// Show non-observable states that represent intentional agent decisions.
|
||||
// These can't be discovered from tmux and are legitimately recorded in beads.
|
||||
beadState := agent.State
|
||||
switch beadState {
|
||||
case "stuck":
|
||||
// Agent escalated - needs help
|
||||
stateInfo = style.Warning.Render(" [stuck]")
|
||||
case "awaiting-gate":
|
||||
// Agent waiting for external trigger (phase gate)
|
||||
stateInfo = style.Dim.Render(" [awaiting-gate]")
|
||||
case "muted", "paused", "degraded":
|
||||
// Other intentional non-observable states
|
||||
stateInfo = style.Dim.Render(fmt.Sprintf(" [%s]", beadState))
|
||||
// Ignore observable states: "running", "idle", "dead", "done", "stopped", ""
|
||||
// These should be derived from tmux, not bead.
|
||||
}
|
||||
|
||||
// Build agent bead ID using canonical naming: prefix-rig-role-name
|
||||
@@ -741,22 +735,8 @@ func formatMQSummaryCompact(mq *MQSummary) string {
|
||||
|
||||
// renderAgentCompactWithSuffix renders a single-line agent status with an extra suffix
|
||||
func renderAgentCompactWithSuffix(agent AgentRuntime, indent string, hooks []AgentHookInfo, townRoot string, suffix string) {
|
||||
// Build status indicator
|
||||
var statusIndicator string
|
||||
beadState := agent.State
|
||||
sessionExists := agent.Running
|
||||
beadSaysRunning := beadState == "running" || beadState == "idle" || beadState == ""
|
||||
|
||||
switch {
|
||||
case beadSaysRunning && sessionExists:
|
||||
statusIndicator = style.Success.Render("●")
|
||||
case beadSaysRunning && !sessionExists:
|
||||
statusIndicator = style.Error.Render("●") + style.Warning.Render(" dead")
|
||||
case !beadSaysRunning && sessionExists:
|
||||
statusIndicator = style.Success.Render("●") + style.Warning.Render(" ["+beadState+"]")
|
||||
default:
|
||||
statusIndicator = style.Error.Render("○")
|
||||
}
|
||||
// Build status indicator (gt-zecmc: use tmux state, not bead state)
|
||||
statusIndicator := buildStatusIndicator(agent)
|
||||
|
||||
// Get hook info
|
||||
hookBead := agent.HookBead
|
||||
@@ -795,22 +775,8 @@ func renderAgentCompactWithSuffix(agent AgentRuntime, indent string, hooks []Age
|
||||
|
||||
// renderAgentCompact renders a single-line agent status
|
||||
func renderAgentCompact(agent AgentRuntime, indent string, hooks []AgentHookInfo, townRoot string) {
|
||||
// Build status indicator
|
||||
var statusIndicator string
|
||||
beadState := agent.State
|
||||
sessionExists := agent.Running
|
||||
beadSaysRunning := beadState == "running" || beadState == "idle" || beadState == ""
|
||||
|
||||
switch {
|
||||
case beadSaysRunning && sessionExists:
|
||||
statusIndicator = style.Success.Render("●")
|
||||
case beadSaysRunning && !sessionExists:
|
||||
statusIndicator = style.Error.Render("●") + style.Warning.Render(" dead")
|
||||
case !beadSaysRunning && sessionExists:
|
||||
statusIndicator = style.Success.Render("●") + style.Warning.Render(" ["+beadState+"]")
|
||||
default:
|
||||
statusIndicator = style.Error.Render("○")
|
||||
}
|
||||
// Build status indicator (gt-zecmc: use tmux state, not bead state)
|
||||
statusIndicator := buildStatusIndicator(agent)
|
||||
|
||||
// Get hook info
|
||||
hookBead := agent.HookBead
|
||||
@@ -847,6 +813,35 @@ func renderAgentCompact(agent AgentRuntime, indent string, hooks []AgentHookInfo
|
||||
fmt.Printf("%s%-12s %s%s%s\n", indent, agent.Name, statusIndicator, hookSuffix, mailSuffix)
|
||||
}
|
||||
|
||||
// buildStatusIndicator creates the visual status indicator for an agent.
|
||||
// Per gt-zecmc: uses tmux state (observable reality), not bead state.
|
||||
// Non-observable states (stuck, awaiting-gate, muted, etc.) are shown as suffixes.
|
||||
func buildStatusIndicator(agent AgentRuntime) string {
|
||||
sessionExists := agent.Running
|
||||
|
||||
// Base indicator from tmux state
|
||||
var indicator string
|
||||
if sessionExists {
|
||||
indicator = style.Success.Render("●")
|
||||
} else {
|
||||
indicator = style.Error.Render("○")
|
||||
}
|
||||
|
||||
// Add non-observable state suffix if present
|
||||
beadState := agent.State
|
||||
switch beadState {
|
||||
case "stuck":
|
||||
indicator += style.Warning.Render(" stuck")
|
||||
case "awaiting-gate":
|
||||
indicator += style.Dim.Render(" gate")
|
||||
case "muted", "paused", "degraded":
|
||||
indicator += style.Dim.Render(" " + beadState)
|
||||
// Ignore observable states: running, idle, dead, done, stopped, ""
|
||||
}
|
||||
|
||||
return indicator
|
||||
}
|
||||
|
||||
// formatHookInfo formats the hook bead and title for display
|
||||
func formatHookInfo(hookBead, title string, maxLen int) string {
|
||||
if hookBead == "" {
|
||||
|
||||
@@ -162,9 +162,8 @@ func runUnsling(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Clear the hook by updating agent bead with empty hook_bead
|
||||
emptyHook := ""
|
||||
if err := b.UpdateAgentState(agentBeadID, "running", &emptyHook); err != nil {
|
||||
// Clear the hook (gt-zecmc: removed agent_state update - observable from tmux)
|
||||
if err := b.ClearHookBead(agentBeadID); err != nil {
|
||||
return fmt.Errorf("clearing hook from agent bead %s: %w", agentBeadID, err)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user