fix: remove observable states from agent_state (discover, don't track)

The agent_state field was recording observable state like "running",
"dead", "idle" which violated the "Discover, Don't Track" principle.
This caused stale state bugs where agents were marked "dead" in beads
but actually running in tmux.

Changes:
- Remove daemon's checkStaleAgents() which marked agents "dead"
- Simplify ensureXxxRunning() to use tmux.IsClaudeRunning() directly
- Remove reportAgentState() calls from gt prime and gt handoff
- Add SetHookBead/ClearHookBead helpers that don't update agent_state
- Use ClearHookBead in gt done and gt unsling
- Simplify gt status to derive state from tmux, not bead

Non-observable states (stuck, awaiting-gate, muted, paused) are still
set because they represent intentional agent decisions that can't be
discovered from tmux state.

Fixes: gt-zecmc

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gastown/crew/joe
2026-01-06 20:32:02 -08:00
committed by Steve Yegge
parent 950e35317e
commit 1f44482ad0
8 changed files with 155 additions and 231 deletions

View File

@@ -356,12 +356,10 @@ func runDone(cmd *cobra.Command, args []string) error {
return nil
}
// updateAgentStateOnDone updates the agent bead state when work is complete.
// Maps exit type to agent state:
// - COMPLETED → "done"
// - ESCALATED → "stuck"
// - DEFERRED → "idle"
// - PHASE_COMPLETE → "awaiting-gate"
// updateAgentStateOnDone clears the agent's hook and reports cleanup status.
// Per gt-zecmc: observable states ("done", "idle") removed - use tmux to discover.
// Non-observable states ("stuck", "awaiting-gate") are still set since they represent
// intentional agent decisions that can't be observed from tmux.
//
// Also self-reports cleanup_status for ZFC compliance (#10).
func updateAgentStateOnDone(cwd, townRoot, exitType, _ string) { // issueID unused but kept for future audit logging
@@ -384,22 +382,6 @@ func updateAgentStateOnDone(cwd, townRoot, exitType, _ string) { // issueID unus
return
}
// Map exit type to agent state
var newState string
switch exitType {
case ExitCompleted:
newState = "done"
case ExitEscalated:
newState = "stuck"
case ExitDeferred:
newState = "idle"
case ExitPhaseComplete:
newState = "awaiting-gate"
default:
return
}
// Update agent bead with new state and clear hook_bead (work is done)
// Use rig path for slot commands - bd slot doesn't route from town root
var beadsPath string
switch ctx.Role {
@@ -423,11 +405,26 @@ func updateAgentStateOnDone(cwd, townRoot, exitType, _ string) { // issueID unus
}
}
emptyHook := ""
if err := bd.UpdateAgentState(agentBeadID, newState, &emptyHook); err != nil {
// Log warning instead of silent ignore - helps debug cross-beads issues
fmt.Fprintf(os.Stderr, "Warning: couldn't update agent %s state on done: %v\n", agentBeadID, err)
return
// Clear the hook (work is done) - gt-zecmc
if err := bd.ClearHookBead(agentBeadID); err != nil {
fmt.Fprintf(os.Stderr, "Warning: couldn't clear agent %s hook: %v\n", agentBeadID, err)
}
// Only set non-observable states - "stuck" and "awaiting-gate" are intentional
// agent decisions that can't be discovered from tmux. Skip "done" and "idle"
// since those are observable (no session = done, session + no hook = idle).
switch exitType {
case ExitEscalated:
// "stuck" = agent is requesting help - not observable from tmux
if _, err := bd.Run("agent", "state", agentBeadID, "stuck"); err != nil {
fmt.Fprintf(os.Stderr, "Warning: couldn't set agent %s to stuck: %v\n", agentBeadID, err)
}
case ExitPhaseComplete:
// "awaiting-gate" = agent is waiting for external trigger - not observable
if _, err := bd.Run("agent", "state", agentBeadID, "awaiting-gate"); err != nil {
fmt.Fprintf(os.Stderr, "Warning: couldn't set agent %s to awaiting-gate: %v\n", agentBeadID, err)
}
// ExitCompleted and ExitDeferred don't set state - observable from tmux
}
// ZFC #10: Self-report cleanup status

View File

@@ -182,19 +182,9 @@ func runHandoff(cmd *cobra.Command, args []string) error {
}
}
// Report agent state as stopped (ZFC: agents self-report state)
cwd, _ := os.Getwd()
if townRoot, _ := workspace.FindFromCwd(); townRoot != "" {
if roleInfo, err := GetRoleWithContext(cwd, townRoot); err == nil {
reportAgentState(RoleContext{
Role: roleInfo.Role,
Rig: roleInfo.Rig,
Polecat: roleInfo.Polecat,
TownRoot: townRoot,
WorkDir: cwd,
}, "stopped")
}
}
// NOTE: reportAgentState("stopped") removed (gt-zecmc)
// Agent liveness is observable from tmux - no need to record it in bead.
// "Discover, don't track" principle: reality is truth, state is derived.
// Clear scrollback history before respawn (resets copy-mode from [0/N] to [0/0])
if err := t.ClearHistory(pane); err != nil {

View File

@@ -149,8 +149,9 @@ func runPrime(cmd *cobra.Command, args []string) error {
// Ensure beads redirect exists for worktree-based roles
ensureBeadsRedirect(ctx)
// Report agent state as running (ZFC: agents self-report state)
reportAgentState(ctx, "running")
// NOTE: reportAgentState("running") removed (gt-zecmc)
// Agent liveness is observable from tmux - no need to record it in bead.
// "Discover, don't track" principle: reality is truth, state is derived.
// Emit session_start event for seance discovery
emitSessionEvent(ctx)

View File

@@ -1033,13 +1033,13 @@ func updateAgentHookBead(agentID, beadID, workDir, townBeadsDir string) {
}
// Run from workDir WITHOUT BEADS_DIR to enable redirect-based routing.
// Update agent_state to "running" and set hook_bead to the slung work.
// For same-database beads, the hook slot is set via `bd slot set`.
// Set hook_bead to the slung work (gt-zecmc: removed agent_state update).
// Agent liveness is observable from tmux - no need to record it in bead.
// For cross-database scenarios, slot set may fail gracefully (warning only).
bd := beads.New(bdWorkDir)
if err := bd.UpdateAgentState(agentBeadID, "running", &beadID); err != nil {
if err := bd.SetHookBead(agentBeadID, beadID); err != nil {
// Log warning instead of silent ignore - helps debug cross-beads issues
fmt.Fprintf(os.Stderr, "Warning: couldn't update agent %s state: %v\n", agentBeadID, err)
fmt.Fprintf(os.Stderr, "Warning: couldn't set agent %s hook: %v\n", agentBeadID, err)
return
}
}

View File

@@ -586,40 +586,34 @@ func outputStatusText(status TownStatus) error {
// renderAgentDetails renders full agent bead details
func renderAgentDetails(agent AgentRuntime, indent string, hooks []AgentHookInfo, townRoot string) { //nolint:unparam // indent kept for future customization
// Line 1: Agent bead ID + status
// Reconcile bead state with tmux session state to surface mismatches
// States: "running" (active), "idle" (waiting), "stopped", "dead", etc.
beadState := agent.State
// Per gt-zecmc: derive status from tmux (observable reality), not bead state.
// "Discover, don't track" - agent liveness is observable from tmux session.
sessionExists := agent.Running
// "idle" is a normal operational state (running but waiting for work)
// Treat it the same as "running" for reconciliation purposes
beadSaysRunning := beadState == "running" || beadState == "idle" || beadState == ""
var statusStr string
var stateInfo string
switch {
case beadSaysRunning && sessionExists:
// Normal running state - session exists and bead agrees
if sessionExists {
statusStr = style.Success.Render("running")
case beadSaysRunning && !sessionExists:
// Bead thinks running but session is gone - stale bead state
statusStr = style.Error.Render("running")
stateInfo = style.Warning.Render(" [dead]")
case !beadSaysRunning && sessionExists:
// Session exists but bead says stopped/dead - mismatch!
// This is the key case: tmux says alive, bead says dead/stopped
statusStr = style.Success.Render("running")
stateInfo = style.Warning.Render(" [bead: " + beadState + "]")
default:
// Both agree: stopped
} else {
statusStr = style.Error.Render("stopped")
}
// Add agent state info if not already shown and state is interesting
// Skip "idle" and "running" as they're normal operational states
if stateInfo == "" && beadState != "" && beadState != "idle" && beadState != "running" {
// Show non-observable states that represent intentional agent decisions.
// These can't be discovered from tmux and are legitimately recorded in beads.
beadState := agent.State
switch beadState {
case "stuck":
// Agent escalated - needs help
stateInfo = style.Warning.Render(" [stuck]")
case "awaiting-gate":
// Agent waiting for external trigger (phase gate)
stateInfo = style.Dim.Render(" [awaiting-gate]")
case "muted", "paused", "degraded":
// Other intentional non-observable states
stateInfo = style.Dim.Render(fmt.Sprintf(" [%s]", beadState))
// Ignore observable states: "running", "idle", "dead", "done", "stopped", ""
// These should be derived from tmux, not bead.
}
// Build agent bead ID using canonical naming: prefix-rig-role-name
@@ -741,22 +735,8 @@ func formatMQSummaryCompact(mq *MQSummary) string {
// renderAgentCompactWithSuffix renders a single-line agent status with an extra suffix
func renderAgentCompactWithSuffix(agent AgentRuntime, indent string, hooks []AgentHookInfo, townRoot string, suffix string) {
// Build status indicator
var statusIndicator string
beadState := agent.State
sessionExists := agent.Running
beadSaysRunning := beadState == "running" || beadState == "idle" || beadState == ""
switch {
case beadSaysRunning && sessionExists:
statusIndicator = style.Success.Render("●")
case beadSaysRunning && !sessionExists:
statusIndicator = style.Error.Render("●") + style.Warning.Render(" dead")
case !beadSaysRunning && sessionExists:
statusIndicator = style.Success.Render("●") + style.Warning.Render(" ["+beadState+"]")
default:
statusIndicator = style.Error.Render("○")
}
// Build status indicator (gt-zecmc: use tmux state, not bead state)
statusIndicator := buildStatusIndicator(agent)
// Get hook info
hookBead := agent.HookBead
@@ -795,22 +775,8 @@ func renderAgentCompactWithSuffix(agent AgentRuntime, indent string, hooks []Age
// renderAgentCompact renders a single-line agent status
func renderAgentCompact(agent AgentRuntime, indent string, hooks []AgentHookInfo, townRoot string) {
// Build status indicator
var statusIndicator string
beadState := agent.State
sessionExists := agent.Running
beadSaysRunning := beadState == "running" || beadState == "idle" || beadState == ""
switch {
case beadSaysRunning && sessionExists:
statusIndicator = style.Success.Render("●")
case beadSaysRunning && !sessionExists:
statusIndicator = style.Error.Render("●") + style.Warning.Render(" dead")
case !beadSaysRunning && sessionExists:
statusIndicator = style.Success.Render("●") + style.Warning.Render(" ["+beadState+"]")
default:
statusIndicator = style.Error.Render("○")
}
// Build status indicator (gt-zecmc: use tmux state, not bead state)
statusIndicator := buildStatusIndicator(agent)
// Get hook info
hookBead := agent.HookBead
@@ -847,6 +813,35 @@ func renderAgentCompact(agent AgentRuntime, indent string, hooks []AgentHookInfo
fmt.Printf("%s%-12s %s%s%s\n", indent, agent.Name, statusIndicator, hookSuffix, mailSuffix)
}
// buildStatusIndicator returns the compact status marker shown for an agent.
//
// The base glyph comes solely from agent.Running (per gt-zecmc: observable
// tmux reality, not the recorded bead state): a success-styled "●" when the
// session exists, an error-styled "○" otherwise.
//
// agent.State is consulted only for non-observable states, which are appended
// as a short suffix after the glyph:
//   - "stuck"                       → warning-styled " stuck"
//   - "awaiting-gate"               → dim " gate"
//   - "muted", "paused", "degraded" → dim " <state>"
//
// Any other state value (running, idle, dead, done, stopped, "") adds nothing,
// since those are meant to be derived from the session rather than the bead.
func buildStatusIndicator(agent AgentRuntime) string {
	// Pick the glyph from session liveness first.
	var glyph string
	if agent.Running {
		glyph = style.Success.Render("●")
	} else {
		glyph = style.Error.Render("○")
	}

	// Append a suffix only for states that cannot be observed from the session.
	switch state := agent.State; state {
	case "stuck":
		return glyph + style.Warning.Render(" stuck")
	case "awaiting-gate":
		return glyph + style.Dim.Render(" gate")
	case "muted", "paused", "degraded":
		return glyph + style.Dim.Render(" "+state)
	}

	// Observable or empty state: the glyph alone tells the story.
	return glyph
}
// formatHookInfo formats the hook bead and title for display
func formatHookInfo(hookBead, title string, maxLen int) string {
if hookBead == "" {

View File

@@ -162,9 +162,8 @@ func runUnsling(cmd *cobra.Command, args []string) error {
return nil
}
// Clear the hook by updating agent bead with empty hook_bead
emptyHook := ""
if err := b.UpdateAgentState(agentBeadID, "running", &emptyHook); err != nil {
// Clear the hook (gt-zecmc: removed agent_state update - observable from tmux)
if err := b.ClearHookBead(agentBeadID); err != nil {
return fmt.Errorf("clearing hook from agent bead %s: %w", agentBeadID, err)
}