fix(daemon): nudge agents on state divergence instead of silent accept

When the daemon detects that an agent's bead state doesn't match what tmux
shows (e.g., the bead says stopped but Claude is running), it now:

1. Logs the divergence clearly with STATE DIVERGENCE prefix
2. Nudges the agent with an actionable command to fix its state
3. Still skips the restart (safety - don't kill healthy sessions)

This prevents silent state drift where bead state diverges from reality.
Applied to the Deacon, Witness, and Refinery ensure functions
(ensureDeaconRunning, ensureWitnessRunning, ensureRefineryRunning).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gastown/crew/joe
2026-01-06 19:23:37 -08:00
committed by Steve Yegge
parent d89aae5b5c
commit 6dbb841e22

View File

@@ -325,7 +325,14 @@ func (d *Daemon) ensureDeaconRunning() {
hasSession, sessionErr := d.tmux.HasSession(deaconSession)
if sessionErr == nil && hasSession {
if d.tmux.IsClaudeRunning(deaconSession) {
d.logger.Println("Deacon session healthy (Claude running), skipping restart despite stale bead")
// STATE DIVERGENCE: tmux shows running but bead disagrees.
// Don't kill (safety), but nudge the agent to reconcile its state.
// This prevents silent state drift where bead and reality diverge.
d.logger.Printf("STATE DIVERGENCE: Deacon bead='%s' but Claude is running in tmux", beadState)
nudgeMsg := "[DAEMON] State divergence detected: your agent bead shows '" + beadState + "' but you appear running. Please run: bd agent state " + deaconSession + " running"
if err := d.tmux.NudgeSession(deaconSession, nudgeMsg); err != nil {
d.logger.Printf("Warning: failed to nudge Deacon about state divergence: %v", err)
}
return
}
}
@@ -460,8 +467,13 @@ func (d *Daemon) ensureWitnessRunning(rigName string) {
if err := mgr.Start(false); err != nil {
if err == witness.ErrAlreadyRunning {
// Session is healthy (Claude running) - bead state was stale
d.logger.Printf("Witness for %s session healthy (Claude running), skipping restart despite stale bead", rigName)
// STATE DIVERGENCE: tmux shows running but bead disagrees.
// Don't kill (safety), but nudge the agent to reconcile its state.
d.logger.Printf("STATE DIVERGENCE: Witness for %s bead='%s' but Claude is running in tmux", rigName, beadState)
nudgeMsg := "[DAEMON] State divergence detected: your agent bead shows '" + beadState + "' but you appear running. Please run: bd agent state " + agentID + " running"
if err := d.tmux.NudgeSession(sessionName, nudgeMsg); err != nil {
d.logger.Printf("Warning: failed to nudge Witness %s about state divergence: %v", rigName, err)
}
return
}
d.logger.Printf("Error starting witness for %s: %v", rigName, err)
@@ -522,8 +534,13 @@ func (d *Daemon) ensureRefineryRunning(rigName string) {
if err := mgr.Start(false); err != nil {
if err == refinery.ErrAlreadyRunning {
// Session is healthy (Claude running) - bead state was stale
d.logger.Printf("Refinery for %s session healthy (Claude running), skipping restart despite stale bead", rigName)
// STATE DIVERGENCE: tmux shows running but bead disagrees.
// Don't kill (safety), but nudge the agent to reconcile its state.
d.logger.Printf("STATE DIVERGENCE: Refinery for %s bead='%s' but Claude is running in tmux", rigName, beadState)
nudgeMsg := "[DAEMON] State divergence detected: your agent bead shows '" + beadState + "' but you appear running. Please run: bd agent state " + agentID + " running"
if err := d.tmux.NudgeSession(sessionName, nudgeMsg); err != nil {
d.logger.Printf("Warning: failed to nudge Refinery %s about state divergence: %v", rigName, err)
}
return
}
d.logger.Printf("Error starting refinery for %s: %v", rigName, err)