From 5838d4cd1b5c35063014829afd173aadff9ee1b6 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Sun, 28 Dec 2025 10:02:55 -0800 Subject: [PATCH] Witness pings Deacon for second-order monitoring (gt-5v8ls) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added WITNESS_PING protocol for monitoring Deacon health: Witness patrol (mol-witness-patrol): - Added ping-deacon step after survey-workers - Sends WITNESS_PING mail to Deacon each patrol cycle - Checks Deacon agent bead last_activity timestamp - Escalates to Mayor if Deacon appears unresponsive Deacon patrol (mol-deacon-patrol): - Added WITNESS_PING handling in inbox-check - Added second-order monitoring section to description - Bumped formula version to 2 This prevents the "who watches the watchers" problem - if Deacon dies, the collective Witness fleet detects it and escalates. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../formulas/mol-deacon-patrol.formula.toml | 90 +++++++++++-------- .../formulas/mol-witness-patrol.formula.toml | 49 ++++++++-- 2 files changed, 96 insertions(+), 43 deletions(-) diff --git a/.beads/formulas/mol-deacon-patrol.formula.toml b/.beads/formulas/mol-deacon-patrol.formula.toml index cef2438e..407c7f17 100644 --- a/.beads/formulas/mol-deacon-patrol.formula.toml +++ b/.beads/formulas/mol-deacon-patrol.formula.toml @@ -1,11 +1,22 @@ description = """ Mayor's daemon patrol loop. -The Deacon is the Mayor's background process that runs continuously, handling callbacks, monitoring rig health, and performing cleanup. Each patrol cycle runs these steps in sequence, then loops or exits.""" +The Deacon is the Mayor's background process that runs continuously, handling callbacks, monitoring rig health, and performing cleanup. Each patrol cycle runs these steps in sequence, then loops or exits. + +## Second-Order Monitoring + +Witnesses send WITNESS_PING messages to verify the Deacon is alive. 
This +prevents the "who watches the watchers" problem - if the Deacon dies, +Witnesses detect it and escalate to the Mayor. + +The Deacon's agent bead last_activity timestamp is updated during each patrol +cycle. Witnesses check this timestamp to verify health.""" formula = "mol-deacon-patrol" -version = 1 +version = 2 [[steps]] +id = "inbox-check" +title = "Handle callbacks from agents" description = """ Handle callbacks from agents. @@ -22,11 +33,23 @@ gt mail read # Handle based on message type ``` +**WITNESS_PING**: +Witnesses periodically ping to verify Deacon is alive. Simply acknowledge +and mark as read - the fact that you're processing mail proves you're running. +Your agent bead last_activity is updated automatically during patrol. + +**HELP / Escalation**: +Assess and handle or forward to Mayor. + +**LIFECYCLE messages**: +Polecats reporting completion, refineries reporting merge results. + Callbacks may spawn new polecats, update issue state, or trigger other actions.""" -id = "inbox-check" -title = "Handle callbacks from agents" [[steps]] +id = "trigger-pending-spawns" +title = "Nudge newly spawned polecats" +needs = ["inbox-check"] description = """ Nudge newly spawned polecats that are ready for input. @@ -40,14 +63,14 @@ gt deacon pending ``` For each pending session, analyze the captured output: -- Look for Claude's prompt indicator \"> \" at the start of a line +- Look for Claude's prompt indicator "> " at the start of a line - If prompt is visible, Claude is ready for input - Make the judgment call yourself - you're the AI observer For each ready polecat: ```bash # 1. Trigger the polecat -gt nudge \"Begin.\" +gt nudge "Begin." # 2. Clear from pending list gt deacon pending @@ -57,11 +80,11 @@ This triggers the UserPromptSubmit hook, which injects mail so the polecat sees **Bootstrap mode** (daemon-only, no AI available): The daemon uses `gt deacon trigger-pending` with regex detection. 
This ZFC violation is acceptable during cold startup when no AI agent is running yet.""" -id = "trigger-pending-spawns" -needs = ["inbox-check"] -title = "Nudge newly spawned polecats" [[steps]] +id = "gate-evaluation" +title = "Evaluate pending async gates" +needs = ["inbox-check"] description = """ Evaluate pending async gates. @@ -86,15 +109,15 @@ bd gate list --json After closing a gate, the Waiters field contains mail addresses to notify. Send a brief notification to each waiter that the gate has cleared.""" -id = "gate-evaluation" -needs = ["inbox-check"] -title = "Evaluate pending async gates" [[steps]] +id = "health-scan" +title = "Check Witness and Refinery health" +needs = ["trigger-pending-spawns", "gate-evaluation"] description = """ Check Witness and Refinery health for each rig. -**ZFC Principle**: You (Claude) make the judgment call about what is \"stuck\" or \"unresponsive\" - there are no hardcoded thresholds in Go. Read the signals, consider context, and decide. +**ZFC Principle**: You (Claude) make the judgment call about what is "stuck" or "unresponsive" - there are no hardcoded thresholds in Go. Read the signals, consider context, and decide. For each rig, run: ```bash @@ -139,18 +162,18 @@ gt refinery restart **Escalation:** ```bash -gt mail send mayor/ -s \"Health: unresponsive\" \\ - -m \"Component has been unresponsive for N cycles. Restart attempts failed. +gt mail send mayor/ -s "Health: unresponsive" \\ + -m "Component has been unresponsive for N cycles. Restart attempts failed. Last healthy: - Error signals:
\" + Error signals:
" ``` Reset unresponsive_cycles to 0 when component responds normally.""" -id = "health-scan" -needs = ["trigger-pending-spawns", "gate-evaluation"] -title = "Check Witness and Refinery health" [[steps]] +id = "plugin-run" +title = "Execute registered plugins" +needs = ["health-scan"] description = """ Execute registered plugins. @@ -160,7 +183,7 @@ See docs/deacon-plugins.md for full documentation. Gate types: - cooldown: Time since last run (e.g., 24h) -- cron: Schedule-based (e.g., \"0 9 * * *\") +- cron: Schedule-based (e.g., "0 9 * * *") - condition: Metric threshold (e.g., wisp count > 50) - event: Trigger-based (e.g., startup, heartbeat) @@ -172,11 +195,11 @@ For each plugin: Plugins marked parallel: true can run concurrently using Task tool subagents. Sequential plugins run one at a time in directory order. Skip this step if ~/gt/plugins/ does not exist or is empty.""" -id = "plugin-run" -needs = ["health-scan"] -title = "Execute registered plugins" [[steps]] +id = "orphan-check" +title = "Find abandoned work" +needs = ["health-scan"] description = """ Find abandoned work. @@ -195,11 +218,11 @@ For each orphan: - Check if polecat session still exists - If not, mark issue for reassignment or retry - File incident beads if data loss occurred""" -id = "orphan-check" -needs = ["health-scan"] -title = "Find abandoned work" [[steps]] +id = "session-gc" +title = "Clean dead sessions" +needs = ["orphan-check"] description = """ Clean dead sessions and orphaned state. @@ -219,11 +242,11 @@ This handles: - **wisp-gc**: Garbage collect abandoned wisps (>1h old) All cleanup is handled by doctor checks - no need to run separate commands.""" -id = "session-gc" -needs = ["orphan-check"] -title = "Clean dead sessions" [[steps]] +id = "context-check" +title = "Check own context limit" +needs = ["session-gc"] description = """ Check own context limit. 
@@ -239,11 +262,11 @@ If context is high (>80%), prepare for handoff: - Write handoff to molecule state This enables the Deacon to burn and respawn cleanly.""" -id = "context-check" -needs = ["session-gc"] -title = "Check own context limit" [[steps]] +id = "loop-or-exit" +title = "Burn and respawn or loop" +needs = ["context-check"] description = """ Burn and let daemon respawn, or exit if context high. @@ -265,6 +288,3 @@ gt daemon status ``` This enables infinite patrol duration via context-aware respawning.""" -id = "loop-or-exit" -needs = ["context-check"] -title = "Burn and respawn or loop" diff --git a/.beads/formulas/mol-witness-patrol.formula.toml b/.beads/formulas/mol-witness-patrol.formula.toml index e9a122c2..8b537e87 100644 --- a/.beads/formulas/mol-witness-patrol.formula.toml +++ b/.beads/formulas/mol-witness-patrol.formula.toml @@ -21,7 +21,7 @@ inbox-check ─► process-cleanups ─► check-refinery ─► survey-workers │ ┌──────────────────────────────────────────────────┘ ▼ - context-check ─► loop-or-exit + ping-deacon ─► context-check ─► loop-or-exit ``` No dynamic arms. No fanout gates. No persistent nudge counters. 
@@ -187,10 +187,8 @@ Each polecat agent bead has fields in its description: |-------------|---------|--------| | running | Actively working | Check progress (Step 3) | | idle | No work assigned | Skip (no action needed) | -| stuck | Self-reported stuck (via gt done --exit ESCALATED) | Handle stuck protocol | -| done | Work complete (via gt done) | Verify cleanup triggered (see Step 4a) | -| dead | Marked dead by daemon (unresponsive) | Clean up and respawn | -| spawning | Recently created, initializing | Wait for running state | +| stuck | Self-reported stuck | Handle stuck protocol | +| done | Work complete | Verify cleanup triggered (see Step 4a) | **Step 3: For running polecats, assess progress** @@ -218,8 +216,6 @@ Look for: | agent_state=running, idle 15+ min | Direct nudge with deadline | | agent_state=stuck | Assess and help or escalate | | agent_state=done | Verify cleanup triggered (see Step 4a) | -| agent_state=dead | Create cleanup wisp, mark for respawn if needed | -| agent_state=spawning | Check if stuck in spawn; if >2 min, investigate | **Step 4a: Handle agent_state=done** @@ -256,10 +252,47 @@ gt mail send mayor/ -s "Escalation: stuck" \\ **ZFC Principle**: Trust agent_state from beads. Don't infer state from PID/tmux.""" +[[steps]] +id = "ping-deacon" +title = "Ping Deacon for health check" +needs = ["survey-workers"] +description = """ +Send WITNESS_PING to Deacon for second-order monitoring. + +The Witness fleet collectively monitors Deacon health - this prevents the +"who watches the watchers" problem. If Deacon dies, Witnesses detect it. + +**Step 1: Send ping** +```bash +gt mail send deacon/ -s "WITNESS_PING <rig>" -m "Rig: <rig> +Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ) +Patrol: <cycle-number>" +``` + +**Step 2: Check Deacon health** +```bash +# Check Deacon agent bead for last_activity +bd list --type=agent --json | jq '.[] | select(.description | contains("deacon"))' +``` + +Look at the `last_activity` timestamp.
If stale (>5 minutes since last update): +- Deacon may be dead or stuck + +**Step 3: Escalate if needed** +```bash +# If Deacon appears down +gt mail send mayor/ -s "ALERT: Deacon appears unresponsive" \\ + -m "No Deacon activity for >5 minutes. +Last seen: <timestamp> +Witness: <rig>/witness" +``` + +Note: Multiple Witnesses may send this alert. Mayor should handle deduplication.""" + [[steps]] id = "context-check" title = "Check own context limit" -needs = ["survey-workers"] +needs = ["ping-deacon"] description = """ Check own context usage.