diff --git a/internal/cmd/crew.go b/internal/cmd/crew.go index 27dbf864..41ee41b2 100644 --- a/internal/cmd/crew.go +++ b/internal/cmd/crew.go @@ -27,27 +27,33 @@ var ( var crewCmd = &cobra.Command{ Use: "crew", GroupID: GroupWorkspace, - Short: "Manage crew workspaces (user-managed persistent workspaces)", + Short: "Manage crew workers (persistent workspaces for humans)", RunE: requireSubcommand, - Long: `Crew workers are user-managed persistent workspaces within a rig. + Long: `Manage crew workers - persistent workspaces for human developers. -Unlike polecats which are witness-managed and transient, crew workers are: -- Persistent: Not auto-garbage-collected -- User-managed: Overseer controls lifecycle -- Long-lived identities: recognizable names like dave, emma, fred -- Gas Town integrated: Mail, handoff mechanics work -- Tmux optional: Can work in terminal directly +CREW VS POLECATS: + Polecats: Ephemeral. Witness-managed. Auto-nuked after work. + Crew: Persistent. User-managed. Stays until you remove it. + +Crew workers are full git clones (not worktrees) for human developers +who want persistent context and control over their workspace lifecycle. +Use crew workers for exploratory work, long-running tasks, or when you +want to keep uncommitted changes around. 
+ +Features: + - Gas Town integrated: Mail, nudge, handoff all work + - Recognizable names: dave, emma, fred (not ephemeral pool names) + - Tmux optional: Can work in terminal directly without tmux session Commands: - gt crew start Start a crew workspace (creates if needed) - gt crew stop Stop crew workspace session(s) - gt crew add Create a new crew workspace - gt crew list List crew workspaces with status - gt crew at Attach to crew workspace session - gt crew remove Remove a crew workspace - gt crew refresh Context cycling with mail-to-self handoff - gt crew restart Kill and restart session fresh (alias: rs) - gt crew status [] Show detailed workspace status`, + gt crew start Start session (creates workspace if needed) + gt crew stop Stop session(s) + gt crew add Create workspace without starting + gt crew list List workspaces with status + gt crew at Attach to session + gt crew remove Remove workspace + gt crew refresh Context cycle with handoff mail + gt crew restart Kill and restart session fresh`, } var crewAddCmd = &cobra.Command{ diff --git a/internal/cmd/deacon.go b/internal/cmd/deacon.go index e0720d08..969ac145 100644 --- a/internal/cmd/deacon.go +++ b/internal/cmd/deacon.go @@ -35,14 +35,20 @@ var deaconCmd = &cobra.Command{ Use: "deacon", Aliases: []string{"dea"}, GroupID: GroupAgents, - Short: "Manage the Deacon session", + Short: "Manage the Deacon (town-level watchdog)", RunE: requireSubcommand, - Long: `Manage the Deacon tmux session. + Long: `Manage the Deacon - the town-level watchdog for Gas Town. -The Deacon is the hierarchical health-check orchestrator for Gas Town. -It monitors the Mayor and Witnesses, handles lifecycle requests, and -keeps the town running. Use the subcommands to start, stop, attach, -and check status.`, +The Deacon ("daemon beacon") is the only agent that receives mechanical +heartbeats from the daemon. It monitors system health across all rigs: + - Watches all Witnesses (are they alive? stuck? responsive?) 
+ - Manages Dogs for cross-rig infrastructure work + - Handles lifecycle requests (respawns, restarts) + - Receives heartbeat pokes and decides what needs attention + +The Deacon patrols the town; Witnesses patrol their rigs; Polecats work. + +Role shortcuts: "deacon" in mail/nudge addresses resolves to this agent.`, } var deaconStartCmd = &cobra.Command{ diff --git a/internal/cmd/dog.go b/internal/cmd/dog.go index 78c0702b..71459d9c 100644 --- a/internal/cmd/dog.go +++ b/internal/cmd/dog.go @@ -40,14 +40,22 @@ var dogCmd = &cobra.Command{ Use: "dog", Aliases: []string{"dogs"}, GroupID: GroupAgents, - Short: "Manage dogs (Deacon's helper workers)", - Long: `Manage dogs in the kennel. + Short: "Manage dogs (cross-rig infrastructure workers)", + Long: `Manage dogs - reusable workers for infrastructure and cleanup. -Dogs are reusable helper workers managed by the Deacon for infrastructure -and cleanup tasks. Unlike polecats (single-rig, ephemeral), dogs handle -cross-rig infrastructure work with worktrees into each rig. +CATS VS DOGS: + Polecats (cats) build features. One rig. Ephemeral (one task, then nuked). + Dogs clean up messes. Cross-rig. Reusable (multiple tasks, eventually recycled). -The kennel is located at ~/gt/deacon/dogs/.`, +Dogs are managed by the Deacon for town-level work: + - Infrastructure tasks (rebuilding, syncing, migrations) + - Cleanup operations (orphan branches, stale files) + - Cross-rig work that spans multiple projects + +Each dog has worktrees into every configured rig, enabling cross-project +operations. Dogs return to idle state after completing work (unlike cats). + +The kennel is at ~/gt/deacon/dogs/. 
The Deacon dispatches work to dogs.`, } var dogAddCmd = &cobra.Command{ diff --git a/internal/cmd/mayor.go b/internal/cmd/mayor.go index efaffec1..c8040e14 100644 --- a/internal/cmd/mayor.go +++ b/internal/cmd/mayor.go @@ -16,12 +16,20 @@ var mayorCmd = &cobra.Command{ Use: "mayor", Aliases: []string{"may"}, GroupID: GroupAgents, - Short: "Manage the Mayor session", + Short: "Manage the Mayor (Chief of Staff for cross-rig coordination)", RunE: requireSubcommand, - Long: `Manage the Mayor tmux session. + Long: `Manage the Mayor - the Overseer's Chief of Staff. -The Mayor is the global coordinator for Gas Town, running as a persistent -tmux session. Use the subcommands to start, stop, attach, and check status.`, +The Mayor is the global coordinator for Gas Town: + - Receives escalations from Witnesses and Deacon + - Coordinates work across multiple rigs + - Handles human communication when needed + - Routes strategic decisions and cross-project issues + +The Mayor is the primary interface between the human Overseer and the +automated agents. When in doubt, escalate to the Mayor. + +Role shortcuts: "mayor" in mail/nudge addresses resolves to this agent.`, } var mayorAgentOverride string diff --git a/internal/cmd/nudge.go b/internal/cmd/nudge.go index de9b9cd5..f6eab8a8 100644 --- a/internal/cmd/nudge.go +++ b/internal/cmd/nudge.go @@ -27,8 +27,12 @@ func init() { var nudgeCmd = &cobra.Command{ Use: "nudge [message]", GroupID: GroupComm, - Short: "Send a message to a polecat or deacon session reliably", - Long: `Sends a message to a polecat's or deacon's Claude Code session. + Short: "Send a synchronous message to any Gas Town worker", + Long: `Universal synchronous messaging API for Gas Town worker-to-worker communication. + +Delivers a message directly to any worker's Claude Code session: polecats, crew, +witness, refinery, mayor, or deacon. Use this for real-time coordination when +you need immediate attention from another worker. 
Uses a reliable delivery pattern: 1. Sends text in literal mode (-l flag) diff --git a/internal/cmd/polecat.go b/internal/cmd/polecat.go index 7948ed51..74b2051a 100644 --- a/internal/cmd/polecat.go +++ b/internal/cmd/polecat.go @@ -32,12 +32,25 @@ var polecatCmd = &cobra.Command{ Use: "polecat", Aliases: []string{"polecats"}, GroupID: GroupAgents, - Short: "Manage polecats in rigs", + Short: "Manage polecats (ephemeral workers, one task then nuked)", RunE: requireSubcommand, Long: `Manage polecat lifecycle in rigs. -Polecats are worker agents that operate in their own git worktrees. -Use the subcommands to add, remove, list, wake, and sleep polecats.`, +Polecats are EPHEMERAL workers: spawned for one task, nuked when done. +There is NO idle state. A polecat is either: + - Working: Actively doing assigned work + - Stalled: Session crashed mid-work (needs Witness intervention) + - Zombie: Finished but gt done failed (needs cleanup) + +Self-cleaning model: When work completes, the polecat runs 'gt done', +which pushes the branch, submits to the merge queue, and exits. The +Witness then nukes the sandbox. Polecats don't wait for more work. + +Session vs sandbox: The Claude session cycles frequently (handoffs, +compaction). The git worktree (sandbox) persists until nuke. Work +survives session restarts. + +Cats build features. Dogs clean up messes.`, } var polecatListCmd = &cobra.Command{ diff --git a/internal/cmd/refinery.go b/internal/cmd/refinery.go index d0de226e..bc620b76 100644 --- a/internal/cmd/refinery.go +++ b/internal/cmd/refinery.go @@ -26,12 +26,23 @@ var refineryCmd = &cobra.Command{ Use: "refinery", Aliases: []string{"ref"}, GroupID: GroupAgents, - Short: "Manage the merge queue processor", + Short: "Manage the Refinery (merge queue processor)", RunE: requireSubcommand, - Long: `Manage the Refinery merge queue processor for a rig. + Long: `Manage the Refinery - the per-rig merge queue processor. 
-The Refinery processes merge requests from polecats, merging their work -into integration branches and ultimately to main.`, +The Refinery serializes all merges to main for a rig: + - Receives MRs submitted by polecats (via gt done) + - Rebases work branches onto latest main + - Runs validation (tests, builds, checks) + - Merges to main when clear + - If conflict: spawns FRESH polecat to re-implement (original is gone) + +Work flows: Polecat completes → gt done → MR in queue → Refinery merges. +The polecat is already nuked by the time the Refinery processes. + +One Refinery per rig. Persistent agent that processes work as it arrives. + +Role shortcuts: "refinery" in mail/nudge addresses resolves to this rig's Refinery.`, } var refineryStartCmd = &cobra.Command{ diff --git a/internal/cmd/witness.go b/internal/cmd/witness.go index 66a7cff1..0b5f5de0 100644 --- a/internal/cmd/witness.go +++ b/internal/cmd/witness.go @@ -24,16 +24,23 @@ var ( var witnessCmd = &cobra.Command{ Use: "witness", GroupID: GroupAgents, - Short: "Manage the polecat monitoring agent", + Short: "Manage the Witness (per-rig polecat health monitor)", RunE: requireSubcommand, - Long: `Manage the Witness monitoring agent for a rig. + Long: `Manage the Witness - the per-rig polecat health monitor. -The Witness monitors polecats for stuck states and orphaned sandboxes, -nudges polecats that seem blocked, and reports status to the mayor. +The Witness patrols a single rig, watching over its polecats: + - Detects stalled polecats (crashed or stuck mid-work) + - Nudges unresponsive sessions back to life + - Cleans up zombie polecats (finished but failed to exit) + - Nukes sandboxes when polecats complete via 'gt done' -In the self-cleaning model, polecats nuke themselves after work completion. -The Witness handles edge cases: crashed sessions, orphaned worktrees, and -stuck polecats that need intervention.`, +The Witness does NOT force session cycles or interrupt working polecats. 
+Polecats manage their own sessions (via gt handoff). The Witness handles +failures and edge cases only. + +One Witness per rig. The Deacon monitors all Witnesses. + +Role shortcuts: "witness" in mail/nudge addresses resolves to this rig's Witness.`, } var witnessStartCmd = &cobra.Command{ diff --git a/internal/formula/formulas/mol-deacon-patrol.formula.toml b/internal/formula/formulas/mol-deacon-patrol.formula.toml index ea60239a..7ec83e38 100644 --- a/internal/formula/formulas/mol-deacon-patrol.formula.toml +++ b/internal/formula/formulas/mol-deacon-patrol.formula.toml @@ -84,10 +84,46 @@ Callbacks may spawn new polecats, update issue state, or trigger other actions. **Hygiene principle**: Archive messages after they're fully processed. Keep inbox near-empty - only unprocessed items should remain.""" +[[steps]] +id = "orphan-process-cleanup" +title = "Clean up orphaned claude subagent processes" +needs = ["inbox-check"] +description = """ +Clean up orphaned claude subagent processes. + +Claude Code's Task tool spawns subagent processes that sometimes don't clean up +properly after completion. These accumulate and consume significant memory. + +**Detection method:** +Orphaned processes have no controlling terminal (TTY = "?"). Legitimate claude +instances in terminals have a TTY like "pts/0". + +**Run cleanup:** +```bash +gt deacon cleanup-orphans +``` + +This command: +1. Lists all claude/codex processes with `ps -eo pid,tty,comm` +2. Filters for TTY = "?" (no controlling terminal) +3. Sends SIGTERM to each orphaned process +4. Reports how many were killed + +**Why this is safe:** +- Processes in terminals (your personal sessions) have a TTY - they won't be touched +- Only kills processes that have no controlling terminal +- These orphans are children of the tmux server with no TTY, indicating they're + detached subagents that failed to exit + +**If cleanup fails:** +Log the error but continue patrol - this is best-effort cleanup. 
+ +**Exit criteria:** Orphan cleanup attempted (success or logged failure).""" + [[steps]] id = "trigger-pending-spawns" title = "Nudge newly spawned polecats" -needs = ["inbox-check"] +needs = ["orphan-process-cleanup"] description = """ Nudge newly spawned polecats that are ready for input. diff --git a/internal/formula/formulas/mol-witness-patrol.formula.toml b/internal/formula/formulas/mol-witness-patrol.formula.toml index 630c2f0b..12c612b9 100644 --- a/internal/formula/formulas/mol-witness-patrol.formula.toml +++ b/internal/formula/formulas/mol-witness-patrol.formula.toml @@ -38,7 +38,7 @@ needs = ['check-timer-gates'] title = 'Check if active swarm is complete' [[steps]] -description = "Send WITNESS_PING to Deacon for second-order monitoring.\n\nThe Witness fleet collectively monitors Deacon health - this prevents the\n\"who watches the watchers\" problem. If Deacon dies, Witnesses detect it.\n\n**Step 1: Send ping**\n```bash\ngt mail send deacon/ -s \"WITNESS_PING \" -m \"Rig: \nTimestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)\nPatrol: \"\n```\n\n**Step 2: Check Deacon health**\n```bash\n# Check Deacon agent bead for last_activity\nbd list --type=agent --json | jq '.[] | select(.description | contains(\"role_type: deacon\"))'\n```\n\nLook at the `last_activity` timestamp. If stale (>5 minutes since last update):\n- Deacon may be dead or stuck\n\n**Step 3: Escalate if needed**\n```bash\n# If Deacon appears down\ngt mail send mayor/ -s \"ALERT: Town-level Deacon appears unresponsive\" -m \"Town Deacon (hq-deacon) has no activity for >5 minutes.\nLast seen: \nWitness: /witness\"\n```\n\nNote: Multiple Witnesses may send this alert. Mayor should handle deduplication." +description = "Send WITNESS_PING to Deacon for second-order monitoring.\n\nThe Witness fleet collectively monitors Deacon health - this prevents the\n\"who watches the watchers\" problem. 
If Deacon dies, Witnesses detect it.\n\n**Step 1: Send ping**\n```bash\ngt mail send deacon/ -s \"WITNESS_PING <rig>\" -m \"Rig: <rig>\nTimestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)\nPatrol: <cycle-number>\"\n```\n\n**Step 2: Check Deacon health**\n```bash\n# Check Deacon agent bead for last_activity\nbd list --type=agent --json | jq '.[] | select(.description | contains(\"deacon\"))'\n```\n\nLook at the `last_activity` timestamp. If stale (>5 minutes since last update):\n- Deacon may be dead or stuck\n\n**Step 3: Escalate if needed**\n```bash\n# If Deacon appears down\ngt mail send mayor/ -s \"ALERT: Deacon appears unresponsive\" -m \"No Deacon activity for >5 minutes.\nLast seen: <timestamp>\nWitness: <rig>/witness\"\n```\n\nNote: Multiple Witnesses may send this alert. Mayor should handle deduplication." id = 'ping-deacon' needs = ['check-swarm-completion'] title = 'Ping Deacon for health check' diff --git a/internal/templates/roles/crew.md.tmpl b/internal/templates/roles/crew.md.tmpl index 34cdf011..42de7b38 100644 --- a/internal/templates/roles/crew.md.tmpl +++ b/internal/templates/roles/crew.md.tmpl @@ -308,9 +308,49 @@ ONE exception where branches are created. But the rule still applies: - `bd sync` - Sync beads changes ### Communication -- `gt mail send <addr> -s "Subject" -m "Message"` - Send mail +- `gt mail send <addr> -s "Subject" -m "Message"` - Send mail (async, queued) - `gt mail send mayor/ -s "Subject" -m "Message"` - To Mayor - `gt mail send --human -s "Subject" -m "Message"` - To overseer +- `gt nudge <target> "message"` - Wake an agent and send immediate message + +### gt nudge: Waking Agents + +`gt nudge` is the **core mechanism for inter-agent communication**. It sends a message +directly to another agent's Claude Code session via tmux.
+ +**When to use nudge vs mail:** +| Use Case | Tool | Why | +|----------|------|-----| +| Wake a sleeping agent | `gt nudge` | Immediate delivery to their session | +| Send task for later | `gt mail send` | Queued, they'll see it on next check | +| Both: assign + wake | `gt mail send` then `gt nudge` | Mail carries payload, nudge wakes them | + +**Common patterns:** +```bash +# Wake another crew member (full path: rig/crew/name) +gt nudge {{ .RigName }}/crew/peer "Check your mail - PR review waiting" + +# Wake a polecat (full path: rig/polecats/name) +gt nudge {{ .RigName }}/polecats/alpha "Work available on hook" + +# Send mail with notification flag (also sends tmux bell) +gt mail send {{ .RigName }}/peer -s "Urgent" -m "..." --notify + +# Nudge patrol agents +gt nudge witness "Check polecat health" +gt nudge deacon "Session started" +gt nudge mayor "Status update needed" +``` + +**Target shortcuts:** +- `mayor` → gt-mayor session +- `deacon` → gt-deacon session +- `witness` → gt-{{ .RigName }}-witness session +- `refinery` → gt-{{ .RigName }}-refinery session +- `channel:<name>` → All members of a named channel + +**Important:** `gt nudge` is the ONLY reliable way to send text to Claude sessions. +Raw `tmux send-keys` is unreliable. Always use `gt nudge` for agent-to-agent communication. ## No Witness Monitoring