From 049dd13cd1ce487cc30bae687328e0f41c5b7d77 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Tue, 23 Dec 2025 00:17:35 -0800 Subject: [PATCH 1/2] Witness: Verify POLECAT_DONE before stopping sessions (gt-ldk8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add handling for POLECAT_DONE messages in processShutdownRequests() - Track which polecats have signaled done (using SpawnedIssues with "done:" prefix) - For LIFECYCLE:shutdown requests, wait for POLECAT_DONE before cleanup - Add checkPendingCompletions() to nudge polecats with closed issues - Add 10-minute timeout with force-kill after waiting for POLECAT_DONE - Protects against losing MR submissions when Witness cleans up too early 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- internal/witness/manager.go | 257 +++++++++++++++++++++++++++++++++++- 1 file changed, 256 insertions(+), 1 deletion(-) diff --git a/internal/witness/manager.go b/internal/witness/manager.go index d1f1d2e4..fdb3c9d2 100644 --- a/internal/witness/manager.go +++ b/internal/witness/manager.go @@ -189,6 +189,11 @@ func (m *Manager) checkAndProcess(w *Witness) { fmt.Printf("Shutdown request error: %v\n", err) } + // Check for polecats with closed issues that haven't signaled done + if err := m.checkPendingCompletions(w); err != nil { + fmt.Printf("Pending completions check error: %v\n", err) + } + // Auto-spawn for ready work (if enabled) if w.Config.AutoSpawn { if err := m.autoSpawnForReadyWork(w); err != nil { @@ -391,7 +396,48 @@ func (m *Manager) processShutdownRequests(w *Witness) error { } for _, msg := range messages { - // Look for LIFECYCLE requests + // Handle POLECAT_DONE messages (polecat has completed work and is ready for cleanup) + if strings.HasPrefix(msg.Subject, "POLECAT_DONE ") { + polecatName := extractPolecatNameFromDone(msg.Subject) + if polecatName == "" { + fmt.Printf("Warning: could not extract polecat name from 
POLECAT_DONE message\n") + m.ackMessage(msg.ID) + continue + } + + fmt.Printf("Processing POLECAT_DONE from %s\n", polecatName) + + // Record that this polecat has signaled done + m.recordDone(w, polecatName) + + // Verify polecat state before cleanup + if err := m.verifyPolecatState(polecatName); err != nil { + fmt.Printf(" Verification failed: %v\n", err) + + // Send nudge to polecat to fix state + if err := m.sendNudge(polecatName, err.Error()); err != nil { + fmt.Printf(" Warning: failed to send nudge: %v\n", err) + } + + // Don't ack message - will retry on next check + continue + } + + // Perform cleanup + if err := m.cleanupPolecat(polecatName); err != nil { + fmt.Printf(" Cleanup error: %v\n", err) + // Don't ack message on error - will retry + continue + } + + fmt.Printf(" Cleanup complete\n") + + // Acknowledge the message + m.ackMessage(msg.ID) + continue + } + + // Handle LIFECYCLE shutdown requests (legacy/Deacon-managed) if strings.Contains(msg.Subject, "LIFECYCLE:") && strings.Contains(msg.Subject, "shutdown") { fmt.Printf("Processing shutdown request: %s\n", msg.Subject) @@ -405,6 +451,19 @@ func (m *Manager) processShutdownRequests(w *Witness) error { fmt.Printf(" Polecat: %s\n", polecatName) + // SAFETY: Only cleanup if polecat has sent POLECAT_DONE + if !m.hasSentDone(w, polecatName) { + fmt.Printf(" Waiting for POLECAT_DONE from %s before cleanup\n", polecatName) + + // Send reminder to polecat to complete shutdown sequence + if err := m.sendNudge(polecatName, "Please run 'gt done' to signal completion"); err != nil { + fmt.Printf(" Warning: failed to send nudge: %v\n", err) + } + + // Don't ack message - will retry on next check + continue + } + // Verify polecat state before cleanup if err := m.verifyPolecatState(polecatName); err != nil { fmt.Printf(" Verification failed: %v\n", err) @@ -553,6 +612,202 @@ func extractPolecatName(body string) string { return "" } +// extractPolecatNameFromDone extracts the polecat name from a POLECAT_DONE 
subject.
+// Subject format: "POLECAT_DONE {name}"
+// Returns the whitespace-trimmed text after the prefix, or "" when the
+// subject does not start with "POLECAT_DONE ".
+func extractPolecatNameFromDone(subject string) string {
+	const prefix = "POLECAT_DONE "
+	if !strings.HasPrefix(subject, prefix) {
+		return ""
+	}
+	return strings.TrimSpace(strings.TrimPrefix(subject, prefix))
+}
+
+// recordDone records that a polecat has sent POLECAT_DONE.
+// Uses SpawnedIssues with "done:" prefix to track, and persists the witness
+// state so the marker is written out via saveState.
+//
+// NOTE(review): nothing visible in this patch removes a "done:" marker, so it
+// presumably outlives the polecat - confirm that a reused polecat name cannot
+// inherit a stale marker.
+func (m *Manager) recordDone(w *Witness, polecatName string) {
+	// Don't record duplicates
+	if m.hasSentDone(w, polecatName) {
+		return
+	}
+	w.SpawnedIssues = append(w.SpawnedIssues, "done:"+polecatName)
+
+	// Surface persistence failures: if the marker is not saved, a later
+	// LIFECYCLE:shutdown check will keep waiting for a POLECAT_DONE that
+	// already arrived.
+	if err := m.saveState(w); err != nil {
+		fmt.Printf("Warning: failed to save state after POLECAT_DONE from %s: %v\n", polecatName, err)
+	}
+}
+
+// hasSentDone checks if a polecat has sent POLECAT_DONE, i.e. whether
+// w.SpawnedIssues contains the exact entry "done:{polecatName}".
+func (m *Manager) hasSentDone(w *Witness, polecatName string) bool {
+	doneKey := "done:" + polecatName
+	for _, entry := range w.SpawnedIssues {
+		if entry == doneKey {
+			return true
+		}
+	}
+	return false
+}
+
+// PendingCompletionTimeout is how long to wait for POLECAT_DONE after issue is closed
+// before force-killing the polecat session.
+const PendingCompletionTimeout = 10 * time.Minute
+
+// checkPendingCompletions checks for polecats with closed issues that haven't sent POLECAT_DONE.
+// It nudges them to complete, and force-kills after timeout.
+//
+// The first time a running polecat is seen with a closed issue, a
+// "waiting:{name}:{timestamp}" marker is recorded and the polecat is nudged.
+// Once the marker is older than PendingCompletionTimeout the polecat is
+// verified and cleaned up. The marker is dropped again as soon as the polecat
+// signals done or its issue is no longer closed, so a later closed issue
+// starts a fresh grace period instead of inheriting an old timestamp.
+//
+// Returns an error only when the polecat list cannot be obtained; per-polecat
+// problems are logged and the loop moves on.
+func (m *Manager) checkPendingCompletions(w *Witness) error {
+	polecatMgr := polecat.NewManager(m.rig, git.NewGit(m.rig.Path))
+	polecats, err := polecatMgr.List()
+	if err != nil {
+		return fmt.Errorf("listing polecats: %w", err)
+	}
+
+	t := tmux.NewTmux()
+	sessMgr := session.NewManager(t, m.rig)
+
+	for _, p := range polecats {
+		// Skip if not running (a failed check is treated as "not running")
+		running, _ := sessMgr.IsRunning(p.Name)
+		if !running {
+			continue
+		}
+
+		waitKey := "waiting:" + p.Name
+		waitingSince := m.getWaitingTimestamp(w, waitKey)
+
+		// Skip if already signaled done; the wait is over, so drop any marker
+		if m.hasSentDone(w, p.Name) {
+			if !waitingSince.IsZero() {
+				m.clearWaiting(w, waitKey)
+			}
+			continue
+		}
+
+		// Check if the polecat's issue is closed
+		issueID := m.getPolecatIssue(p.Name, p.ClonePath)
+		if issueID == "" {
+			continue
+		}
+
+		closed, err := m.isIssueClosed(issueID)
+		if err != nil {
+			// Status unknown: keep any marker and retry on the next check
+			fmt.Printf("Warning: could not check issue %s for polecat %s: %v\n", issueID, p.Name, err)
+			continue
+		}
+		if !closed {
+			// Issue is open (again): a leftover marker would make the next
+			// closed issue time out immediately, so remove it
+			if !waitingSince.IsZero() {
+				m.clearWaiting(w, waitKey)
+			}
+			continue
+		}
+
+		// Issue is closed but polecat hasn't sent POLECAT_DONE
+		switch waited := time.Since(waitingSince); {
+		case waitingSince.IsZero():
+			// First detection - record timestamp and nudge
+			fmt.Printf("Issue %s is closed but polecat %s hasn't signaled done\n", issueID, p.Name)
+			m.recordWaiting(w, waitKey)
+			if err := m.sendNudge(p.Name, "Your issue is closed. Please run 'gt done' to complete shutdown."); err != nil {
+				fmt.Printf(" Warning: failed to send nudge: %v\n", err)
+			}
+
+		case waited > PendingCompletionTimeout:
+			// Timeout reached - force cleanup
+			fmt.Printf("Timeout waiting for POLECAT_DONE from %s, force cleaning up\n", p.Name)
+
+			// Verify state first (this still protects uncommitted work)
+			if err := m.verifyPolecatState(p.Name); err != nil {
+				fmt.Printf(" Cannot force cleanup - %v\n", err)
+				// Escalate to Mayor
+				// NOTE(review): this branch runs on every check while the
+				// state stays unverifiable - confirm escalateToMayor
+				// de-duplicates repeated escalations.
+				m.escalateToMayor(p.Name)
+				continue
+			}
+
+			if err := m.cleanupPolecat(p.Name); err != nil {
+				fmt.Printf(" Force cleanup failed: %v\n", err)
+				continue
+			}
+
+			// Clean up tracking
+			m.clearWaiting(w, waitKey)
+
+		default:
+			// Still waiting
+			elapsed := waited.Round(time.Minute)
+			remaining := (PendingCompletionTimeout - waited).Round(time.Minute)
+			fmt.Printf("Waiting for POLECAT_DONE from %s (elapsed: %v, timeout in: %v)\n",
+				p.Name, elapsed, remaining)
+		}
+	}
+
+	return nil
+}
+
+// getPolecatIssue tries to determine which issue a polecat is working on.
+// It reads the "issue_id" field from {polecatPath}/.runtime/state.json and
+// returns "" when the file cannot be read or parsed, or the field is absent.
+// polecatName is currently unused.
+func (m *Manager) getPolecatIssue(polecatName, polecatPath string) string {
+	// Try to read from state file
+	stateFile := filepath.Join(polecatPath, ".runtime", "state.json")
+	data, err := os.ReadFile(stateFile)
+	if err != nil {
+		return ""
+	}
+
+	var state struct {
+		IssueID string `json:"issue_id"`
+	}
+	if err := json.Unmarshal(data, &state); err != nil {
+		return ""
+	}
+
+	return state.IssueID
+}
+
+// isIssueClosed checks if an issue is closed.
+// It runs "bd show {issueID} --json" in m.workDir, decodes the output as a
+// JSON array, and reports whether the first element has status "closed".
+// An empty array yields (false, nil). Command and decode failures are
+// returned wrapped, with bd's stderr appended when it printed anything.
+func (m *Manager) isIssueClosed(issueID string) (bool, error) {
+	cmd := exec.Command("bd", "show", issueID, "--json")
+	cmd.Dir = m.workDir
+
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		// Keep the underlying error: stderr is empty when bd could not be
+		// started at all, which previously produced a blank error message.
+		if msg := strings.TrimSpace(stderr.String()); msg != "" {
+			return false, fmt.Errorf("bd show %s: %w: %s", issueID, err, msg)
+		}
+		return false, fmt.Errorf("bd show %s: %w", issueID, err)
+	}
+
+	// Parse to check status
+	var issues []struct {
+		Status string `json:"status"`
+	}
+	if err := json.Unmarshal(stdout.Bytes(), &issues); err != nil {
+		return false, fmt.Errorf("parsing bd show output for %s: %w", issueID, err)
+	}
+
+	if len(issues) == 0 {
+		return false, nil
+	}
+
+	return issues[0].Status == "closed", nil
+}
+
+// getWaitingTimestamp retrieves when we started waiting for a polecat.
+// It scans w.SpawnedIssues for "{key}:{timestamp}" entries and returns the
+// first timestamp that parses as RFC 3339, or the zero time if there is none.
+func (m *Manager) getWaitingTimestamp(w *Witness, key string) time.Time {
+	// Parse timestamps from SpawnedIssues with "waiting:{name}:{timestamp}" format
+	prefix := key + ":"
+	for _, entry := range w.SpawnedIssues {
+		if !strings.HasPrefix(entry, prefix) {
+			continue
+		}
+		if ts, err := time.Parse(time.RFC3339, strings.TrimPrefix(entry, prefix)); err == nil {
+			return ts
+		}
+	}
+	return time.Time{}
+}
+
+// recordWaiting records when we started waiting for a polecat to complete.
+// It appends "{key}:{now as RFC 3339}" to w.SpawnedIssues and saves the state.
+func (m *Manager) recordWaiting(w *Witness, key string) {
+	entry := fmt.Sprintf("%s:%s", key, time.Now().Format(time.RFC3339))
+	w.SpawnedIssues = append(w.SpawnedIssues, entry)
+	if err := m.saveState(w); err != nil {
+		fmt.Printf("Warning: failed to save state after recording %s: %v\n", key, err)
+	}
+}
+
+// clearWaiting removes the waiting timestamp for a polecat.
+// Only entries of the form "{key}:..." are dropped; matching on the ":"
+// separator keeps markers of polecats whose name merely starts with this
+// polecat's name (e.g. "waiting:max" vs "waiting:maxine:...").
+func (m *Manager) clearWaiting(w *Witness, key string) {
+	prefix := key + ":"
+	var filtered []string
+	for _, entry := range w.SpawnedIssues {
+		if !strings.HasPrefix(entry, prefix) {
+			filtered = append(filtered, entry)
+		}
+	}
+	w.SpawnedIssues = filtered
+	if err := m.saveState(w); err != nil {
+		fmt.Printf("Warning: failed to save state after clearing %s: %v\n", key, err)
+	}
+}
+
 // cleanupPolecat performs the full cleanup sequence for a transient polecat.
 // 1. Check for uncommitted work (stubbornly refuses to lose work)
 // 2.
Kill session From 1287aadeb862a64fe30f647803088345c237122e Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Tue, 23 Dec 2025 00:17:58 -0800 Subject: [PATCH 2/2] bd sync: 2025-12-23 00:17:58 --- .beads/issues.jsonl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index e3c4b3b7..71a75edf 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -269,7 +269,7 @@ {"id":"gt-ca4v.7","title":"More branches to process?","description":"More branches to process?\n\nIf yes: Return to process-branch with next branch.\nIf no: Continue to generate-summary.\n\nTrack: branches processed, branches skipped (with reasons).\n\ninstantiated_from: mol-refinery-patrol\nstep: loop-check","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-22T17:13:47.746798-08:00","updated_at":"2025-12-22T17:13:47.746798-08:00","dependencies":[{"issue_id":"gt-ca4v.7","depends_on_id":"gt-ca4v","type":"parent-child","created_at":"2025-12-22T17:13:47.747125-08:00","created_by":"daemon"},{"issue_id":"gt-ca4v.7","depends_on_id":"gt-ca4v.6","type":"blocks","created_at":"2025-12-22T17:13:48.464174-08:00","created_by":"daemon"}]} {"id":"gt-ca4v.8","title":"Summarize this patrol cycle.","description":"Summarize this patrol cycle.\n\nInclude:\n- Branches processed (count, names)\n- Test results (pass/fail)\n- Issues filed (if any)\n- Branches skipped (with reasons)\n- Any escalations sent\n\nThis becomes the digest when the patrol is squashed.\n\ninstantiated_from: mol-refinery-patrol\nstep: 
generate-summary","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-22T17:13:47.827331-08:00","updated_at":"2025-12-22T17:13:47.827331-08:00","dependencies":[{"issue_id":"gt-ca4v.8","depends_on_id":"gt-ca4v","type":"parent-child","created_at":"2025-12-22T17:13:47.827682-08:00","created_by":"daemon"},{"issue_id":"gt-ca4v.8","depends_on_id":"gt-ca4v.7","type":"blocks","created_at":"2025-12-22T17:13:48.545582-08:00","created_by":"daemon"}]} {"id":"gt-ca4v.9","title":"Check own context usage.","description":"Check own context usage.\n\nIf context is HIGH (\u003e80%):\n- Write handoff summary\n- Prepare for burn/respawn\n\nIf context is LOW:\n- Can continue processing\n\ninstantiated_from: mol-refinery-patrol\nstep: context-check","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-22T17:13:47.909098-08:00","updated_at":"2025-12-22T17:13:47.909098-08:00","dependencies":[{"issue_id":"gt-ca4v.9","depends_on_id":"gt-ca4v","type":"parent-child","created_at":"2025-12-22T17:13:47.909418-08:00","created_by":"daemon"},{"issue_id":"gt-ca4v.9","depends_on_id":"gt-ca4v.8","type":"blocks","created_at":"2025-12-22T17:13:48.621628-08:00","created_by":"daemon"}]} -{"id":"gt-caih","title":"Witness handoff bead state persistence","description":"Implement state persistence for Witness across wisp cycles.\n\n## Problem\nWisps burn between cycles, but Witness needs to remember:\n- Which workers have been nudged\n- How many times (nudge_count)\n- When was last nudge\n- Last observed activity\n\n## Solution\nWitness handoff bead with worker_states field:\n\n```json\n{\n \"id\": \"gt-witness-state\",\n \"type\": \"handoff\",\n \"assignee\": \"\u003crig\u003e/witness\",\n \"pinned\": true,\n \"worker_states\": {\n \"furiosa\": {\n \"issue\": \"gt-123\",\n \"nudge_count\": 2,\n \"last_nudge\": \"2024-12-22T10:00:00Z\"\n }\n },\n \"last_patrol\": \"2024-12-22T10:05:00Z\"\n}\n```\n\n## Implementation\n1. 
On patrol start: bd show \u003cwitness-handoff-id\u003e to load state\n2. During patrol: update in-memory state\n3. On save-state step: bd update to persist\n4. State survives wisp burn/squash\n\n## Depends on\n- gt-83k0 (mol-witness-patrol definition)","status":"in_progress","priority":1,"issue_type":"task","assignee":"gastown/furiosa","created_at":"2025-12-22T16:42:57.427131-08:00","updated_at":"2025-12-23T00:02:54.779253-08:00","dependencies":[{"issue_id":"gt-caih","depends_on_id":"gt-83k0","type":"blocks","created_at":"2025-12-22T16:43:59.609821-08:00","created_by":"daemon"}]} +{"id":"gt-caih","title":"Witness handoff bead state persistence","description":"Implement state persistence for Witness across wisp cycles.\n\n## Problem\nWisps burn between cycles, but Witness needs to remember:\n- Which workers have been nudged\n- How many times (nudge_count)\n- When was last nudge\n- Last observed activity\n\n## Solution\nWitness handoff bead with worker_states field:\n\n```json\n{\n \"id\": \"gt-witness-state\",\n \"type\": \"handoff\",\n \"assignee\": \"\u003crig\u003e/witness\",\n \"pinned\": true,\n \"worker_states\": {\n \"furiosa\": {\n \"issue\": \"gt-123\",\n \"nudge_count\": 2,\n \"last_nudge\": \"2024-12-22T10:00:00Z\"\n }\n },\n \"last_patrol\": \"2024-12-22T10:05:00Z\"\n}\n```\n\n## Implementation\n1. On patrol start: bd show \u003cwitness-handoff-id\u003e to load state\n2. During patrol: update in-memory state\n3. On save-state step: bd update to persist\n4. 
State survives wisp burn/squash\n\n## Depends on\n- gt-83k0 (mol-witness-patrol definition)","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-22T16:42:57.427131-08:00","updated_at":"2025-12-22T16:42:57.427131-08:00","dependencies":[{"issue_id":"gt-caih","depends_on_id":"gt-83k0","type":"blocks","created_at":"2025-12-22T16:43:59.609821-08:00","created_by":"daemon"}]} {"id":"gt-caz","title":"Timed Beads: Scheduled recurring work","description":"## Summary\n\nTimed beads wake up periodically and get injected into the ready queue by the daemon.\n\n## Schema Extension\n\n```yaml\nid: gt-weekly-sync\ntype: task # or sentinel\nschedule: \"0 9 * * 1\" # cron: Monday 9am\n# OR\ninterval: 24h # every 24 hours\ntier: haiku # cheap model for routine checks\nnext_run: 2025-12-20T09:00:00Z\n```\n\n## Daemon Integration\n\nDaemon heartbeat loop:\n1. Check timed beads where `next_run \u003c= now`\n2. For each due bead:\n - Inject into ready queue (set status to open if needed)\n - Update `next_run` based on schedule/interval\n3. Witnesses pick up work via `bd ready`\n\n## Use Cases\n\n- Weekly team sync reminders\n- Daily health checks\n- Periodic cleanup tasks\n- Scheduled reports\n\n## Interaction with Pinned Beads\n\nA pinned bead can be timed - it wakes up periodically but never closes.\nThis is how you model \"background services\" in Gas Town.","status":"open","priority":2,"issue_type":"epic","created_at":"2025-12-18T18:07:39.665294-08:00","updated_at":"2025-12-18T18:07:39.665294-08:00"} {"id":"gt-cik","title":"Overseer Crew: User-managed persistent workspaces","description":"## Overview\n\nCrew workers are the overseer's (human's) personal workspaces within a rig. 
Unlike polecats which are witness-managed and ephemeral, crew workers are:\n\n- **Persistent**: Not auto-garbage-collected\n- **User-managed**: Overseer controls lifecycle\n- **Long-lived identities**: dave, emma, fred - recognizable names\n- **Gas Town integrated**: Mail, handoff mechanics work\n- **Tmux optional**: Can work in terminal directly\n\n## Directory Structure\n\n```\n\u003crig\u003e/\n polecats/ # Managed workers (witness controls)\n refinery/ # Merge queue processor\n witness/ # Pit boss\n crew/ # Overseer's personal workspaces\n dave/ # Full clone, persistent\n emma/ # Full clone, persistent\n fred/ # Full clone, persistent\n```\n\n## Key Differences from Polecats\n\n- Location: crew/ instead of polecats/\n- Lifecycle: User-managed, not witness-managed\n- Auto-cleanup: Never (polecats auto-cleanup on swarm land)\n- Issue assignment: Optional (polecats require it)\n- Tmux: Optional (polecats require it)\n- Mail \u0026 Handoff: Yes for both\n- Identity: Persistent (polecats are ephemeral)\n\n## CLI Commands\n\n- gt crew add \u003cname\u003e [--rig \u003crig\u003e] - Create crew workspace\n- gt crew list [--rig \u003crig\u003e] - List crew workspaces\n- gt crew at \u003crig\u003e/\u003cname\u003e - Attach to workspace (start session)\n- gt crew attach \u003cname\u003e - Attach (infer rig from cwd)\n- gt crew refresh \u003cname\u003e - Handoff + restart (context cycling)\n- gt crew remove \u003cname\u003e [--force] - Remove workspace\n- gt crew status [\u003cname\u003e] - Show workspace status\n\n## Design Notes\n\n- Crew workers use full git clones (not worktrees)\n- Optional beads integration via BEADS_DIR\n- Mail-to-self handoff works for context cycling\n- No witness monitoring or nudging\n- No automatic issue assignment required\n\n## Background\n\nUsers often maintain separate repo clones for serial agent work. This is tedious to set up manually. 
Crew workspaces bring these into Gas Town's infrastructure while keeping user control.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-16T16:47:37.529887-08:00","updated_at":"2025-12-16T20:59:46.13518-08:00","closed_at":"2025-12-16T20:59:46.13518-08:00"} {"id":"gt-cik.1","title":"Crew directory structure and config","description":"Add crew/ directory support to rig structure. Include:\n- crew/ as peer to polecats/, refinery/, witness/\n- Crew worker subdirectories with full git clones\n- Optional BEADS_DIR configuration for beads integration\n- Crew state tracking (separate from polecat state)","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-16T16:48:00.285499-08:00","updated_at":"2025-12-16T20:47:23.003869-08:00","closed_at":"2025-12-16T20:31:23.558027-08:00","dependencies":[{"issue_id":"gt-cik.1","depends_on_id":"gt-cik","type":"parent-child","created_at":"2025-12-16T16:48:00.28789-08:00","created_by":"daemon"}]} @@ -414,7 +414,7 @@ {"id":"gt-iua8","title":"Merge: gt-frs","description":"branch: polecat/Slit\ntarget: main\nsource_issue: gt-frs\nrig: gastown","status":"closed","priority":2,"issue_type":"merge-request","created_at":"2025-12-19T16:30:05.529099-08:00","updated_at":"2025-12-19T18:26:14.104887-08:00","closed_at":"2025-12-19T17:48:44.654109-08:00"} {"id":"gt-j4nu","title":"Merge: gt-g44u.3","description":"branch: polecat/Ace\ntarget: main\nsource_issue: gt-g44u.3\nrig: gastown","status":"closed","priority":0,"issue_type":"merge-request","created_at":"2025-12-19T16:14:52.767156-08:00","updated_at":"2025-12-19T17:35:36.663796-08:00","closed_at":"2025-12-19T17:35:36.663796-08:00"} {"id":"gt-j5tk","title":"Work assignment messages should auto-close on completion","description":"When a polecat completes work on an issue, the work assignment message (msg-type:task) stays open. 
Found 7 stale work assignments in gastown after swarm completed.\n\nProposal: When bd close is called on an issue, auto-close any work assignment messages that reference that issue in their body.\n\nAlternative: Work assignment messages could use a different lifecycle - perhaps they should be acked (closed) when the polecat starts working, not when they finish.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T03:12:28.403974-08:00","updated_at":"2025-12-20T03:12:28.403974-08:00"} -{"id":"gt-j6s8","title":"Refinery startup: bond mol-refinery-patrol on start","description":"Wire up Refinery to automatically bond its patrol molecule on startup.\n\n## Current state\n- mol-refinery-patrol exists in builtin_molecules.go\n- prompts/roles/refinery.md describes the protocol\n- Refinery doesn't auto-bond on startup\n\n## Desired behavior\nOn Refinery session start:\n1. gt prime detects RoleRefinery\n2. Check for existing in-progress patrol: bd list --status=in_progress --assignee=refinery\n3. If found: resume from current step\n4. If not found: bd mol bond mol-refinery-patrol --wisp\n5. 
Output patrol context to agent\n\n## Implementation options\nA) Add to gt prime (outputRefineryPatrolContext)\nB) Add startup hook in refinery CLAUDE.md\nC) Both (prime detects, template reinforces)\n\n## Testing\n- Start refinery session\n- Verify patrol bonds automatically\n- Kill mid-patrol, restart, verify resumes\n\n## Depends on\n- gt-3x0z.10 (existing issue for Refinery patrol)","status":"in_progress","priority":1,"issue_type":"task","assignee":"gastown/dementus","created_at":"2025-12-22T16:43:34.739741-08:00","updated_at":"2025-12-23T00:02:35.269457-08:00"} +{"id":"gt-j6s8","title":"Refinery startup: bond mol-refinery-patrol on start","description":"Wire up Refinery to automatically bond its patrol molecule on startup.\n\n## Current state\n- mol-refinery-patrol exists in builtin_molecules.go\n- prompts/roles/refinery.md describes the protocol\n- Refinery doesn't auto-bond on startup\n\n## Desired behavior\nOn Refinery session start:\n1. gt prime detects RoleRefinery\n2. Check for existing in-progress patrol: bd list --status=in_progress --assignee=refinery\n3. If found: resume from current step\n4. If not found: bd mol bond mol-refinery-patrol --wisp\n5. Output patrol context to agent\n\n## Implementation options\nA) Add to gt prime (outputRefineryPatrolContext)\nB) Add startup hook in refinery CLAUDE.md\nC) Both (prime detects, template reinforces)\n\n## Testing\n- Start refinery session\n- Verify patrol bonds automatically\n- Kill mid-patrol, restart, verify resumes\n\n## Depends on\n- gt-3x0z.10 (existing issue for Refinery patrol)","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-22T16:43:34.739741-08:00","updated_at":"2025-12-22T16:43:34.739741-08:00"} {"id":"gt-j87","title":"Design: Work flow simulation and validation","description":"Validate GGT designs through simulation before implementation.\n\n## Validation Approaches\n\n### 1. 
Dry-Run Simulation (Recommended First)\nMayor walks through scenarios mentally/on paper:\n- \"If polecat Toast signals done with dirty git state, what happens?\"\n- \"If Witness context fills mid-verification, what state is lost?\"\n- \"If two polecats try to close same issue, what happens?\"\n\nCreate beads for any gaps discovered.\n\n### 2. Real Work in gastown-py\nUse Python Gas Town to stress-test assumptions:\n- Run actual batch work on test repos\n- Observe edge cases in practice\n- Document issues found\n\n### 3. Edge Case Analysis\nSystematic review of failure modes:\n- Agent crashes mid-operation\n- Network failures during sync\n- Concurrent access to shared state\n- Context limits hit at bad times\n\n## Key Scenarios to Validate\n\n- [ ] Witness session cycling (state preservation)\n- [ ] Polecat decommission with dirty state\n- [ ] Merge conflicts in queue\n- [ ] Beads sync conflicts between workers\n- [ ] Escalation path (stuck worker -\u003e Mayor)\n- [ ] Cross-rig communication\n- [ ] Federation mail routing (future)\n\n## Success Criteria\n\n- No data loss scenarios identified\n- Clear recovery paths for all failure modes\n- Edge cases either handled or documented as limitations\n- Design improves as model cognition improves\n\n## Output\n\nFor each scenario validated:\n1. Document in relevant bead if issue found\n2. Create new beads for missing functionality\n3. 
Update architecture.md if design changes","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-15T20:24:11.251841-08:00","updated_at":"2025-12-16T17:25:49.858717-08:00"} {"id":"gt-jgdx","title":"Digest: mol-deacon-patrol","description":"Test patrol cycle - first run, no actual work done","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-22T02:07:03.388821-08:00","updated_at":"2025-12-22T02:07:03.388821-08:00","closed_at":"2025-12-22T02:07:03.388793-08:00","close_reason":"Squashed from 5 wisps"} {"id":"gt-jpt","title":"Town-level beads: Real DB for coordination mail","description":"Implement Option A from mail redesign: Town gets real beads DB for coordination.\n\n## Background\n\nMail is now Beads. But currently:\n- Town .beads/redirect points to rig beads\n- mayor/mail/ has legacy JSONL files\n- Cross-rig coordination has no clear home\n\n## Design\n\nTown beads = coordination, cross-rig mail, mayor inbox, handoffs\nRig beads = project issues, work items\n\nMatches HOP hierarchy: platform \u003e project \u003e worker\n\n## Structure\n\n~/gt/\n .beads/ # REAL beads DB (prefix: gm-)\n mayor/\n town.json\n state.json # NO mail/ directory\n gastown/\n .beads/ # Rig beads (prefix: ga-)\n\n## Tasks\n\n1. Delete ~/gt/.beads/redirect\n2. Run bd init --prefix gm at ~/gt/ (town beads)\n3. Delete ~/gt/mayor/mail/ directory\n4. Update gt mail to use beads not JSONL\n5. Add mail fields (thread_id, reply_to, msg_type)\n6. Update gt prime for two-tier model\n7. 
Update docs/architecture.md\n\n## Addressing\n\n- mayor/ -\u003e town beads\n- rig/agent -\u003e rig beads\n- Cross-rig -\u003e town beads","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-17T19:09:55.855955-08:00","updated_at":"2025-12-19T01:57:17.032558-08:00","closed_at":"2025-12-19T01:57:17.032558-08:00"} @@ -643,7 +643,7 @@ {"id":"gt-upom","title":"Witness patrol: cleanup idle orphan polecats","description":"Add patrol step to find and cleanup polecats that are idle with no assigned issue. These orphans occur when polecats crash before sending DONE or Witness misses the message. Patrol should verify git is clean before removing worktree. Part of gt-rana.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-21T23:09:41.756753-08:00","updated_at":"2025-12-21T23:09:41.756753-08:00"} {"id":"gt-us8","title":"Daemon: configurable heartbeat interval","description":"Heartbeat interval is hardcoded to 60s. Should be configurable via:\n- town.json config\n- Command line flag\n- Environment variable\n\nDefault 60s is reasonable but some deployments may want faster/slower.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-18T13:38:14.282216-08:00","updated_at":"2025-12-18T13:38:14.282216-08:00","dependencies":[{"issue_id":"gt-us8","depends_on_id":"gt-99m","type":"blocks","created_at":"2025-12-18T13:38:26.704111-08:00","created_by":"daemon"}]} {"id":"gt-usy0","title":"Merge: gt-3x0z.3","description":"branch: polecat/rictus\ntarget: main\nsource_issue: gt-3x0z.3\nrig: gastown","status":"closed","priority":2,"issue_type":"merge-request","created_at":"2025-12-21T16:03:43.535266-08:00","updated_at":"2025-12-21T17:20:27.505696-08:00","closed_at":"2025-12-21T17:20:27.505696-08:00","close_reason":"ORPHANED: Branch never pushed, worktree deleted"} -{"id":"gt-utwc","title":"Self-mail should suppress tmux notification","description":"When sending mail to yourself (e.g., mayor sending to mayor/), the tmux 
notification shouldn't fire.\n\n**Rationale:**\n- Self-mail is intended for future-you (next session handoff)\n- Present-you just sent it, so you already know about it\n- The notification is redundant/confusing in this case\n\n**Fix:**\nSuppress tmux notification when sender == recipient address.","status":"closed","priority":3,"issue_type":"bug","created_at":"2025-12-22T17:55:39.573705-08:00","updated_at":"2025-12-22T23:58:02.827026-08:00","closed_at":"2025-12-22T23:58:02.827026-08:00","close_reason":"Skip tmux notification when sender == recipient"} +{"id":"gt-utwc","title":"Self-mail should suppress tmux notification","description":"When sending mail to yourself (e.g., mayor sending to mayor/), the tmux notification shouldn't fire.\n\n**Rationale:**\n- Self-mail is intended for future-you (next session handoff)\n- Present-you just sent it, so you already know about it\n- The notification is redundant/confusing in this case\n\n**Fix:**\nSuppress tmux notification when sender == recipient address.","status":"open","priority":3,"issue_type":"bug","created_at":"2025-12-22T17:55:39.573705-08:00","updated_at":"2025-12-22T17:55:39.573705-08:00"} {"id":"gt-uym5","title":"Implement gt mol status command","description":"Show what's on an agent's hook.\n\n```bash\ngt mol status [target]\n```\n\nOutput:\n- What's slung (molecule name, associated issue)\n- Current phase and progress\n- Whether it's a wisp\n- Next action hint\n\nIf no target, shows current agent's status.\n\nAcceptance:\n- [ ] Read pinned bead attachment\n- [ ] Display molecule/issue info\n- [ ] Show phase progress\n- [ ] Indicate wisp vs durable","status":"closed","priority":1,"issue_type":"task","assignee":"gastown/nux","created_at":"2025-12-22T03:17:34.679963-08:00","updated_at":"2025-12-22T12:34:19.942265-08:00","closed_at":"2025-12-22T12:34:19.942265-08:00","close_reason":"Implemented gt mol status command with mol alias, auto-detection, progress display, wisp detection, and next action hints"} 
{"id":"gt-v5hv","title":"Work on ga-y6b: Implement Refinery as Claude agent. Conve...","description":"Work on ga-y6b: Implement Refinery as Claude agent. Convert from shell to Claude agent that processes MRs in merge queue, runs tests, merges to integration branch. When done, submit MR (not PR) to integration branch for Refinery.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-19T22:58:17.576892-08:00","updated_at":"2025-12-19T23:23:22.778407-08:00","closed_at":"2025-12-19T23:23:22.778407-08:00"} {"id":"gt-v5k","title":"Design: Failure modes and recovery","description":"Document failure modes and recovery strategies for Gas Town operations.\n\n## Critical Failure Modes\n\n### 1. Agent Crash Mid-Operation\n\n**Scenario**: Polecat crashes while committing, Witness crashes while verifying\n\n**Detection**:\n- Session suddenly gone (tmux check fails)\n- State shows 'working' but no session\n- Heartbeat stops (for Witness)\n\n**Recovery**:\n- Doctor detects via ZombieSessionCheck\n- Capture any recoverable state\n- Reset agent state to 'idle'\n- For Witness: auto-restart via supervisor or manual gt witness start\n\n### 2. Git State Corruption\n\n**Scenario**: Merge conflict, failed rebase, detached HEAD\n\n**Detection**:\n- Git commands fail\n- Dirty state that won't commit\n- Branch diverged from origin\n\n**Recovery**:\n- gt doctor reports git health issues\n- Manual intervention recommended\n- Severe cases: remove clone, re-clone\n\n### 3. Beads Sync Conflict\n\n**Scenario**: Two polecats modify same issue\n\n**Detection**:\n- bd sync fails with conflict\n- Beads tombstone mechanism handles most cases\n\n**Recovery**:\n- Beads has last-write-wins semantics\n- bd sync --force in extreme cases\n- Issues may need manual dedup\n\n### 4. 
Tmux Failure\n\n**Scenario**: Tmux server crashes, socket issues\n\n**Detection**:\n- All sessions inaccessible\n- \"no server running\" errors\n\n**Recovery**:\n- Kill any orphan processes\n- tmux kill-server \u0026\u0026 tmux start-server\n- All agent states reset to idle\n- Re-spawn active work\n\n### 5. Claude API Issues\n\n**Scenario**: Rate limits, outages, context limits\n\n**Detection**:\n- Sessions hang or produce errors\n- Repeated failure patterns\n\n**Recovery**:\n- Exponential backoff (handled by Claude Code)\n- For context limits: session cycling (mail-to-self)\n- For outages: wait and retry\n\n### 6. Disk Full\n\n**Scenario**: Clones, logs, or beads fill disk\n\n**Detection**:\n- Write operations fail\n- git/bd commands error\n\n**Recovery**:\n- Clean up logs: rm ~/.gastown/logs/*\n- Remove old polecat clones\n- gt doctor --fix can clean some cruft\n\n### 7. Network Failure\n\n**Scenario**: Can't reach GitHub, API servers\n\n**Detection**:\n- git fetch/push fails\n- Claude sessions hang\n\n**Recovery**:\n- Work continues locally\n- Queue pushes for later\n- Sync when connectivity restored\n\n## Recovery Principles\n\n1. **Fail safe**: Prefer stopping over corrupting\n2. **State is recoverable**: Git and beads have recovery mechanisms\n3. **Doctor heals**: gt doctor --fix handles common issues\n4. **Emergency stop**: gt stop --all as last resort\n5. **Human escalation**: Some failures need Overseer intervention\n\n## Implementation\n\n- Document each failure mode in architecture.md\n- Ensure doctor checks cover detection\n- Add recovery hints to error messages\n- Log all failures for debugging","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-15T23:19:07.198289-08:00","updated_at":"2025-12-15T23:19:28.171942-08:00"}