From 74409dc32ba3777e307b6f07a98c75713a50a20e Mon Sep 17 00:00:00 2001 From: gastown/crew/gus Date: Tue, 6 Jan 2026 13:20:45 -0800 Subject: [PATCH] feat(deacon): add stale hooked bead cleanup (gt-2yls3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `gt deacon stale-hooks` command to find and unhook stale beads. Problem: Beads can get stuck in 'hooked' status when agents die or abandon work without properly unhooking. Solution: - New command scans for hooked beads older than threshold (default 1h) - Checks if assignee agent is still alive (tmux session exists) - Unhooks beads with dead agents (sets status back to 'open') - Supports --dry-run to preview without making changes Also adds "stale-hook-check" step to Deacon patrol formula. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- internal/cmd/deacon.go | 92 +++++++++ internal/deacon/stale_hooks.go | 194 ++++++++++++++++++ .../formulas/mol-deacon-patrol.formula.toml | 35 +++- 3 files changed, 320 insertions(+), 1 deletion(-) create mode 100644 internal/deacon/stale_hooks.go diff --git a/internal/cmd/deacon.go b/internal/cmd/deacon.go index 33d45b3c..f96802f2 100644 --- a/internal/cmd/deacon.go +++ b/internal/cmd/deacon.go @@ -186,6 +186,22 @@ This helps the Deacon understand which agents may need attention.`, RunE: runDeaconHealthState, } +var deaconStaleHooksCmd = &cobra.Command{ + Use: "stale-hooks", + Short: "Find and unhook stale hooked beads", + Long: `Find beads stuck in 'hooked' status and unhook them if the agent is gone. + +Beads can get stuck in 'hooked' status when agents die or abandon work. +This command finds hooked beads older than the threshold (default: 1 hour), +checks if the assignee agent is still alive, and unhooks them if not. 
+ +Examples: + gt deacon stale-hooks # Find and unhook stale beads + gt deacon stale-hooks --dry-run # Preview what would be unhooked + gt deacon stale-hooks --max-age=30m # Use 30 minute threshold`, + RunE: runDeaconStaleHooks, +} + var ( triggerTimeout time.Duration @@ -198,6 +214,10 @@ var ( // Force kill flags forceKillReason string forceKillSkipNotify bool + + // Stale hooks flags + staleHooksMaxAge time.Duration + staleHooksDryRun bool ) func init() { @@ -211,6 +231,7 @@ func init() { deaconCmd.AddCommand(deaconHealthCheckCmd) deaconCmd.AddCommand(deaconForceKillCmd) deaconCmd.AddCommand(deaconHealthStateCmd) + deaconCmd.AddCommand(deaconStaleHooksCmd) // Flags for trigger-pending deaconTriggerPendingCmd.Flags().DurationVar(&triggerTimeout, "timeout", 2*time.Second, @@ -230,6 +251,12 @@ func init() { deaconForceKillCmd.Flags().BoolVar(&forceKillSkipNotify, "skip-notify", false, "Skip sending notification mail to mayor") + // Flags for stale-hooks + deaconStaleHooksCmd.Flags().DurationVar(&staleHooksMaxAge, "max-age", 1*time.Hour, + "Maximum age before a hooked bead is considered stale") + deaconStaleHooksCmd.Flags().BoolVar(&staleHooksDryRun, "dry-run", false, + "Preview what would be unhooked without making changes") + rootCmd.AddCommand(deaconCmd) } @@ -908,3 +935,68 @@ func updateAgentBeadState(townRoot, agent, state, _ string) { // reason unused b _ = cmd.Run() // Best effort } +// runDeaconStaleHooks finds and unhooks stale hooked beads. 
+//
+// It resolves the town root from the current directory, runs the scan with the
+// --max-age and --dry-run flag values, and prints one line per stale bead
+// followed by a short summary. It returns an error only when the workspace
+// cannot be found or the scan itself fails.
+func runDeaconStaleHooks(cmd *cobra.Command, args []string) error {
+	root, err := workspace.FindFromCwdOrError()
+	if err != nil {
+		return fmt.Errorf("not in a Gas Town workspace: %w", err)
+	}
+
+	// Both settings come straight from the command-line flags.
+	scan, err := deacon.ScanStaleHooks(root, &deacon.StaleHookConfig{
+		MaxAge: staleHooksMaxAge,
+		DryRun: staleHooksDryRun,
+	})
+	if err != nil {
+		return fmt.Errorf("scanning stale hooks: %w", err)
+	}
+
+	// Nothing is hooked at all: report that and stop.
+	if scan.TotalHooked == 0 {
+		fmt.Printf("%s No hooked beads found\n", style.Dim.Render("○"))
+		return nil
+	}
+
+	fmt.Printf("%s Found %d hooked bead(s), %d stale (older than %s)\n",
+		style.Bold.Render("●"), scan.TotalHooked, scan.StaleCount, staleHooksMaxAge)
+
+	// Hooked beads exist, but none has crossed the age threshold.
+	if scan.StaleCount == 0 {
+		fmt.Printf("%s No stale hooked beads\n", style.Dim.Render("○"))
+		return nil
+	}
+
+	// One detail line per stale bead.
+	for _, item := range scan.Results {
+		// Defaults describe a bead whose agent is still running.
+		marker := style.Dim.Render("○")
+		outcome := "skipped (agent alive)"
+
+		switch {
+		case item.AgentAlive:
+			// Keep the defaults.
+		case staleHooksDryRun:
+			marker = style.Bold.Render("?")
+			outcome = "would unhook (agent dead)"
+		case item.Unhooked:
+			marker = style.Bold.Render("✓")
+			outcome = "unhooked (agent dead)"
+		case item.Error != "":
+			marker = style.Dim.Render("✗")
+			outcome = fmt.Sprintf("error: %s", item.Error)
+		}
+
+		fmt.Printf(" %s %s: %s (age: %s, assignee: %s)\n",
+			marker, item.BeadID, outcome, item.Age, item.Assignee)
+	}
+
+	// Closing summary: a dry-run reminder, or the number actually unhooked.
+	switch {
+	case staleHooksDryRun:
+		fmt.Printf("\n%s Dry run - no changes made. Run without --dry-run to unhook.\n",
+			style.Dim.Render("ℹ"))
+	case scan.Unhooked > 0:
+		fmt.Printf("\n%s Unhooked %d stale bead(s)\n",
+			style.Bold.Render("✓"), scan.Unhooked)
+	}
+
+	return nil
+}
+
diff --git a/internal/deacon/stale_hooks.go b/internal/deacon/stale_hooks.go
new file mode 100644
index 00000000..e30c3625
--- /dev/null
+++ b/internal/deacon/stale_hooks.go
@@ -0,0 +1,194 @@
+// Package deacon provides the Deacon agent infrastructure.
+package deacon
+
+import (
+	"encoding/json"
+	"fmt"
+	"os/exec"
+	"strings"
+	"time"
+
+	"github.com/steveyegge/gastown/internal/tmux"
+)
+
+// StaleHookConfig holds configurable parameters for stale hook detection.
+// ScanStaleHooks substitutes DefaultStaleHookConfig() when given a nil config.
+type StaleHookConfig struct {
+	// MaxAge is how long a bead can be hooked before being considered stale.
+	// Age is measured from the bead's updated_at timestamp.
+	MaxAge time.Duration `json:"max_age"`
+	// DryRun if true, only reports what would be done without making changes.
+	DryRun bool `json:"dry_run"`
+}
+
+// DefaultStaleHookConfig returns the default stale hook config:
+// a one-hour MaxAge with DryRun disabled.
+func DefaultStaleHookConfig() *StaleHookConfig {
+	return &StaleHookConfig{
+		MaxAge: 1 * time.Hour,
+		DryRun: false,
+	}
+}
+
+// HookedBead represents a bead in hooked status from bd list output.
+// It is the decode target for the JSON array printed by
+// `bd list --status=hooked --json`; the struct tags name the JSON keys read.
+// UpdatedAt is the timestamp staleness is judged against, and Assignee is the
+// agent address that gets mapped to a tmux session name.
+type HookedBead struct {
+	ID        string    `json:"id"`
+	Title     string    `json:"title"`
+	Status    string    `json:"status"`
+	Assignee  string    `json:"assignee"`
+	UpdatedAt time.Time `json:"updated_at"`
+}
+
+// StaleHookResult represents the result of processing a stale hooked bead.
+// BeadID, Title and Assignee are copied from the bead. Age is the time since
+// the bead was last updated, rounded to the minute and rendered as a duration
+// string. AgentAlive reports whether a tmux session was found for the
+// assignee. Unhooked is true once the bead was set back to 'open'. Error holds
+// the failure message when processing the bead failed and is omitted from the
+// JSON when empty.
+type StaleHookResult struct {
+	BeadID     string `json:"bead_id"`
+	Title      string `json:"title"`
+	Assignee   string `json:"assignee"`
+	Age        string `json:"age"`
+	AgentAlive bool   `json:"agent_alive"`
+	Unhooked   bool   `json:"unhooked"`
+	Error      string `json:"error,omitempty"`
+}
+
+// StaleHookScanResult contains the full results of a stale hook scan.
+// ScannedAt is the UTC time the scan started. TotalHooked counts every hooked
+// bead returned by bd, StaleCount those older than the configured MaxAge, and
+// Unhooked those that were actually set back to 'open'. Results has one entry
+// per stale bead.
+type StaleHookScanResult struct {
+	ScannedAt   time.Time          `json:"scanned_at"`
+	TotalHooked int                `json:"total_hooked"`
+	StaleCount  int                `json:"stale_count"`
+	Unhooked    int                `json:"unhooked"`
+	Results     []*StaleHookResult `json:"results"`
+}
+
+// ScanStaleHooks finds hooked beads older than the threshold and optionally unhooks them.
+//
+// Staleness is judged by each bead's updated_at timestamp against cfg.MaxAge;
+// a nil cfg means DefaultStaleHookConfig(). For every stale bead the assignee
+// is mapped to a tmux session name and probed. Beads with no live agent
+// session are set back to 'open' unless cfg.DryRun is set. When the liveness
+// probe itself fails, the bead is left hooked and the failure is recorded in
+// the result's Error field, because a failed probe is not evidence that the
+// agent is gone.
+//
+// The returned error is non-nil only when the hooked beads cannot be listed;
+// per-bead failures are reported through StaleHookResult.Error.
+func ScanStaleHooks(townRoot string, cfg *StaleHookConfig) (*StaleHookScanResult, error) {
+	if cfg == nil {
+		cfg = DefaultStaleHookConfig()
+	}
+
+	// A single clock reading keeps ScannedAt, the staleness cutoff and the
+	// reported ages consistent with one another.
+	now := time.Now()
+
+	result := &StaleHookScanResult{
+		ScannedAt: now.UTC(),
+		// Non-nil so that an empty scan marshals as [] rather than null.
+		Results: make([]*StaleHookResult, 0),
+	}
+
+	// Get all hooked beads
+	hookedBeads, err := listHookedBeads(townRoot)
+	if err != nil {
+		return nil, fmt.Errorf("listing hooked beads: %w", err)
+	}
+
+	result.TotalHooked = len(hookedBeads)
+
+	// Filter to stale ones (older than threshold)
+	threshold := now.Add(-cfg.MaxAge)
+	t := tmux.NewTmux()
+
+	for _, bead := range hookedBeads {
+		// Skip if updated recently (not stale)
+		if bead.UpdatedAt.After(threshold) {
+			continue
+		}
+
+		result.StaleCount++
+
+		hookResult := &StaleHookResult{
+			BeadID:   bead.ID,
+			Title:    bead.Title,
+			Assignee: bead.Assignee,
+			Age:      now.Sub(bead.UpdatedAt).Round(time.Minute).String(),
+		}
+
+		// Probe for a live agent. An empty assignee, or one that does not map
+		// to a session name, is treated as having no live agent.
+		var probeErr error
+		if bead.Assignee != "" {
+			if sessionName := assigneeToSessionName(bead.Assignee); sessionName != "" {
+				// NOTE(review): assumes HasSession returns (bool, error) and
+				// reports a missing session as (false, nil) - confirm against
+				// the tmux package.
+				alive, err := t.HasSession(sessionName)
+				if err != nil {
+					probeErr = fmt.Errorf("checking tmux session %q: %w", sessionName, err)
+				} else {
+					hookResult.AgentAlive = alive
+				}
+			}
+		}
+
+		switch {
+		case probeErr != nil:
+			// Liveness unknown: never unhook on a failed probe, just report it.
+			hookResult.Error = probeErr.Error()
+		case !hookResult.AgentAlive && !cfg.DryRun:
+			// Agent is dead/gone and this is not a dry run: unhook the bead.
+			if err := unhookBead(townRoot, bead.ID); err != nil {
+				hookResult.Error = err.Error()
+			} else {
+				hookResult.Unhooked = true
+				result.Unhooked++
+			}
+		}
+
+		result.Results = append(result.Results, hookResult)
+	}
+
+	return result, nil
+}
+
+// listHookedBeads returns all beads with status=hooked.
+//
+// It runs `bd list --status=hooked --json --limit=0` with townRoot as the
+// working directory and decodes the JSON array it prints. An empty result
+// (no output, "[]", "null", or a "no issues found" message from a failing bd)
+// yields (nil, nil). Any other bd failure is returned with bd's stderr text
+// appended when there is any.
+func listHookedBeads(townRoot string) ([]*HookedBead, error) {
+	cmd := exec.Command("bd", "list", "--status=hooked", "--json", "--limit=0")
+	cmd.Dir = townRoot
+
+	output, err := cmd.Output()
+	if err != nil {
+		// Output captures stderr into ExitError.Stderr when cmd.Stderr is nil.
+		var stderr string
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			stderr = string(exitErr.Stderr)
+		}
+
+		// No hooked beads is not an error.
+		// NOTE(review): which stream bd uses for this message is not visible
+		// here, so both are checked.
+		if strings.Contains(string(output), "no issues found") ||
+			strings.Contains(stderr, "no issues found") {
+			return nil, nil
+		}
+
+		if msg := strings.TrimSpace(stderr); msg != "" {
+			return nil, fmt.Errorf("%w: %s", err, msg)
+		}
+		return nil, err
+	}
+
+	// Compare after trimming so a trailing newline cannot defeat the check.
+	trimmed := strings.TrimSpace(string(output))
+	if trimmed == "" || trimmed == "[]" || trimmed == "null" {
+		return nil, nil
+	}
+
+	var beads []*HookedBead
+	if err := json.Unmarshal([]byte(trimmed), &beads); err != nil {
+		return nil, fmt.Errorf("parsing hooked beads: %w", err)
+	}
+
+	return beads, nil
+}
+
+// assigneeToSessionName converts an assignee address to a tmux session name.
+// Supports formats like "gastown/polecats/max", "gastown/crew/joe", etc.
+//
+// Recognized shapes, after dropping one trailing "/" (so "mayor/" is read the
+// same as "mayor"):
+//
+//	deacon, mayor         -> gt-deacon, gt-mayor
+//	<rig>/witness         -> gt-<rig>-witness
+//	<rig>/refinery        -> gt-<rig>-refinery
+//	<rig>/polecats/<name> -> gt-<rig>-<name>
+//	<rig>/crew/<name>     -> gt-<rig>-crew-<name>
+//
+// Anything else, including the empty string, yields "".
+func assigneeToSessionName(assignee string) string {
+	parts := strings.Split(strings.TrimSuffix(assignee, "/"), "/")
+
+	switch len(parts) {
+	case 1:
+		// Simple names like "deacon", "mayor"
+		switch parts[0] {
+		case "deacon":
+			return "gt-deacon"
+		case "mayor":
+			return "gt-mayor"
+		}
+	case 2:
+		// rig/role: "gastown/witness", "gastown/refinery"
+		rig, role := parts[0], parts[1]
+		switch role {
+		case "witness", "refinery":
+			return fmt.Sprintf("gt-%s-%s", rig, role)
+		}
+	case 3:
+		// rig/type/name: "gastown/polecats/max", "gastown/crew/joe"
+		rig, agentType, name := parts[0], parts[1], parts[2]
+		switch agentType {
+		case "polecats":
+			return fmt.Sprintf("gt-%s-%s", rig, name)
+		case "crew":
+			return fmt.Sprintf("gt-%s-crew-%s", rig, name)
+		}
+	}
+
+	// Unrecognized address shape.
+	return ""
+}
+
+// unhookBead sets a bead's status back to 'open'.
+//
+// It runs `bd update <beadID> --status=open` with townRoot as the working
+// directory. On failure the returned error wraps the exec error and carries
+// whatever bd printed, so callers that surface err.Error() show the real
+// cause instead of a bare exit status.
+func unhookBead(townRoot, beadID string) error {
+	cmd := exec.Command("bd", "update", beadID, "--status=open")
+	cmd.Dir = townRoot
+
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		if msg := strings.TrimSpace(string(out)); msg != "" {
+			return fmt.Errorf("bd update %s: %w: %s", beadID, err, msg)
+		}
+		return fmt.Errorf("bd update %s: %w", beadID, err)
+	}
+	return nil
+}
diff --git a/internal/formula/formulas/mol-deacon-patrol.formula.toml b/internal/formula/formulas/mol-deacon-patrol.formula.toml
index 353a57f9..adef3ee5 100644
--- a/internal/formula/formulas/mol-deacon-patrol.formula.toml
+++ b/internal/formula/formulas/mol-deacon-patrol.formula.toml
@@ -340,10 +340,43 @@ gt mail send mayor/ -s "Health: unresponsive" \\
 
 Reset unresponsive_cycles to 0 when component responds normally."""
 
+[[steps]]
+id = "stale-hook-check"
+title = "Cleanup stale hooked beads"
+needs = ["health-scan"]
+description = """
+Find and unhook beads stuck in 'hooked' status.
+
+Beads can get stuck in 'hooked' status when agents die or abandon work without
+properly unhooking. This step cleans them up so the work can be reassigned.
+
+**Step 1: Preview stale hooks**
+```bash
+gt deacon stale-hooks --dry-run
+```
+
+Review the output - it shows:
+- Hooked beads older than 1 hour
+- Whether the assignee agent is still alive
+- What action would be taken
+
+**Step 2: If stale hooks found with dead agents, unhook them**
+```bash
+gt deacon stale-hooks
+```
+
+This sets status back to 'open' for beads whose assignee agent is no longer running.
+
+**Step 3: If no stale hooks**
+No action needed - hooks are healthy.
+
+**Note**: This is a backstop. Primary fix is ensuring agents properly unhook
+beads when they exit or hand off work."""
+
 [[steps]]
 id = "zombie-scan"
 title = "Backup check for zombie polecats"
-needs = ["health-scan"]
+needs = ["stale-hook-check"]
 description = """
 Defense-in-depth check for zombie polecats that Witness should have cleaned.