feat(deacon): add stale hooked bead cleanup (gt-2yls3)

Add `gt deacon stale-hooks` command to find and unhook stale beads.

Problem: Beads can get stuck in 'hooked' status when agents die or
abandon work without properly unhooking.

Solution:
- New command scans for hooked beads older than threshold (default 1h)
- Checks if assignee agent is still alive (tmux session exists)
- Unhooks beads with dead agents (sets status back to 'open')
- Supports --dry-run to preview without making changes

Also adds "stale-hook-check" step to Deacon patrol formula.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gastown/crew/gus
2026-01-06 13:20:45 -08:00
committed by Steve Yegge
parent ac63b10aa8
commit 74409dc32b
3 changed files with 320 additions and 1 deletions

View File

@@ -186,6 +186,22 @@ This helps the Deacon understand which agents may need attention.`,
RunE: runDeaconHealthState, RunE: runDeaconHealthState,
} }
// deaconStaleHooksCmd defines the `stale-hooks` subcommand; its work is done
// by runDeaconStaleHooks. The Long text below is user-facing help output.
var deaconStaleHooksCmd = &cobra.Command{
	Use:   "stale-hooks",
	Short: "Find and unhook stale hooked beads",
	Long: `Find beads stuck in 'hooked' status and unhook them if the agent is gone.
Beads can get stuck in 'hooked' status when agents die or abandon work.
This command finds hooked beads older than the threshold (default: 1 hour),
checks if the assignee agent is still alive, and unhooks them if not.
Examples:
gt deacon stale-hooks # Find and unhook stale beads
gt deacon stale-hooks --dry-run # Preview what would be unhooked
gt deacon stale-hooks --max-age=30m # Use 30 minute threshold`,
	RunE: runDeaconStaleHooks,
}
var ( var (
triggerTimeout time.Duration triggerTimeout time.Duration
@@ -198,6 +214,10 @@ var (
// Force kill flags // Force kill flags
forceKillReason string forceKillReason string
forceKillSkipNotify bool forceKillSkipNotify bool
// Stale hooks flags
staleHooksMaxAge time.Duration
staleHooksDryRun bool
) )
func init() { func init() {
@@ -211,6 +231,7 @@ func init() {
deaconCmd.AddCommand(deaconHealthCheckCmd) deaconCmd.AddCommand(deaconHealthCheckCmd)
deaconCmd.AddCommand(deaconForceKillCmd) deaconCmd.AddCommand(deaconForceKillCmd)
deaconCmd.AddCommand(deaconHealthStateCmd) deaconCmd.AddCommand(deaconHealthStateCmd)
deaconCmd.AddCommand(deaconStaleHooksCmd)
// Flags for trigger-pending // Flags for trigger-pending
deaconTriggerPendingCmd.Flags().DurationVar(&triggerTimeout, "timeout", 2*time.Second, deaconTriggerPendingCmd.Flags().DurationVar(&triggerTimeout, "timeout", 2*time.Second,
@@ -230,6 +251,12 @@ func init() {
deaconForceKillCmd.Flags().BoolVar(&forceKillSkipNotify, "skip-notify", false, deaconForceKillCmd.Flags().BoolVar(&forceKillSkipNotify, "skip-notify", false,
"Skip sending notification mail to mayor") "Skip sending notification mail to mayor")
// Flags for stale-hooks
deaconStaleHooksCmd.Flags().DurationVar(&staleHooksMaxAge, "max-age", 1*time.Hour,
"Maximum age before a hooked bead is considered stale")
deaconStaleHooksCmd.Flags().BoolVar(&staleHooksDryRun, "dry-run", false,
"Preview what would be unhooked without making changes")
rootCmd.AddCommand(deaconCmd) rootCmd.AddCommand(deaconCmd)
} }
@@ -908,3 +935,68 @@ func updateAgentBeadState(townRoot, agent, state, _ string) { // reason unused b
_ = cmd.Run() // Best effort _ = cmd.Run() // Best effort
} }
// runDeaconStaleHooks implements `gt deacon stale-hooks`.
//
// It locates the workspace from the current directory, runs
// deacon.ScanStaleHooks with the --max-age and --dry-run flag values, and
// prints one line per stale bead followed by a summary.
//
// An error is returned only when the workspace cannot be found or the scan
// itself fails. Per-bead failures are printed and counted in the summary but
// do not change the return value, so the command stays best-effort.
func runDeaconStaleHooks(cmd *cobra.Command, args []string) error {
	townRoot, err := workspace.FindFromCwdOrError()
	if err != nil {
		return fmt.Errorf("not in a Gas Town workspace: %w", err)
	}

	cfg := &deacon.StaleHookConfig{
		MaxAge: staleHooksMaxAge,
		DryRun: staleHooksDryRun,
	}
	result, err := deacon.ScanStaleHooks(townRoot, cfg)
	if err != nil {
		return fmt.Errorf("scanning stale hooks: %w", err)
	}

	// Print summary
	if result.TotalHooked == 0 {
		fmt.Printf("%s No hooked beads found\n", style.Dim.Render("○"))
		return nil
	}
	fmt.Printf("%s Found %d hooked bead(s), %d stale (older than %s)\n",
		style.Bold.Render("●"), result.TotalHooked, result.StaleCount, staleHooksMaxAge)
	if result.StaleCount == 0 {
		fmt.Printf("%s No stale hooked beads\n", style.Dim.Render("○"))
		return nil
	}

	// Print details for each stale bead
	failed := 0
	for _, r := range result.Results {
		var status, action string
		switch {
		case r.Error != "":
			// A recorded error always wins so a failure is never shown as
			// "skipped" or "would unhook".
			failed++
			status = style.Dim.Render("✗")
			action = fmt.Sprintf("error: %s", r.Error)
		case r.AgentAlive:
			status = style.Dim.Render("○")
			action = "skipped (agent alive)"
		case staleHooksDryRun:
			status = style.Bold.Render("?")
			action = "would unhook (agent dead)"
		case r.Unhooked:
			status = style.Bold.Render("✓")
			action = "unhooked (agent dead)"
		default:
			// Dead agent, nothing changed, no error reported: say so rather
			// than claiming the agent is alive.
			status = style.Dim.Render("○")
			action = "not unhooked (agent dead)"
		}
		fmt.Printf(" %s %s: %s (age: %s, assignee: %s)\n",
			status, r.BeadID, action, r.Age, r.Assignee)
	}

	// Summary
	if staleHooksDryRun {
		fmt.Printf("\n%s Dry run - no changes made. Run without --dry-run to unhook.\n",
			style.Dim.Render(""))
	} else if result.Unhooked > 0 {
		fmt.Printf("\n%s Unhooked %d stale bead(s)\n",
			style.Bold.Render("✓"), result.Unhooked)
	}
	if failed > 0 {
		fmt.Printf("\n%s %d stale bead(s) could not be processed (see errors above)\n",
			style.Dim.Render("✗"), failed)
	}
	return nil
}

View File

@@ -0,0 +1,194 @@
// Package deacon provides the Deacon agent infrastructure.
package deacon
import (
"encoding/json"
"fmt"
"os/exec"
"strings"
"time"
"github.com/steveyegge/gastown/internal/tmux"
)
// StaleHookConfig holds configurable parameters for stale hook detection.
// It is the configuration argument of ScanStaleHooks.
type StaleHookConfig struct {
	// MaxAge is how long a bead can be hooked before being considered stale.
	// Staleness is measured from the bead's last update time: beads updated
	// more recently than MaxAge ago are skipped by the scan.
	MaxAge time.Duration `json:"max_age"`
	// DryRun if true, only reports what would be done without making changes.
	// No bead is unhooked while it is set.
	DryRun bool `json:"dry_run"`
}
// DefaultStaleHookConfig builds the configuration used when a caller supplies
// none: a one-hour staleness threshold with dry-run disabled.
func DefaultStaleHookConfig() *StaleHookConfig {
	// DryRun is left at its zero value (false).
	defaults := StaleHookConfig{MaxAge: time.Hour}
	return &defaults
}
// HookedBead represents a bead in hooked status from bd list output.
// It is the decode target for the JSON array printed by
// `bd list --status=hooked --json`; only the fields below are read.
type HookedBead struct {
	ID       string `json:"id"`
	Title    string `json:"title"`
	Status   string `json:"status"`
	Assignee string `json:"assignee"` // agent address; mapped to a tmux session name by the scan
	// UpdatedAt is the timestamp the scan compares against its staleness
	// threshold and uses to compute the reported age.
	UpdatedAt time.Time `json:"updated_at"`
}
// StaleHookResult represents the result of processing a stale hooked bead.
// One value is produced for every bead that passed the staleness threshold,
// whether or not it ended up being unhooked.
type StaleHookResult struct {
	BeadID   string `json:"bead_id"`
	Title    string `json:"title"`
	Assignee string `json:"assignee"`
	// Age is the time since the bead's last update, rounded to the minute and
	// formatted as a duration string.
	Age string `json:"age"`
	// AgentAlive is true only when a tmux session for the assignee was found.
	AgentAlive bool `json:"agent_alive"`
	// Unhooked is true when the bead's status was successfully reset.
	Unhooked bool `json:"unhooked"`
	// Error describes a failure encountered while processing this bead; it is
	// empty (and omitted from JSON) when there was none.
	Error string `json:"error,omitempty"`
}
// StaleHookScanResult contains the full results of a stale hook scan.
type StaleHookScanResult struct {
	ScannedAt   time.Time `json:"scanned_at"`   // UTC time at which the scan started
	TotalHooked int       `json:"total_hooked"` // number of hooked beads returned by the listing
	StaleCount  int       `json:"stale_count"`  // hooked beads older than the configured threshold
	Unhooked    int       `json:"unhooked"`     // stale beads that were successfully unhooked
	// Results holds one entry per stale bead; it is an empty, non-nil slice
	// when nothing is stale.
	Results []*StaleHookResult `json:"results"`
}
// ScanStaleHooks finds hooked beads older than the threshold and optionally unhooks them.
//
// townRoot is the working directory for the bd commands that list and update
// beads. A nil cfg falls back to DefaultStaleHookConfig.
//
// A bead is stale when its last update is not more recent than cfg.MaxAge
// ago. For each stale bead the assignee is mapped to a tmux session name and
// the session is looked up:
//   - session found: the bead is left hooked (AgentAlive is true);
//   - no assignee, unmappable assignee, or session absent: the bead is
//     unhooked unless cfg.DryRun is set;
//   - the session lookup itself failed: liveness is unknown, so the bead is
//     left hooked and the failure is recorded in the result's Error field
//     instead of being read as "agent dead".
//
// The returned error is non-nil only when the hooked beads cannot be listed;
// per-bead failures are reported through StaleHookResult.Error.
func ScanStaleHooks(townRoot string, cfg *StaleHookConfig) (*StaleHookScanResult, error) {
	if cfg == nil {
		cfg = DefaultStaleHookConfig()
	}

	// A single clock reading keeps ScannedAt, the cutoff and the reported
	// ages consistent with each other.
	now := time.Now()
	result := &StaleHookScanResult{
		ScannedAt: now.UTC(),
		Results:   make([]*StaleHookResult, 0),
	}

	// Get all hooked beads
	hookedBeads, err := listHookedBeads(townRoot)
	if err != nil {
		return nil, fmt.Errorf("listing hooked beads: %w", err)
	}
	result.TotalHooked = len(hookedBeads)

	// Filter to stale ones (older than threshold)
	threshold := now.Add(-cfg.MaxAge)
	t := tmux.NewTmux()
	for _, bead := range hookedBeads {
		// Skip if updated recently (not stale)
		if bead.UpdatedAt.After(threshold) {
			continue
		}
		result.StaleCount++

		hookResult := &StaleHookResult{
			BeadID:   bead.ID,
			Title:    bead.Title,
			Assignee: bead.Assignee,
			Age:      now.Sub(bead.UpdatedAt).Round(time.Minute).String(),
		}

		// Check if assignee agent is still alive. An empty or unrecognized
		// assignee maps to no session name and counts as "no live agent".
		livenessKnown := true
		if sessionName := assigneeToSessionName(bead.Assignee); sessionName != "" {
			alive, err := t.HasSession(sessionName)
			if err != nil {
				// Could not ask tmux: do not guess. Unhooking a bead that a
				// live agent is working on is worse than leaving a stale one.
				livenessKnown = false
				hookResult.Error = fmt.Sprintf("checking session %q: %v", sessionName, err)
			} else {
				hookResult.AgentAlive = alive
			}
		}

		// If agent is dead/gone and not dry run, unhook the bead
		if livenessKnown && !hookResult.AgentAlive && !cfg.DryRun {
			if err := unhookBead(townRoot, bead.ID); err != nil {
				hookResult.Error = err.Error()
			} else {
				hookResult.Unhooked = true
				result.Unhooked++
			}
		}
		result.Results = append(result.Results, hookResult)
	}
	return result, nil
}
// listHookedBeads returns all beads with status=hooked.
//
// It runs `bd list --status=hooked --json --limit=0` in townRoot and decodes
// the JSON array written to stdout. Empty output, "[]" and "null" (ignoring
// surrounding whitespace) all mean "no hooked beads" and yield a nil slice.
//
// When bd exits with an error, a "no issues found" message on either stdout
// or stderr is still treated as an empty result; any other failure is
// returned with bd's stderr text attached when there is some.
func listHookedBeads(townRoot string) ([]*HookedBead, error) {
	cmd := exec.Command("bd", "list", "--status=hooked", "--json", "--limit=0")
	cmd.Dir = townRoot
	output, err := cmd.Output()
	if err != nil {
		// Output returns *exec.ExitError directly and, because cmd.Stderr is
		// unset, fills its Stderr field with what the command printed there.
		var stderr string
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = strings.TrimSpace(string(exitErr.Stderr))
		}
		// No hooked beads is not an error. Which stream bd uses for this
		// message is not visible from here, so both are checked.
		if strings.Contains(string(output), "no issues found") ||
			strings.Contains(stderr, "no issues found") {
			return nil, nil
		}
		if stderr != "" {
			return nil, fmt.Errorf("bd list: %w: %s", err, stderr)
		}
		return nil, fmt.Errorf("bd list: %w", err)
	}

	switch strings.TrimSpace(string(output)) {
	case "", "[]", "null":
		return nil, nil
	}

	var beads []*HookedBead
	if err := json.Unmarshal(output, &beads); err != nil {
		return nil, fmt.Errorf("parsing hooked beads: %w", err)
	}
	return beads, nil
}
// assigneeToSessionName converts an assignee address to a tmux session name.
// Supports formats like "gastown/polecats/max", "gastown/crew/joe", etc.
//
// Recognized shapes:
//
//	"deacon", "mayor"        -> "gt-deacon", "gt-mayor"
//	"<rig>/witness|refinery" -> "gt-<rig>-<role>"
//	"<rig>/polecats/<name>"  -> "gt-<rig>-<name>"
//	"<rig>/crew/<name>"      -> "gt-<rig>-crew-<name>"
//
// Surrounding whitespace and a single trailing slash are ignored, so "mayor/"
// resolves the same as "mayor". Anything else, including addresses with an
// empty path component, returns "" to signal that no session name is known.
func assigneeToSessionName(assignee string) string {
	addr := strings.TrimSuffix(strings.TrimSpace(assignee), "/")
	parts := strings.Split(addr, "/")

	// An empty component ("", "rig//name", "/polecats/max") can never name a
	// real session; reject it instead of building something like "gt--max".
	for _, p := range parts {
		if p == "" {
			return ""
		}
	}

	switch len(parts) {
	case 1:
		// Simple names like "deacon", "mayor"
		switch parts[0] {
		case "deacon":
			return "gt-deacon"
		case "mayor":
			return "gt-mayor"
		}
	case 2:
		// rig/role: "gastown/witness", "gastown/refinery"
		rig, role := parts[0], parts[1]
		switch role {
		case "witness", "refinery":
			return fmt.Sprintf("gt-%s-%s", rig, role)
		}
	case 3:
		// rig/type/name: "gastown/polecats/max", "gastown/crew/joe"
		rig, agentType, name := parts[0], parts[1], parts[2]
		switch agentType {
		case "polecats":
			return fmt.Sprintf("gt-%s-%s", rig, name)
		case "crew":
			return fmt.Sprintf("gt-%s-crew-%s", rig, name)
		}
	}
	return ""
}
// unhookBead sets a bead's status back to 'open'.
//
// It runs `bd update <beadID> --status=open` in townRoot. An empty beadID is
// rejected without running anything. On failure the returned error wraps the
// command error and includes whatever bd printed, so callers that surface
// err.Error() show more than a bare exit status.
func unhookBead(townRoot, beadID string) error {
	if strings.TrimSpace(beadID) == "" {
		return fmt.Errorf("bd update: empty bead ID")
	}

	cmd := exec.Command("bd", "update", beadID, "--status=open")
	cmd.Dir = townRoot
	out, err := cmd.CombinedOutput()
	if err != nil {
		if msg := strings.TrimSpace(string(out)); msg != "" {
			return fmt.Errorf("bd update %s: %w: %s", beadID, err, msg)
		}
		return fmt.Errorf("bd update %s: %w", beadID, err)
	}
	return nil
}

View File

@@ -340,10 +340,43 @@ gt mail send mayor/ -s "Health: <rig> <component> unresponsive" \\
Reset unresponsive_cycles to 0 when component responds normally.""" Reset unresponsive_cycles to 0 when component responds normally."""
# stale-hook-check: patrol step declared as depending on health-scan. Its
# description walks through `gt deacon stale-hooks`, first with --dry-run and
# then for real.
[[steps]]
id = "stale-hook-check"
title = "Cleanup stale hooked beads"
needs = ["health-scan"]
description = """
Find and unhook beads stuck in 'hooked' status.
Beads can get stuck in 'hooked' status when agents die or abandon work without
properly unhooking. This step cleans them up so the work can be reassigned.
**Step 1: Preview stale hooks**
```bash
gt deacon stale-hooks --dry-run
```
Review the output - it shows:
- Hooked beads older than 1 hour
- Whether the assignee agent is still alive
- What action would be taken
**Step 2: If stale hooks found with dead agents, unhook them**
```bash
gt deacon stale-hooks
```
This sets status back to 'open' for beads whose assignee agent is no longer running.
**Step 3: If no stale hooks**
No action needed - hooks are healthy.
**Note**: This is a backstop. Primary fix is ensuring agents properly unhook
beads when they exit or hand off work."""
[[steps]] [[steps]]
id = "zombie-scan" id = "zombie-scan"
title = "Backup check for zombie polecats" title = "Backup check for zombie polecats"
needs = ["health-scan"] needs = ["stale-hook-check"]
description = """ description = """
Defense-in-depth check for zombie polecats that Witness should have cleaned. Defense-in-depth check for zombie polecats that Witness should have cleaned.