diff --git a/internal/cmd/polecat.go b/internal/cmd/polecat.go index 0ad3f7b3..1f03c9f0 100644 --- a/internal/cmd/polecat.go +++ b/internal/cmd/polecat.go @@ -199,9 +199,11 @@ var ( polecatSyncFromMain bool polecatStatusJSON bool polecatGitStateJSON bool - polecatGCDryRun bool - polecatNukeAll bool - polecatNukeDryRun bool + polecatGCDryRun bool + polecatNukeAll bool + polecatNukeDryRun bool + polecatNukeForce bool + polecatCheckRecoveryJSON bool ) var polecatGCCmd = &cobra.Command{ @@ -266,6 +268,25 @@ Examples: RunE: runPolecatGitState, } +var polecatCheckRecoveryCmd = &cobra.Command{ + Use: "check-recovery /", + Short: "Check if polecat needs recovery vs safe to nuke", + Long: `Check recovery status of a polecat based on cleanup_status in agent bead. + +Used by the Witness to determine appropriate cleanup action: + - SAFE_TO_NUKE: cleanup_status is 'clean' - no work at risk + - NEEDS_RECOVERY: cleanup_status indicates unpushed/uncommitted work + +This prevents accidental data loss when cleaning up dormant polecats. +The Witness should escalate NEEDS_RECOVERY cases to the Mayor. 
+ +Examples: + gt polecat check-recovery greenplace/Toast + gt polecat check-recovery greenplace/Toast --json`, + Args: cobra.ExactArgs(1), + RunE: runPolecatCheckRecovery, +} + func init() { // List flags polecatListCmd.Flags().BoolVar(&polecatListJSON, "json", false, "Output as JSON") @@ -291,6 +312,10 @@ func init() { // Nuke flags polecatNukeCmd.Flags().BoolVar(&polecatNukeAll, "all", false, "Nuke all polecats in the rig") polecatNukeCmd.Flags().BoolVar(&polecatNukeDryRun, "dry-run", false, "Show what would be nuked without doing it") + polecatNukeCmd.Flags().BoolVarP(&polecatNukeForce, "force", "f", false, "Force nuke even if polecat has unpushed work") + + // Check-recovery flags + polecatCheckRecoveryCmd.Flags().BoolVar(&polecatCheckRecoveryJSON, "json", false, "Output as JSON") // Add subcommands polecatCmd.AddCommand(polecatListCmd) @@ -303,6 +328,7 @@ func init() { polecatCmd.AddCommand(polecatSyncCmd) polecatCmd.AddCommand(polecatStatusCmd) polecatCmd.AddCommand(polecatGitStateCmd) + polecatCmd.AddCommand(polecatCheckRecoveryCmd) polecatCmd.AddCommand(polecatGCCmd) polecatCmd.AddCommand(polecatNukeCmd) @@ -1054,6 +1080,122 @@ func getGitState(worktreePath string) (*GitState, error) { return state, nil } +// RecoveryStatus represents whether a polecat needs recovery or is safe to nuke. 
+type RecoveryStatus struct { + Rig string `json:"rig"` + Polecat string `json:"polecat"` + CleanupStatus string `json:"cleanup_status"` + NeedsRecovery bool `json:"needs_recovery"` + Verdict string `json:"verdict"` // SAFE_TO_NUKE or NEEDS_RECOVERY + Branch string `json:"branch,omitempty"` + Issue string `json:"issue,omitempty"` +} + +func runPolecatCheckRecovery(cmd *cobra.Command, args []string) error { + rigName, polecatName, err := parseAddress(args[0]) + if err != nil { + return err + } + + mgr, r, err := getPolecatManager(rigName) + if err != nil { + return err + } + + // Verify polecat exists and get info + p, err := mgr.Get(polecatName) + if err != nil { + return fmt.Errorf("polecat '%s' not found in rig '%s'", polecatName, rigName) + } + + // Get cleanup_status from agent bead + // We need to read it directly from beads since manager doesn't expose it + rigPath := r.Path + bd := beads.New(rigPath) + agentBeadID := beads.PolecatBeadID(rigName, polecatName) + _, fields, err := bd.GetAgentBead(agentBeadID) + + status := RecoveryStatus{ + Rig: rigName, + Polecat: polecatName, + Branch: p.Branch, + Issue: p.Issue, + } + + if err != nil || fields == nil { + // No agent bead or no cleanup_status - fall back to git check + // This handles polecats that haven't self-reported yet + gitState, gitErr := getGitState(p.ClonePath) + if gitErr != nil { + status.CleanupStatus = "unknown" + status.NeedsRecovery = true + status.Verdict = "NEEDS_RECOVERY" + } else if gitState.Clean { + status.CleanupStatus = "clean" + status.NeedsRecovery = false + status.Verdict = "SAFE_TO_NUKE" + } else if gitState.UnpushedCommits > 0 { + status.CleanupStatus = "has_unpushed" + status.NeedsRecovery = true + status.Verdict = "NEEDS_RECOVERY" + } else if gitState.StashCount > 0 { + status.CleanupStatus = "has_stash" + status.NeedsRecovery = true + status.Verdict = "NEEDS_RECOVERY" + } else { + status.CleanupStatus = "has_uncommitted" + status.NeedsRecovery = true + status.Verdict = 
"NEEDS_RECOVERY" + } + } else { + // Use cleanup_status from agent bead + status.CleanupStatus = fields.CleanupStatus + switch fields.CleanupStatus { + case "clean": + status.NeedsRecovery = false + status.Verdict = "SAFE_TO_NUKE" + case "has_uncommitted", "has_unpushed", "has_stash": + status.NeedsRecovery = true + status.Verdict = "NEEDS_RECOVERY" + default: + // Unknown or empty - be conservative + status.NeedsRecovery = true + status.Verdict = "NEEDS_RECOVERY" + } + } + + // JSON output + if polecatCheckRecoveryJSON { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(status) + } + + // Human-readable output + fmt.Printf("%s\n\n", style.Bold.Render(fmt.Sprintf("Recovery Status: %s/%s", rigName, polecatName))) + fmt.Printf(" Cleanup Status: %s\n", status.CleanupStatus) + if status.Branch != "" { + fmt.Printf(" Branch: %s\n", status.Branch) + } + if status.Issue != "" { + fmt.Printf(" Issue: %s\n", status.Issue) + } + fmt.Println() + + if status.NeedsRecovery { + fmt.Printf(" Verdict: %s\n", style.Error.Render("NEEDS_RECOVERY")) + fmt.Println() + fmt.Printf(" %s This polecat has unpushed/uncommitted work.\n", style.Warning.Render("⚠")) + fmt.Println(" Escalate to Mayor for recovery before cleanup.") + } else { + fmt.Printf(" Verdict: %s\n", style.Success.Render("SAFE_TO_NUKE")) + fmt.Println() + fmt.Printf(" %s Safe to nuke - no work at risk.\n", style.Success.Render("✓")) + } + + return nil +} + func runPolecatGC(cmd *cobra.Command, args []string) error { rigName := args[0] @@ -1199,6 +1341,58 @@ func runPolecatNuke(cmd *cobra.Command, args []string) error { } } + // Check recovery status for each polecat unless --force is set + // This prevents accidental data loss when nuking polecats with unpushed work + if !polecatNukeForce && !polecatNukeDryRun { + var needsRecovery []string + for _, p := range toNuke { + // Check cleanup_status from agent bead + bd := beads.New(p.r.Path) + agentBeadID := beads.PolecatBeadID(p.rigName, 
p.polecatName) + _, fields, err := bd.GetAgentBead(agentBeadID) + + var recoveryNeeded bool + if err != nil || fields == nil { + // No agent bead - fall back to git check + polecatInfo, infoErr := p.mgr.Get(p.polecatName) + if infoErr == nil && polecatInfo != nil { + gitState, gitErr := getGitState(polecatInfo.ClonePath) + if gitErr != nil || !gitState.Clean { + recoveryNeeded = true + } + } + } else { + // Check cleanup_status from agent bead + switch fields.CleanupStatus { + case "clean": + recoveryNeeded = false + case "has_uncommitted", "has_unpushed", "has_stash", "unknown", "": + recoveryNeeded = true + default: + recoveryNeeded = true + } + } + + if recoveryNeeded { + needsRecovery = append(needsRecovery, fmt.Sprintf("%s/%s", p.rigName, p.polecatName)) + } + } + + if len(needsRecovery) > 0 { + fmt.Printf("%s The following polecats have unpushed/uncommitted work:\n", style.Error.Render("Error:")) + for _, pc := range needsRecovery { + fmt.Printf(" - %s\n", pc) + } + fmt.Println() + fmt.Println("These polecats NEED RECOVERY before cleanup.") + fmt.Println("Options:") + fmt.Printf(" 1. Escalate to Mayor: gt mail send mayor/ -s \"RECOVERY_NEEDED\" -m \"...\"\n") + fmt.Printf(" 2. 
Force nuke (LOSES WORK): gt polecat nuke --force %s\n", strings.Join(needsRecovery, " ")) + fmt.Println() + return fmt.Errorf("blocked: %d polecat(s) need recovery", len(needsRecovery)) + } + } + // Nuke each polecat t := tmux.NewTmux() var nukeErrors []string @@ -1214,7 +1408,11 @@ func runPolecatNuke(cmd *cobra.Command, args []string) error { continue } - fmt.Printf("Nuking %s/%s...\n", p.rigName, p.polecatName) + if polecatNukeForce { + fmt.Printf("%s Nuking %s/%s (--force)...\n", style.Warning.Render("⚠"), p.rigName, p.polecatName) + } else { + fmt.Printf("Nuking %s/%s...\n", p.rigName, p.polecatName) + } // Step 1: Kill session (force mode - no graceful shutdown) sessMgr := session.NewManager(t, p.r) diff --git a/internal/witness/handlers.go b/internal/witness/handlers.go index b75a68e0..7713ce98 100644 --- a/internal/witness/handlers.go +++ b/internal/witness/handlers.go @@ -348,6 +348,55 @@ Requested at: %s`, return msg.ID, nil } +// RecoveryPayload contains data for RECOVERY_NEEDED escalation. +type RecoveryPayload struct { + PolecatName string + Rig string + CleanupStatus string + Branch string + IssueID string + DetectedAt time.Time +} + +// EscalateRecoveryNeeded sends a RECOVERY_NEEDED escalation to the Mayor. +// This is used when a dormant polecat has unpushed work that needs recovery +// before cleanup. The Mayor should coordinate recovery (e.g., push the branch, +// save the work) before authorizing cleanup. +func EscalateRecoveryNeeded(router *mail.Router, rigName string, payload *RecoveryPayload) (string, error) { + msg := &mail.Message{ + From: fmt.Sprintf("%s/witness", rigName), + To: "mayor/", + Subject: fmt.Sprintf("RECOVERY_NEEDED %s/%s", rigName, payload.PolecatName), + Priority: mail.PriorityUrgent, + Body: fmt.Sprintf(`Polecat: %s/%s +Cleanup Status: %s +Branch: %s +Issue: %s +Detected: %s + +This polecat has unpushed/uncommitted work that will be lost if nuked. +Please coordinate recovery before authorizing cleanup: +1. 
Check if branch can be pushed to origin +2. Review uncommitted changes for value +3. Either recover the work or authorize force-nuke + +DO NOT nuke without --force after recovery.`, + rigName, + payload.PolecatName, + payload.CleanupStatus, + payload.Branch, + payload.IssueID, + payload.DetectedAt.Format(time.RFC3339), + ), + } + + if err := router.Send(msg); err != nil { + return "", err + } + + return msg.ID, nil +} + // UpdateCleanupWispState updates a cleanup wisp's state label. func UpdateCleanupWispState(workDir, wispID, newState string) error { // Get current labels to preserve other labels diff --git a/templates/witness-CLAUDE.md b/templates/witness-CLAUDE.md index 8c08d13d..c515a983 100644 --- a/templates/witness-CLAUDE.md +++ b/templates/witness-CLAUDE.md @@ -30,28 +30,73 @@ Check your mail with: `gt mail inbox` --- +## Dormant Polecat Recovery Protocol + +When checking dormant polecats, use the recovery check command: + +```bash +gt polecat check-recovery {{RIG}}/<name> +``` + +This returns one of: +- **SAFE_TO_NUKE**: cleanup_status is 'clean' - proceed with normal cleanup +- **NEEDS_RECOVERY**: cleanup_status indicates unpushed/uncommitted work + +### If NEEDS_RECOVERY + +**CRITICAL: Do NOT auto-nuke polecats with unpushed work.** + +Instead, escalate to Mayor: +```bash +gt mail send mayor/ -s "RECOVERY_NEEDED {{RIG}}/<name>" -m "Cleanup Status: has_unpushed +Branch: <branch> +Issue: <issue-id> +Detected: $(date -Iseconds) + +This polecat has unpushed work that will be lost if nuked. +Please coordinate recovery before authorizing cleanup." +``` + +The nuke command will block automatically: +```bash +$ gt polecat nuke {{RIG}}/<name> +Error: The following polecats have unpushed/uncommitted work: + - {{RIG}}/<name> + +These polecats NEED RECOVERY before cleanup. +Options: + 1. Escalate to Mayor: gt mail send mayor/ -s "RECOVERY_NEEDED" -m "..." + 2. Force nuke (LOSES WORK): gt polecat nuke --force {{RIG}}/<name> +``` + +Only use `--force` after Mayor authorizes or confirms work is unrecoverable. 
+ +--- + ## Pre-Kill Verification Checklist Before killing ANY polecat session, verify: ``` -[ ] 1. gt polecat git-state <name> # Must be clean -[ ] 2. Check for uncommitted work: - cd polecats/<name> && git status -[ ] 3. Check for unpushed commits: - git log origin/main..HEAD -[ ] 4. Verify issue closed: +[ ] 1. gt polecat check-recovery {{RIG}}/<name> # Must be SAFE_TO_NUKE +[ ] 2. gt polecat git-state <name> # Must be clean +[ ] 3. Verify issue closed: bd show <issue-id> # Should show 'closed' -[ ] 5. Verify PR submitted (if applicable): +[ ] 4. Verify PR submitted (if applicable): Check merge queue or PR status ``` -**If git state is dirty:** +**If NEEDS_RECOVERY:** +1. Send RECOVERY_NEEDED escalation to Mayor (see above) +2. Wait for Mayor authorization +3. Do NOT proceed with nuke + +**If git state dirty but polecat still alive:** 1. Nudge the worker to clean up 2. Wait 5 minutes for response 3. If still dirty after 3 attempts → Escalate to Mayor -**If all checks pass:** +**If SAFE_TO_NUKE and all checks pass:** 1. **Send MERGE_READY to refinery** (CRITICAL - do this BEFORE killing): ```bash gt mail send {{RIG}}/refinery -s "MERGE_READY <polecat>" -m "Branch: <branch> @@ -65,6 +110,7 @@ Before killing ANY polecat session, verify: ``` NOTE: Use `gt polecat nuke` instead of raw git commands. It knows the correct worktree parent repo (mayor/rig or .repo.git) and handles cleanup properly. + The nuke will automatically block if cleanup_status indicates unpushed work. 3. 
**Notify Mayor** (for tracking): ```bash gt mail send mayor/ -s "Polecat processed" -m "Work: <issue-id> @@ -77,8 +123,11 @@ Before killing ANY polecat session, verify: ```bash # Polecat management -gt polecat list {{RIG}} # See all polecats -gt polecat git-state <name> # Check git cleanliness +gt polecat list {{RIG}} # See all polecats +gt polecat check-recovery {{RIG}}/<name> # Check if safe to nuke +gt polecat git-state {{RIG}}/<name> # Check git cleanliness +gt polecat nuke {{RIG}}/<name> # Nuke (blocks on unpushed work) +gt polecat nuke --force {{RIG}}/<name> # Force nuke (LOSES WORK) # Session inspection tmux capture-pane -t gt-{{RIG}}-<name> -p | tail -40 @@ -91,12 +140,15 @@ gt mail inbox gt mail read <id> gt mail send mayor/ -s "Subject" -m "Message" gt mail send {{RIG}}/refinery -s "MERGE_READY <polecat>" -m "..." +gt mail send mayor/ -s "RECOVERY_NEEDED {{RIG}}/<name>" -m "..." # Escalate ``` --- ## Do NOT +- **Nuke polecats with unpushed work** - always check-recovery first +- Use `--force` without Mayor authorization - Kill sessions without completing pre-kill verification - Kill sessions without sending MERGE_READY to refinery - Spawn new polecats (Mayor does that)