package cmd import ( "encoding/json" "errors" "fmt" "os" "os/exec" "path/filepath" "strings" "time" "github.com/spf13/cobra" "github.com/steveyegge/gastown/internal/beads" "github.com/steveyegge/gastown/internal/claude" "github.com/steveyegge/gastown/internal/config" "github.com/steveyegge/gastown/internal/constants" "github.com/steveyegge/gastown/internal/deacon" "github.com/steveyegge/gastown/internal/polecat" "github.com/steveyegge/gastown/internal/runtime" "github.com/steveyegge/gastown/internal/session" "github.com/steveyegge/gastown/internal/style" "github.com/steveyegge/gastown/internal/tmux" "github.com/steveyegge/gastown/internal/workspace" ) // getDeaconSessionName returns the Deacon session name. func getDeaconSessionName() string { return session.DeaconSessionName() } var deaconCmd = &cobra.Command{ Use: "deacon", Aliases: []string{"dea"}, GroupID: GroupAgents, Short: "Manage the Deacon session", RunE: requireSubcommand, Long: `Manage the Deacon tmux session. The Deacon is the hierarchical health-check orchestrator for Gas Town. It monitors the Mayor and Witnesses, handles lifecycle requests, and keeps the town running. Use the subcommands to start, stop, attach, and check status.`, } var deaconStartCmd = &cobra.Command{ Use: "start", Aliases: []string{"spawn"}, Short: "Start the Deacon session", Long: `Start the Deacon tmux session. Creates a new detached tmux session for the Deacon and launches Claude. The session runs in the workspace root directory.`, RunE: runDeaconStart, } var deaconStopCmd = &cobra.Command{ Use: "stop", Short: "Stop the Deacon session", Long: `Stop the Deacon tmux session. Attempts graceful shutdown first (Ctrl-C), then kills the tmux session.`, RunE: runDeaconStop, } var deaconAttachCmd = &cobra.Command{ Use: "attach", Aliases: []string{"at"}, Short: "Attach to the Deacon session", Long: `Attach to the running Deacon tmux session. Attaches the current terminal to the Deacon's tmux session. Detach with Ctrl-B D.`, RunE: runDeaconAttach, } var deaconStatusCmd = &cobra.Command{ Use: "status", Short: "Check Deacon session status", Long: `Check if the Deacon tmux session is currently running.`, RunE: runDeaconStatus, } var deaconRestartCmd = &cobra.Command{ Use: "restart", Short: "Restart the Deacon session", Long: `Restart the Deacon tmux session. Stops the current session (if running) and starts a fresh one.`, RunE: runDeaconRestart, } var deaconAgentOverride string var deaconHeartbeatCmd = &cobra.Command{ Use: "heartbeat [action]", Short: "Update the Deacon heartbeat", Long: `Update the Deacon heartbeat file. The heartbeat signals to the daemon that the Deacon is alive and working. Call this at the start of each wake cycle to prevent daemon pokes. Examples: gt deacon heartbeat # Touch heartbeat with timestamp gt deacon heartbeat "checking mayor" # Touch with action description`, RunE: runDeaconHeartbeat, } var deaconTriggerPendingCmd = &cobra.Command{ Use: "trigger-pending", Short: "Trigger pending polecat spawns (bootstrap mode)", Long: `Check inbox for POLECAT_STARTED messages and trigger ready polecats. ⚠️ BOOTSTRAP MODE ONLY - Uses regex detection (ZFC violation acceptable). This command uses WaitForRuntimeReady (regex) to detect when the runtime is ready. This is appropriate for daemon bootstrap when no AI is available. In steady-state, the Deacon should use AI-based observation instead: gt deacon pending # View pending spawns with captured output gt peek # Observe session output (AI analyzes) gt nudge # Trigger when AI determines ready This command is typically called by the daemon during cold startup.`, RunE: runDeaconTriggerPending, } var deaconHealthCheckCmd = &cobra.Command{ Use: "health-check ", Short: "Send a health check ping to an agent and track response", Long: `Send a HEALTH_CHECK nudge to an agent and wait for response. This command is used by the Deacon during health rounds to detect stuck sessions. It tracks consecutive failures and determines when force-kill is warranted. The detection protocol: 1. Send HEALTH_CHECK nudge to the agent 2. Wait for agent to update their bead (configurable timeout, default 30s) 3. If no activity update, increment failure counter 4. After N consecutive failures (default 3), recommend force-kill Exit codes: 0 - Agent responded or is in cooldown (no action needed) 1 - Error occurred 2 - Agent should be force-killed (consecutive failures exceeded) Examples: gt deacon health-check gastown/polecats/max gt deacon health-check gastown/witness --timeout=60s gt deacon health-check deacon --failures=5`, Args: cobra.ExactArgs(1), RunE: runDeaconHealthCheck, } var deaconForceKillCmd = &cobra.Command{ Use: "force-kill ", Short: "Force-kill an unresponsive agent session", Long: `Force-kill an agent session that has been detected as stuck. This command is used by the Deacon when an agent fails consecutive health checks. It performs the force-kill protocol: 1. Log the intervention (send mail to agent) 2. Kill the tmux session 3. Update agent bead state to "killed" 4. Notify mayor (optional, for visibility) After force-kill, the agent is 'asleep'. Normal wake mechanisms apply: - gt rig boot restarts it - Or stays asleep until next activity trigger This respects the cooldown period - won't kill if recently killed. Examples: gt deacon force-kill gastown/polecats/max gt deacon force-kill gastown/witness --reason="unresponsive for 90s"`, Args: cobra.ExactArgs(1), RunE: runDeaconForceKill, } var deaconHealthStateCmd = &cobra.Command{ Use: "health-state", Short: "Show health check state for all monitored agents", Long: `Display the current health check state including: - Consecutive failure counts - Last ping and response times - Force-kill history and cooldowns This helps the Deacon understand which agents may need attention.`, RunE: runDeaconHealthState, } var deaconStaleHooksCmd = &cobra.Command{ Use: "stale-hooks", Short: "Find and unhook stale hooked beads", Long: `Find beads stuck in 'hooked' status and unhook them if the agent is gone. Beads can get stuck in 'hooked' status when agents die or abandon work. This command finds hooked beads older than the threshold (default: 1 hour), checks if the assignee agent is still alive, and unhooks them if not. Examples: gt deacon stale-hooks # Find and unhook stale beads gt deacon stale-hooks --dry-run # Preview what would be unhooked gt deacon stale-hooks --max-age=30m # Use 30 minute threshold`, RunE: runDeaconStaleHooks, } var deaconPauseCmd = &cobra.Command{ Use: "pause", Short: "Pause the Deacon to prevent patrol actions", Long: `Pause the Deacon to prevent it from performing any patrol actions. When paused, the Deacon: - Will not create patrol molecules - Will not run health checks - Will not take any autonomous actions - Will display a PAUSED message on startup The pause state persists across session restarts. Use 'gt deacon resume' to allow the Deacon to work again. Examples: gt deacon pause # Pause with no reason gt deacon pause --reason="testing" # Pause with a reason`, RunE: runDeaconPause, } var deaconResumeCmd = &cobra.Command{ Use: "resume", Short: "Resume the Deacon to allow patrol actions", Long: `Resume the Deacon so it can perform patrol actions again. This removes the pause file and allows the Deacon to work normally.`, RunE: runDeaconResume, } var ( triggerTimeout time.Duration // Health check flags healthCheckTimeout time.Duration healthCheckFailures int healthCheckCooldown time.Duration // Force kill flags forceKillReason string forceKillSkipNotify bool // Stale hooks flags staleHooksMaxAge time.Duration staleHooksDryRun bool // Pause flags pauseReason string ) func init() { deaconCmd.AddCommand(deaconStartCmd) deaconCmd.AddCommand(deaconStopCmd) deaconCmd.AddCommand(deaconAttachCmd) deaconCmd.AddCommand(deaconStatusCmd) deaconCmd.AddCommand(deaconRestartCmd) deaconCmd.AddCommand(deaconHeartbeatCmd) deaconCmd.AddCommand(deaconTriggerPendingCmd) deaconCmd.AddCommand(deaconHealthCheckCmd) deaconCmd.AddCommand(deaconForceKillCmd) deaconCmd.AddCommand(deaconHealthStateCmd) deaconCmd.AddCommand(deaconStaleHooksCmd) deaconCmd.AddCommand(deaconPauseCmd) deaconCmd.AddCommand(deaconResumeCmd) // Flags for trigger-pending deaconTriggerPendingCmd.Flags().DurationVar(&triggerTimeout, "timeout", 2*time.Second, "Timeout for checking if Claude is ready") // Flags for health-check deaconHealthCheckCmd.Flags().DurationVar(&healthCheckTimeout, "timeout", 30*time.Second, "How long to wait for agent response") deaconHealthCheckCmd.Flags().IntVar(&healthCheckFailures, "failures", 3, "Number of consecutive failures before recommending force-kill") deaconHealthCheckCmd.Flags().DurationVar(&healthCheckCooldown, "cooldown", 5*time.Minute, "Minimum time between force-kills of same agent") // Flags for force-kill deaconForceKillCmd.Flags().StringVar(&forceKillReason, "reason", "", "Reason for force-kill (included in notifications)") deaconForceKillCmd.Flags().BoolVar(&forceKillSkipNotify, "skip-notify", false, "Skip sending notification mail to mayor") // Flags for stale-hooks deaconStaleHooksCmd.Flags().DurationVar(&staleHooksMaxAge, "max-age", 1*time.Hour, "Maximum age before a hooked bead is considered stale") deaconStaleHooksCmd.Flags().BoolVar(&staleHooksDryRun, "dry-run", false, "Preview what would be unhooked without making changes") // Flags for pause deaconPauseCmd.Flags().StringVar(&pauseReason, "reason", "", "Reason for pausing the Deacon") deaconStartCmd.Flags().StringVar(&deaconAgentOverride, "agent", "", "Agent alias to run the Deacon with (overrides town default)") deaconAttachCmd.Flags().StringVar(&deaconAgentOverride, "agent", "", "Agent alias to run the Deacon with (overrides town default)") deaconRestartCmd.Flags().StringVar(&deaconAgentOverride, "agent", "", "Agent alias to run the Deacon with (overrides town default)") rootCmd.AddCommand(deaconCmd) } func runDeaconStart(cmd *cobra.Command, args []string) error { t := tmux.NewTmux() sessionName := getDeaconSessionName() // Check if session already exists running, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } if running { return fmt.Errorf("Deacon session already running. Attach with: gt deacon attach") } if err := startDeaconSession(t, sessionName, deaconAgentOverride); err != nil { return err } fmt.Printf("%s Deacon session started. Attach with: %s\n", style.Bold.Render("✓"), style.Dim.Render("gt deacon attach")) return nil } // startDeaconSession creates and initializes the Deacon tmux session. func startDeaconSession(t *tmux.Tmux, sessionName, agentOverride string) error { // Find workspace root townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Deacon runs from its own directory (for correct role detection by gt prime) deaconDir := filepath.Join(townRoot, "deacon") // Ensure deacon directory exists if err := os.MkdirAll(deaconDir, 0755); err != nil { return fmt.Errorf("creating deacon directory: %w", err) } // Ensure Claude settings exist (autonomous role needs mail in SessionStart) if err := claude.EnsureSettingsForRole(deaconDir, "deacon"); err != nil { style.PrintWarning("Could not create deacon settings: %v", err) } // Create session in deacon directory fmt.Println("Starting Deacon session...") if err := t.NewSession(sessionName, deaconDir); err != nil { return fmt.Errorf("creating session: %w", err) } // Set environment (non-fatal: session works without these) _ = t.SetEnvironment(sessionName, "GT_ROLE", "deacon") _ = t.SetEnvironment(sessionName, "BD_ACTOR", "deacon") // Apply Deacon theme (non-fatal: theming failure doesn't affect operation) // Note: ConfigureGasTownSession includes cycle bindings theme := tmux.DeaconTheme() _ = t.ConfigureGasTownSession(sessionName, theme, "", "Deacon", "health-check") // Launch Claude directly (no shell respawn loop) // Restarts are handled by daemon via ensureDeaconRunning on each heartbeat // The startup hook handles context loading automatically // Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "deacon", "", "", agentOverride) if err != nil { return fmt.Errorf("building startup command: %w", err) } if err := t.SendKeys(sessionName, startupCmd); err != nil { return fmt.Errorf("sending command: %w", err) } // Wait for Claude to start (non-fatal) if err := t.WaitForCommand(sessionName, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil { // Non-fatal } time.Sleep(constants.ShutdownNotifyDelay) runtimeConfig := config.LoadRuntimeConfig("") _ = runtime.RunStartupFallback(t, sessionName, "deacon", runtimeConfig) // Inject startup nudge for predecessor discovery via /resume _ = session.StartupNudge(t, sessionName, session.StartupNudgeConfig{ Recipient: "deacon", Sender: "daemon", Topic: "patrol", }) // Non-fatal // GUPP: Gas Town Universal Propulsion Principle // Send the propulsion nudge to trigger autonomous patrol execution. // Wait for beacon to be fully processed (needs to be separate prompt) time.Sleep(2 * time.Second) _ = t.NudgeSession(sessionName, session.PropulsionNudgeForRole("deacon", deaconDir)) // Non-fatal return nil } func runDeaconStop(cmd *cobra.Command, args []string) error { t := tmux.NewTmux() sessionName := getDeaconSessionName() // Check if session exists running, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } if !running { return errors.New("Deacon session is not running") } fmt.Println("Stopping Deacon session...") // Try graceful shutdown first (best-effort interrupt) _ = t.SendKeysRaw(sessionName, "C-c") time.Sleep(100 * time.Millisecond) // Kill the session if err := t.KillSession(sessionName); err != nil { return fmt.Errorf("killing session: %w", err) } fmt.Printf("%s Deacon session stopped.\n", style.Bold.Render("✓")) return nil } func runDeaconAttach(cmd *cobra.Command, args []string) error { t := tmux.NewTmux() sessionName := getDeaconSessionName() // Check if session exists running, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } if !running { // Auto-start if not running fmt.Println("Deacon session not running, starting...") if err := startDeaconSession(t, sessionName, deaconAgentOverride); err != nil { return err } } // Session uses a respawn loop, so Claude restarts automatically if it exits // Use shared attach helper (smart: links if inside tmux, attaches if outside) return attachToTmuxSession(sessionName) } func runDeaconStatus(cmd *cobra.Command, args []string) error { t := tmux.NewTmux() sessionName := getDeaconSessionName() // Check pause state first (most important) townRoot, _ := workspace.FindFromCwdOrError() if townRoot != "" { paused, state, err := deacon.IsPaused(townRoot) if err == nil && paused { fmt.Printf("%s DEACON PAUSED\n", style.Bold.Render("⏸️")) if state.Reason != "" { fmt.Printf(" Reason: %s\n", state.Reason) } fmt.Printf(" Paused at: %s\n", state.PausedAt.Format(time.RFC3339)) fmt.Printf(" Paused by: %s\n", state.PausedBy) fmt.Println() fmt.Printf("Resume with: %s\n", style.Dim.Render("gt deacon resume")) fmt.Println() } } running, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } if running { // Get session info for more details info, err := t.GetSessionInfo(sessionName) if err == nil { status := "detached" if info.Attached { status = "attached" } fmt.Printf("%s Deacon session is %s\n", style.Bold.Render("●"), style.Bold.Render("running")) fmt.Printf(" Status: %s\n", status) fmt.Printf(" Created: %s\n", info.Created) fmt.Printf("\nAttach with: %s\n", style.Dim.Render("gt deacon attach")) } else { fmt.Printf("%s Deacon session is %s\n", style.Bold.Render("●"), style.Bold.Render("running")) } } else { fmt.Printf("%s Deacon session is %s\n", style.Dim.Render("○"), "not running") fmt.Printf("\nStart with: %s\n", style.Dim.Render("gt deacon start")) } return nil } func runDeaconRestart(cmd *cobra.Command, args []string) error { t := tmux.NewTmux() sessionName := getDeaconSessionName() running, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } fmt.Println("Restarting Deacon...") if running { // Kill existing session if err := t.KillSession(sessionName); err != nil { style.PrintWarning("failed to kill session: %v", err) } } // Start fresh if err := runDeaconStart(cmd, args); err != nil { return err } fmt.Printf("%s Deacon restarted\n", style.Bold.Render("✓")) fmt.Printf(" %s\n", style.Dim.Render("Use 'gt deacon attach' to connect")) return nil } func runDeaconHeartbeat(cmd *cobra.Command, args []string) error { townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Check if Deacon is paused - if so, refuse to update heartbeat paused, state, err := deacon.IsPaused(townRoot) if err != nil { return fmt.Errorf("checking pause state: %w", err) } if paused { fmt.Printf("%s Deacon is paused. Use 'gt deacon resume' to unpause.\n", style.Bold.Render("⏸️")) if state.Reason != "" { fmt.Printf(" Reason: %s\n", state.Reason) } return errors.New("Deacon is paused") } action := "" if len(args) > 0 { action = strings.Join(args, " ") } if action != "" { if err := deacon.TouchWithAction(townRoot, action, 0, 0); err != nil { return fmt.Errorf("updating heartbeat: %w", err) } fmt.Printf("%s Heartbeat updated: %s\n", style.Bold.Render("✓"), action) } else { if err := deacon.Touch(townRoot); err != nil { return fmt.Errorf("updating heartbeat: %w", err) } fmt.Printf("%s Heartbeat updated\n", style.Bold.Render("✓")) } return nil } func runDeaconTriggerPending(cmd *cobra.Command, args []string) error { townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Step 1: Check inbox for new POLECAT_STARTED messages pending, err := polecat.CheckInboxForSpawns(townRoot) if err != nil { return fmt.Errorf("checking inbox: %w", err) } if len(pending) == 0 { fmt.Printf("%s No pending spawns\n", style.Dim.Render("○")) return nil } fmt.Printf("%s Found %d pending spawn(s)\n", style.Bold.Render("●"), len(pending)) // Step 2: Try to trigger each pending spawn results, err := polecat.TriggerPendingSpawns(townRoot, triggerTimeout) if err != nil { return fmt.Errorf("triggering: %w", err) } // Report results triggered := 0 for _, r := range results { if r.Triggered { triggered++ fmt.Printf(" %s Triggered %s/%s\n", style.Bold.Render("✓"), r.Spawn.Rig, r.Spawn.Polecat) } else if r.Error != nil { fmt.Printf(" %s %s/%s: %v\n", style.Dim.Render("⚠"), r.Spawn.Rig, r.Spawn.Polecat, r.Error) } } // Step 3: Prune stale pending spawns (older than 5 minutes) pruned, _ := polecat.PruneStalePending(townRoot, 5*time.Minute) if pruned > 0 { fmt.Printf(" %s Pruned %d stale spawn(s)\n", style.Dim.Render("○"), pruned) } // Summary remaining := len(pending) - triggered if remaining > 0 { fmt.Printf("%s %d spawn(s) still waiting for Claude\n", style.Dim.Render("○"), remaining) } return nil } // runDeaconHealthCheck implements the health-check command. // It sends a HEALTH_CHECK nudge to an agent, waits for response, and tracks state. func runDeaconHealthCheck(cmd *cobra.Command, args []string) error { agent := args[0] townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Load health check state state, err := deacon.LoadHealthCheckState(townRoot) if err != nil { return fmt.Errorf("loading health check state: %w", err) } agentState := state.GetAgentState(agent) // Check if agent is in cooldown if agentState.IsInCooldown(healthCheckCooldown) { remaining := agentState.CooldownRemaining(healthCheckCooldown) fmt.Printf("%s Agent %s is in cooldown (remaining: %s)\n", style.Dim.Render("○"), agent, remaining.Round(time.Second)) return nil } // Get agent bead info before ping (for baseline) beadID, sessionName, err := agentAddressToIDs(agent) if err != nil { return fmt.Errorf("invalid agent address: %w", err) } t := tmux.NewTmux() // Check if session exists exists, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } if !exists { fmt.Printf("%s Agent %s session not running\n", style.Dim.Render("○"), agent) return nil } // Get current bead update time baselineTime, err := getAgentBeadUpdateTime(townRoot, beadID) if err != nil { // Bead might not exist yet - that's okay baselineTime = time.Time{} } // Record ping agentState.RecordPing() // Send health check nudge if err := t.NudgeSession(sessionName, "HEALTH_CHECK: respond with any action to confirm responsiveness"); err != nil { return fmt.Errorf("sending nudge: %w", err) } fmt.Printf("%s Sent HEALTH_CHECK to %s, waiting %s...\n", style.Bold.Render("→"), agent, healthCheckTimeout) // Wait for response deadline := time.Now().Add(healthCheckTimeout) responded := false for time.Now().Before(deadline) { time.Sleep(2 * time.Second) // Check every 2 seconds newTime, err := getAgentBeadUpdateTime(townRoot, beadID) if err != nil { continue } // If bead was updated after our baseline, agent responded if newTime.After(baselineTime) { responded = true break } } // Record result if responded { agentState.RecordResponse() if err := deacon.SaveHealthCheckState(townRoot, state); err != nil { style.PrintWarning("failed to save health check state: %v", err) } fmt.Printf("%s Agent %s responded (failures reset to 0)\n", style.Bold.Render("✓"), agent) return nil } // No response - record failure agentState.RecordFailure() if err := deacon.SaveHealthCheckState(townRoot, state); err != nil { style.PrintWarning("failed to save health check state: %v", err) } fmt.Printf("%s Agent %s did not respond (consecutive failures: %d/%d)\n", style.Dim.Render("⚠"), agent, agentState.ConsecutiveFailures, healthCheckFailures) // Check if force-kill threshold reached if agentState.ShouldForceKill(healthCheckFailures) { fmt.Printf("%s Agent %s should be force-killed\n", style.Bold.Render("✗"), agent) os.Exit(2) // Exit code 2 = should force-kill } return nil } // runDeaconForceKill implements the force-kill command. // It kills a stuck agent session and updates its bead state. func runDeaconForceKill(cmd *cobra.Command, args []string) error { agent := args[0] townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Load health check state state, err := deacon.LoadHealthCheckState(townRoot) if err != nil { return fmt.Errorf("loading health check state: %w", err) } agentState := state.GetAgentState(agent) // Check cooldown (unless bypassed) if agentState.IsInCooldown(healthCheckCooldown) { remaining := agentState.CooldownRemaining(healthCheckCooldown) return fmt.Errorf("agent %s is in cooldown (remaining: %s) - cannot force-kill yet", agent, remaining.Round(time.Second)) } // Get session name _, sessionName, err := agentAddressToIDs(agent) if err != nil { return fmt.Errorf("invalid agent address: %w", err) } t := tmux.NewTmux() // Check if session exists exists, err := t.HasSession(sessionName) if err != nil { return fmt.Errorf("checking session: %w", err) } if !exists { fmt.Printf("%s Agent %s session not running\n", style.Dim.Render("○"), agent) return nil } // Build reason reason := forceKillReason if reason == "" { reason = fmt.Sprintf("unresponsive after %d consecutive health check failures", agentState.ConsecutiveFailures) } // Step 1: Log the intervention (send mail to agent) fmt.Printf("%s Sending force-kill notification to %s...\n", style.Dim.Render("1."), agent) mailBody := fmt.Sprintf("Deacon detected %s as unresponsive.\nReason: %s\nAction: force-killing session", agent, reason) sendMail(townRoot, agent, "FORCE_KILL: unresponsive", mailBody) // Step 2: Kill the tmux session fmt.Printf("%s Killing tmux session %s...\n", style.Dim.Render("2."), sessionName) if err := t.KillSession(sessionName); err != nil { return fmt.Errorf("killing session: %w", err) } // Step 3: Update agent bead state (optional - best effort) fmt.Printf("%s Updating agent bead state to 'killed'...\n", style.Dim.Render("3.")) updateAgentBeadState(townRoot, agent, "killed", reason) // Step 4: Notify mayor (optional) if !forceKillSkipNotify { fmt.Printf("%s Notifying mayor...\n", style.Dim.Render("4.")) notifyBody := fmt.Sprintf("Agent %s was force-killed by Deacon.\nReason: %s", agent, reason) sendMail(townRoot, "mayor/", "Agent killed: "+agent, notifyBody) } // Record force-kill in state agentState.RecordForceKill() if err := deacon.SaveHealthCheckState(townRoot, state); err != nil { style.PrintWarning("failed to save health check state: %v", err) } fmt.Printf("%s Force-killed agent %s (total kills: %d)\n", style.Bold.Render("✓"), agent, agentState.ForceKillCount) fmt.Printf(" %s\n", style.Dim.Render("Agent is now 'asleep'. Use 'gt rig boot' to restart.")) return nil } // runDeaconHealthState shows the current health check state. func runDeaconHealthState(cmd *cobra.Command, args []string) error { townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } state, err := deacon.LoadHealthCheckState(townRoot) if err != nil { return fmt.Errorf("loading health check state: %w", err) } if len(state.Agents) == 0 { fmt.Printf("%s No health check state recorded yet\n", style.Dim.Render("○")) return nil } fmt.Printf("%s Health Check State (updated %s)\n\n", style.Bold.Render("●"), state.LastUpdated.Format(time.RFC3339)) for agentID, agentState := range state.Agents { fmt.Printf("Agent: %s\n", style.Bold.Render(agentID)) if !agentState.LastPingTime.IsZero() { fmt.Printf(" Last ping: %s ago\n", time.Since(agentState.LastPingTime).Round(time.Second)) } if !agentState.LastResponseTime.IsZero() { fmt.Printf(" Last response: %s ago\n", time.Since(agentState.LastResponseTime).Round(time.Second)) } fmt.Printf(" Consecutive failures: %d\n", agentState.ConsecutiveFailures) fmt.Printf(" Total force-kills: %d\n", agentState.ForceKillCount) if !agentState.LastForceKillTime.IsZero() { fmt.Printf(" Last force-kill: %s ago\n", time.Since(agentState.LastForceKillTime).Round(time.Second)) if agentState.IsInCooldown(healthCheckCooldown) { remaining := agentState.CooldownRemaining(healthCheckCooldown) fmt.Printf(" Cooldown: %s remaining\n", remaining.Round(time.Second)) } } fmt.Println() } return nil } // agentAddressToIDs converts an agent address to bead ID and session name. // Supports formats: "gastown/polecats/max", "gastown/witness", "deacon", "mayor" // Note: Town-level agents (Mayor, Deacon) use hq- prefix bead IDs stored in town beads. func agentAddressToIDs(address string) (beadID, sessionName string, err error) { switch address { case "deacon": return beads.DeaconBeadIDTown(), session.DeaconSessionName(), nil case "mayor": return beads.MayorBeadIDTown(), session.MayorSessionName(), nil } parts := strings.Split(address, "/") switch len(parts) { case 2: // rig/role: "gastown/witness", "gastown/refinery" rig, role := parts[0], parts[1] switch role { case "witness": return fmt.Sprintf("gt-%s-witness", rig), fmt.Sprintf("gt-%s-witness", rig), nil case "refinery": return fmt.Sprintf("gt-%s-refinery", rig), fmt.Sprintf("gt-%s-refinery", rig), nil default: return "", "", fmt.Errorf("unknown role: %s", role) } case 3: // rig/type/name: "gastown/polecats/max", "gastown/crew/alpha" rig, agentType, name := parts[0], parts[1], parts[2] switch agentType { case "polecats": return fmt.Sprintf("gt-%s-polecat-%s", rig, name), fmt.Sprintf("gt-%s-%s", rig, name), nil case "crew": return fmt.Sprintf("gt-%s-crew-%s", rig, name), fmt.Sprintf("gt-%s-crew-%s", rig, name), nil default: return "", "", fmt.Errorf("unknown agent type: %s", agentType) } default: return "", "", fmt.Errorf("invalid agent address format: %s (expected rig/type/name or rig/role)", address) } } // getAgentBeadUpdateTime gets the update time from an agent bead. func getAgentBeadUpdateTime(townRoot, beadID string) (time.Time, error) { cmd := exec.Command("bd", "show", beadID, "--json") cmd.Dir = townRoot output, err := cmd.Output() if err != nil { return time.Time{}, err } var issues []struct { UpdatedAt string `json:"updated_at"` } if err := json.Unmarshal(output, &issues); err != nil { return time.Time{}, err } if len(issues) == 0 { return time.Time{}, fmt.Errorf("bead not found: %s", beadID) } return time.Parse(time.RFC3339, issues[0].UpdatedAt) } // sendMail sends a mail message using gt mail send. func sendMail(townRoot, to, subject, body string) { cmd := exec.Command("gt", "mail", "send", to, "-s", subject, "-m", body) cmd.Dir = townRoot _ = cmd.Run() // Best effort } // updateAgentBeadState updates an agent bead's state. func updateAgentBeadState(townRoot, agent, state, _ string) { // reason unused but kept for API consistency beadID, _, err := agentAddressToIDs(agent) if err != nil { return } // Use bd agent state command cmd := exec.Command("bd", "agent", "state", beadID, state) cmd.Dir = townRoot _ = cmd.Run() // Best effort } // runDeaconStaleHooks finds and unhooks stale hooked beads. func runDeaconStaleHooks(cmd *cobra.Command, args []string) error { townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } cfg := &deacon.StaleHookConfig{ MaxAge: staleHooksMaxAge, DryRun: staleHooksDryRun, } result, err := deacon.ScanStaleHooks(townRoot, cfg) if err != nil { return fmt.Errorf("scanning stale hooks: %w", err) } // Print summary if result.TotalHooked == 0 { fmt.Printf("%s No hooked beads found\n", style.Dim.Render("○")) return nil } fmt.Printf("%s Found %d hooked bead(s), %d stale (older than %s)\n", style.Bold.Render("●"), result.TotalHooked, result.StaleCount, staleHooksMaxAge) if result.StaleCount == 0 { fmt.Printf("%s No stale hooked beads\n", style.Dim.Render("○")) return nil } // Print details for each stale bead for _, r := range result.Results { status := style.Dim.Render("○") action := "skipped (agent alive)" if !r.AgentAlive { if staleHooksDryRun { status = style.Bold.Render("?") action = "would unhook (agent dead)" } else if r.Unhooked { status = style.Bold.Render("✓") action = "unhooked (agent dead)" } else if r.Error != "" { status = style.Dim.Render("✗") action = fmt.Sprintf("error: %s", r.Error) } } fmt.Printf(" %s %s: %s (age: %s, assignee: %s)\n", status, r.BeadID, action, r.Age, r.Assignee) } // Summary if staleHooksDryRun { fmt.Printf("\n%s Dry run - no changes made. Run without --dry-run to unhook.\n", style.Dim.Render("ℹ")) } else if result.Unhooked > 0 { fmt.Printf("\n%s Unhooked %d stale bead(s)\n", style.Bold.Render("✓"), result.Unhooked) } return nil } // runDeaconPause pauses the Deacon to prevent patrol actions. func runDeaconPause(cmd *cobra.Command, args []string) error { townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Check if already paused paused, state, err := deacon.IsPaused(townRoot) if err != nil { return fmt.Errorf("checking pause state: %w", err) } if paused { fmt.Printf("%s Deacon is already paused\n", style.Dim.Render("○")) fmt.Printf(" Reason: %s\n", state.Reason) fmt.Printf(" Paused at: %s\n", state.PausedAt.Format(time.RFC3339)) fmt.Printf(" Paused by: %s\n", state.PausedBy) return nil } // Pause the Deacon if err := deacon.Pause(townRoot, pauseReason, "human"); err != nil { return fmt.Errorf("pausing Deacon: %w", err) } fmt.Printf("%s Deacon paused\n", style.Bold.Render("⏸️")) if pauseReason != "" { fmt.Printf(" Reason: %s\n", pauseReason) } fmt.Printf(" Pause file: %s\n", deacon.GetPauseFile(townRoot)) fmt.Println() fmt.Printf("The Deacon will not perform any patrol actions until resumed.\n") fmt.Printf("Resume with: %s\n", style.Dim.Render("gt deacon resume")) return nil } // runDeaconResume resumes the Deacon to allow patrol actions. func runDeaconResume(cmd *cobra.Command, args []string) error { townRoot, err := workspace.FindFromCwdOrError() if err != nil { return fmt.Errorf("not in a Gas Town workspace: %w", err) } // Check if paused paused, _, err := deacon.IsPaused(townRoot) if err != nil { return fmt.Errorf("checking pause state: %w", err) } if !paused { fmt.Printf("%s Deacon is not paused\n", style.Dim.Render("○")) return nil } // Resume the Deacon if err := deacon.Resume(townRoot); err != nil { return fmt.Errorf("resuming Deacon: %w", err) } fmt.Printf("%s Deacon resumed\n", style.Bold.Render("▶️")) fmt.Println("The Deacon can now perform patrol actions.") return nil }