feat(witness): Implement Witness MVP for automated polecat lifecycle
Implements the core Witness functionality:

- gt witness start: Creates tmux session with Claude, theming, auto-priming
- gt witness stop: Kills tmux session and updates state
- gt witness status: Shows session state reconciled with tmux
- Shutdown handler: Verifies git clean state before cleanup, sends nudges
- Auto-spawn: Spawns polecats for ready work up to configurable capacity
- Health checks: Monitors polecat activity, nudges stuck workers, escalates

Also updates handoff to include polecat name in lifecycle requests.

Closes: gt-53w6, gt-mxyj, gt-5wtw, gt-cpm2, gt-es1i

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,25 @@ Check gt mail inbox for messages received during transition.
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getPolecatName reports the polecat name encoded in the current tmux session
// name, which is expected to have the form gt-<rig>-<name>.
//
// It returns "" when tmux cannot be queried, when the session name does not
// start with "gt-", or when it has fewer than three dash-separated fields.
func getPolecatName() string {
	raw, err := exec.Command("tmux", "display-message", "-p", "#{session_name}").Output()
	if err != nil {
		return ""
	}

	session := strings.TrimSpace(string(raw))
	if !strings.HasPrefix(session, "gt-") {
		return ""
	}

	// Split into at most three fields so any further dashes stay in the last one.
	// NOTE(review): a rig name that itself contains '-' would shift the split;
	// confirm rig names are dash-free.
	fields := strings.SplitN(session, "-", 3)
	if len(fields) < 3 {
		return ""
	}
	return fields[2]
}
|
||||||
|
|
||||||
// sendLifecycleRequest sends the lifecycle request to our manager.
|
// sendLifecycleRequest sends the lifecycle request to our manager.
|
||||||
func sendLifecycleRequest(manager string, role Role, action HandoffAction, townRoot string) error {
|
func sendLifecycleRequest(manager string, role Role, action HandoffAction, townRoot string) error {
|
||||||
if manager == "human" {
|
if manager == "human" {
|
||||||
@@ -329,14 +348,21 @@ func sendLifecycleRequest(manager string, role Role, action HandoffAction, townR
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For polecats, include the specific name
|
||||||
|
polecatName := ""
|
||||||
|
if role == RolePolecat {
|
||||||
|
polecatName = getPolecatName()
|
||||||
|
}
|
||||||
|
|
||||||
subject := fmt.Sprintf("LIFECYCLE: %s requesting %s", role, action)
|
subject := fmt.Sprintf("LIFECYCLE: %s requesting %s", role, action)
|
||||||
body := fmt.Sprintf(`Lifecycle request from %s.
|
body := fmt.Sprintf(`Lifecycle request from %s.
|
||||||
|
|
||||||
Action: %s
|
Action: %s
|
||||||
Time: %s
|
Time: %s
|
||||||
|
Polecat: %s
|
||||||
|
|
||||||
Please verify state and execute lifecycle action.
|
Please verify state and execute lifecycle action.
|
||||||
`, role, action, time.Now().Format(time.RFC3339))
|
`, role, action, time.Now().Format(time.RFC3339), polecatName)
|
||||||
|
|
||||||
// Send via bd mail (syntax: bd mail send <recipient> -s <subject> -m <body>)
|
// Send via bd mail (syntax: bd mail send <recipient> -s <subject> -m <body>)
|
||||||
cmd := exec.Command("bd", "mail", "send", manager,
|
cmd := exec.Command("bd", "mail", "send", manager,
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
"github.com/steveyegge/gastown/internal/config"
|
"github.com/steveyegge/gastown/internal/config"
|
||||||
@@ -124,27 +125,42 @@ func getWitnessManager(rigName string) (*witness.Manager, *rig.Rig, error) {
|
|||||||
func runWitnessStart(cmd *cobra.Command, args []string) error {
|
func runWitnessStart(cmd *cobra.Command, args []string) error {
|
||||||
rigName := args[0]
|
rigName := args[0]
|
||||||
|
|
||||||
mgr, _, err := getWitnessManager(rigName)
|
mgr, r, err := getWitnessManager(rigName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("Starting witness for %s...\n", rigName)
|
fmt.Printf("Starting witness for %s...\n", rigName)
|
||||||
|
|
||||||
if err := mgr.Start(witnessForeground); err != nil {
|
|
||||||
if err == witness.ErrAlreadyRunning {
|
|
||||||
fmt.Printf("%s Witness is already running\n", style.Dim.Render("⚠"))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return fmt.Errorf("starting witness: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if witnessForeground {
|
if witnessForeground {
|
||||||
// This will block until stopped
|
// Foreground mode: run monitoring loop in current process (blocking)
|
||||||
|
if err := mgr.Start(true); err != nil {
|
||||||
|
if err == witness.ErrAlreadyRunning {
|
||||||
|
fmt.Printf("%s Witness is already running\n", style.Dim.Render("⚠"))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("starting witness: %w", err)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Background mode: create tmux session with Claude
|
||||||
|
created, err := ensureWitnessSession(rigName, r)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !created {
|
||||||
|
fmt.Printf("%s Witness session already running\n", style.Dim.Render("⚠"))
|
||||||
|
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness attach' to connect"))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update manager state to reflect running session
|
||||||
|
_ = mgr.Start(false) // Mark as running in state file
|
||||||
|
|
||||||
fmt.Printf("%s Witness started for %s\n", style.Bold.Render("✓"), rigName)
|
fmt.Printf("%s Witness started for %s\n", style.Bold.Render("✓"), rigName)
|
||||||
|
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness attach' to connect"))
|
||||||
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness status' to check progress"))
|
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness status' to check progress"))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -157,12 +173,26 @@ func runWitnessStop(cmd *cobra.Command, args []string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Kill tmux session if it exists
|
||||||
|
t := tmux.NewTmux()
|
||||||
|
sessionName := witnessSessionName(rigName)
|
||||||
|
running, _ := t.HasSession(sessionName)
|
||||||
|
if running {
|
||||||
|
if err := t.KillSession(sessionName); err != nil {
|
||||||
|
fmt.Printf("%s Warning: failed to kill session: %v\n", style.Dim.Render("⚠"), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update state file
|
||||||
if err := mgr.Stop(); err != nil {
|
if err := mgr.Stop(); err != nil {
|
||||||
if err == witness.ErrNotRunning {
|
if err == witness.ErrNotRunning && !running {
|
||||||
fmt.Printf("%s Witness is not running\n", style.Dim.Render("⚠"))
|
fmt.Printf("%s Witness is not running\n", style.Dim.Render("⚠"))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return fmt.Errorf("stopping witness: %w", err)
|
// Even if manager.Stop fails, if we killed the session it's stopped
|
||||||
|
if !running {
|
||||||
|
return fmt.Errorf("stopping witness: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("%s Witness stopped for %s\n", style.Bold.Render("✓"), rigName)
|
fmt.Printf("%s Witness stopped for %s\n", style.Bold.Render("✓"), rigName)
|
||||||
@@ -182,6 +212,18 @@ func runWitnessStatus(cmd *cobra.Command, args []string) error {
|
|||||||
return fmt.Errorf("getting status: %w", err)
|
return fmt.Errorf("getting status: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check actual tmux session state (more reliable than state file)
|
||||||
|
t := tmux.NewTmux()
|
||||||
|
sessionName := witnessSessionName(rigName)
|
||||||
|
sessionRunning, _ := t.HasSession(sessionName)
|
||||||
|
|
||||||
|
// Reconcile state: tmux session is the source of truth for background mode
|
||||||
|
if sessionRunning && w.State != witness.StateRunning {
|
||||||
|
w.State = witness.StateRunning
|
||||||
|
} else if !sessionRunning && w.State == witness.StateRunning {
|
||||||
|
w.State = witness.StateStopped
|
||||||
|
}
|
||||||
|
|
||||||
// JSON output
|
// JSON output
|
||||||
if witnessStatusJSON {
|
if witnessStatusJSON {
|
||||||
enc := json.NewEncoder(os.Stdout)
|
enc := json.NewEncoder(os.Stdout)
|
||||||
@@ -202,6 +244,9 @@ func runWitnessStatus(cmd *cobra.Command, args []string) error {
|
|||||||
stateStr = style.Dim.Render("⏸ paused")
|
stateStr = style.Dim.Render("⏸ paused")
|
||||||
}
|
}
|
||||||
fmt.Printf(" State: %s\n", stateStr)
|
fmt.Printf(" State: %s\n", stateStr)
|
||||||
|
if sessionRunning {
|
||||||
|
fmt.Printf(" Session: %s\n", sessionName)
|
||||||
|
}
|
||||||
|
|
||||||
if w.StartedAt != nil {
|
if w.StartedAt != nil {
|
||||||
fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05"))
|
fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05"))
|
||||||
@@ -236,6 +281,52 @@ func witnessSessionName(rigName string) string {
|
|||||||
return fmt.Sprintf("gt-witness-%s", rigName)
|
return fmt.Sprintf("gt-witness-%s", rigName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureWitnessSession creates a witness tmux session if it doesn't exist.
|
||||||
|
// Returns true if a new session was created, false if it already existed.
|
||||||
|
func ensureWitnessSession(rigName string, r *rig.Rig) (bool, error) {
|
||||||
|
t := tmux.NewTmux()
|
||||||
|
sessionName := witnessSessionName(rigName)
|
||||||
|
|
||||||
|
// Check if session already exists
|
||||||
|
running, err := t.HasSession(sessionName)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("checking session: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if running {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create new tmux session
|
||||||
|
if err := t.NewSession(sessionName, r.Path); err != nil {
|
||||||
|
return false, fmt.Errorf("creating session: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set environment
|
||||||
|
t.SetEnvironment(sessionName, "GT_ROLE", "witness")
|
||||||
|
t.SetEnvironment(sessionName, "GT_RIG", rigName)
|
||||||
|
|
||||||
|
// Apply Gas Town theming
|
||||||
|
theme := tmux.AssignTheme(rigName)
|
||||||
|
_ = t.ConfigureGasTownSession(sessionName, theme, rigName, "witness", "witness")
|
||||||
|
|
||||||
|
// Launch Claude in a respawn loop
|
||||||
|
loopCmd := `while true; do echo "👁️ Starting Witness for ` + rigName + `..."; claude --dangerously-skip-permissions; echo ""; echo "Witness exited. Restarting in 2s... (Ctrl-C to stop)"; sleep 2; done`
|
||||||
|
if err := t.SendKeysDelayed(sessionName, loopCmd, 200); err != nil {
|
||||||
|
return false, fmt.Errorf("sending command: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait briefly then send gt prime to initialize context
|
||||||
|
// This runs after Claude starts up in the respawn loop
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
if err := t.SendKeys(sessionName, "gt prime"); err != nil {
|
||||||
|
// Non-fatal - Claude will still work, just without auto-priming
|
||||||
|
fmt.Printf("Warning: failed to send gt prime: %v\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
func runWitnessAttach(cmd *cobra.Command, args []string) error {
|
func runWitnessAttach(cmd *cobra.Command, args []string) error {
|
||||||
rigName := args[0]
|
rigName := args[0]
|
||||||
|
|
||||||
@@ -245,42 +336,16 @@ func runWitnessAttach(cmd *cobra.Command, args []string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
t := tmux.NewTmux()
|
|
||||||
sessionName := witnessSessionName(rigName)
|
sessionName := witnessSessionName(rigName)
|
||||||
|
|
||||||
// Check if session exists
|
// Ensure session exists (creates if needed)
|
||||||
running, err := t.HasSession(sessionName)
|
created, err := ensureWitnessSession(rigName, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("checking session: %w", err)
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Witness working directory - use <rig>/witness/ for proper role detection
|
if created {
|
||||||
witnessDir := filepath.Join(r.Path, "witness")
|
fmt.Printf("Started witness session for %s\n", rigName)
|
||||||
if err := os.MkdirAll(witnessDir, 0755); err != nil {
|
|
||||||
return fmt.Errorf("creating witness directory: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !running {
|
|
||||||
// Start witness session (like Mayor)
|
|
||||||
fmt.Printf("Starting witness session for %s...\n", rigName)
|
|
||||||
|
|
||||||
if err := t.NewSession(sessionName, witnessDir); err != nil {
|
|
||||||
return fmt.Errorf("creating session: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set environment
|
|
||||||
t.SetEnvironment(sessionName, "GT_ROLE", "witness")
|
|
||||||
t.SetEnvironment(sessionName, "GT_RIG", rigName)
|
|
||||||
|
|
||||||
// Apply theme (same as rig polecats)
|
|
||||||
theme := tmux.AssignTheme(rigName)
|
|
||||||
_ = t.ConfigureGasTownSession(sessionName, theme, rigName, "witness", "witness")
|
|
||||||
|
|
||||||
// Launch Claude in a respawn loop
|
|
||||||
loopCmd := `while true; do echo "👁️ Starting Witness for ` + rigName + `..."; claude --dangerously-skip-permissions; echo ""; echo "Witness exited. Restarting in 2s... (Ctrl-C to stop)"; sleep 2; done`
|
|
||||||
if err := t.SendKeysDelayed(sessionName, loopCmd, 200); err != nil {
|
|
||||||
return fmt.Errorf("sending command: %w", err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attach to the session
|
// Attach to the session
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ func (m *Manager) run(w *Witness) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkAndProcess performs health check and processes shutdown requests.
|
// checkAndProcess performs health check, shutdown processing, and auto-spawn.
|
||||||
func (m *Manager) checkAndProcess(w *Witness) {
|
func (m *Manager) checkAndProcess(w *Witness) {
|
||||||
// Perform health check
|
// Perform health check
|
||||||
if err := m.healthCheck(w); err != nil {
|
if err := m.healthCheck(w); err != nil {
|
||||||
@@ -188,6 +188,13 @@ func (m *Manager) checkAndProcess(w *Witness) {
|
|||||||
if err := m.processShutdownRequests(w); err != nil {
|
if err := m.processShutdownRequests(w); err != nil {
|
||||||
fmt.Printf("Shutdown request error: %v\n", err)
|
fmt.Printf("Shutdown request error: %v\n", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Auto-spawn for ready work (if enabled)
|
||||||
|
if w.Config.AutoSpawn {
|
||||||
|
if err := m.autoSpawnForReadyWork(w); err != nil {
|
||||||
|
fmt.Printf("Auto-spawn error: %v\n", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// healthCheck performs a health check on all monitored polecats.
|
// healthCheck performs a health check on all monitored polecats.
|
||||||
@@ -197,9 +204,184 @@ func (m *Manager) healthCheck(w *Witness) error {
|
|||||||
w.Stats.TotalChecks++
|
w.Stats.TotalChecks++
|
||||||
w.Stats.TodayChecks++
|
w.Stats.TodayChecks++
|
||||||
|
|
||||||
|
// List polecats
|
||||||
|
polecatMgr := polecat.NewManager(m.rig, git.NewGit(m.rig.Path))
|
||||||
|
polecats, err := polecatMgr.List()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("listing polecats: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
t := tmux.NewTmux()
|
||||||
|
sessMgr := session.NewManager(t, m.rig)
|
||||||
|
|
||||||
|
// Update monitored polecats list
|
||||||
|
var active []string
|
||||||
|
for _, p := range polecats {
|
||||||
|
running, _ := sessMgr.IsRunning(p.Name)
|
||||||
|
if running {
|
||||||
|
active = append(active, p.Name)
|
||||||
|
|
||||||
|
// Check health of each active polecat
|
||||||
|
status := m.checkPolecatHealth(p.Name, p.ClonePath)
|
||||||
|
if status == PolecatStuck {
|
||||||
|
m.handleStuckPolecat(w, p.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.MonitoredPolecats = active
|
||||||
|
|
||||||
return m.saveState(w)
|
return m.saveState(w)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PolecatHealthStatus classifies the outcome of a polecat health check.
type PolecatHealthStatus int

// Health states, numbered from zero in declaration order.
const (
	// PolecatHealthy: recent activity was observed for the polecat.
	PolecatHealthy PolecatHealthStatus = iota
	// PolecatStuck: no recent activity was observed for the polecat.
	PolecatStuck
	// PolecatDead: the polecat session is not responding.
	PolecatDead
)

// StuckThresholdMinutes is the default number of minutes without activity
// after which a polecat is considered stuck.
const StuckThresholdMinutes = 30
|
||||||
|
|
||||||
|
// checkPolecatHealth checks if a polecat is healthy based on recent activity.
|
||||||
|
func (m *Manager) checkPolecatHealth(name, path string) PolecatHealthStatus {
|
||||||
|
threshold := time.Duration(StuckThresholdMinutes) * time.Minute
|
||||||
|
|
||||||
|
// Check 1: Git activity (most reliable indicator of work)
|
||||||
|
gitPath := filepath.Join(path, ".git")
|
||||||
|
if info, err := os.Stat(gitPath); err == nil {
|
||||||
|
if time.Since(info.ModTime()) < threshold {
|
||||||
|
return PolecatHealthy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check 2: State file activity
|
||||||
|
stateFile := filepath.Join(path, ".gastown", "state.json")
|
||||||
|
if info, err := os.Stat(stateFile); err == nil {
|
||||||
|
if time.Since(info.ModTime()) < threshold {
|
||||||
|
return PolecatHealthy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check 3: Any file modification in the polecat directory
|
||||||
|
latestMod := m.getLatestModTime(path)
|
||||||
|
if !latestMod.IsZero() && time.Since(latestMod) < threshold {
|
||||||
|
return PolecatHealthy
|
||||||
|
}
|
||||||
|
|
||||||
|
return PolecatStuck
|
||||||
|
}
|
||||||
|
|
||||||
|
// getLatestModTime finds the most recent modification time in a directory.
|
||||||
|
func (m *Manager) getLatestModTime(dir string) time.Time {
|
||||||
|
var latest time.Time
|
||||||
|
|
||||||
|
// Quick check: just look at a few key locations
|
||||||
|
locations := []string{
|
||||||
|
filepath.Join(dir, ".git", "logs", "HEAD"),
|
||||||
|
filepath.Join(dir, ".git", "index"),
|
||||||
|
filepath.Join(dir, ".beads", "issues.jsonl"),
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, loc := range locations {
|
||||||
|
if info, err := os.Stat(loc); err == nil {
|
||||||
|
if info.ModTime().After(latest) {
|
||||||
|
latest = info.ModTime()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return latest
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleStuckPolecat handles a polecat that appears to be stuck.
|
||||||
|
func (m *Manager) handleStuckPolecat(w *Witness, polecatName string) {
|
||||||
|
fmt.Printf("Polecat %s appears stuck (no activity for %d minutes)\n",
|
||||||
|
polecatName, StuckThresholdMinutes)
|
||||||
|
|
||||||
|
// Check nudge history for this polecat
|
||||||
|
nudgeCount := m.getNudgeCount(w, polecatName)
|
||||||
|
|
||||||
|
if nudgeCount == 0 {
|
||||||
|
// First stuck detection: send a nudge
|
||||||
|
fmt.Printf(" Sending nudge to %s...\n", polecatName)
|
||||||
|
if err := m.sendNudge(polecatName, "No activity detected. Are you still working?"); err != nil {
|
||||||
|
fmt.Printf(" Warning: failed to send nudge: %v\n", err)
|
||||||
|
}
|
||||||
|
m.recordNudge(w, polecatName)
|
||||||
|
w.Stats.TotalNudges++
|
||||||
|
w.Stats.TodayNudges++
|
||||||
|
} else if nudgeCount == 1 {
|
||||||
|
// Second stuck detection: escalate to Mayor
|
||||||
|
fmt.Printf(" Escalating %s to Mayor (no response to nudge)...\n", polecatName)
|
||||||
|
if err := m.escalateToMayor(polecatName); err != nil {
|
||||||
|
fmt.Printf(" Warning: failed to escalate: %v\n", err)
|
||||||
|
}
|
||||||
|
w.Stats.TotalEscalations++
|
||||||
|
m.recordNudge(w, polecatName)
|
||||||
|
} else {
|
||||||
|
// Third+ stuck detection: log but wait for human confirmation
|
||||||
|
fmt.Printf(" %s still stuck (waiting for human intervention)\n", polecatName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getNudgeCount returns how many times a polecat has been nudged.
|
||||||
|
func (m *Manager) getNudgeCount(w *Witness, polecatName string) int {
|
||||||
|
// Count occurrences in SpawnedIssues that start with "nudge:" prefix
|
||||||
|
// We reuse SpawnedIssues to track nudges with a "nudge:<name>" pattern
|
||||||
|
count := 0
|
||||||
|
nudgeKey := "nudge:" + polecatName
|
||||||
|
for _, entry := range w.SpawnedIssues {
|
||||||
|
if entry == nudgeKey {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// recordNudge records that a nudge was sent to a polecat.
|
||||||
|
func (m *Manager) recordNudge(w *Witness, polecatName string) {
|
||||||
|
nudgeKey := "nudge:" + polecatName
|
||||||
|
w.SpawnedIssues = append(w.SpawnedIssues, nudgeKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
// escalateToMayor sends an escalation message to the Mayor.
|
||||||
|
func (m *Manager) escalateToMayor(polecatName string) error {
|
||||||
|
subject := fmt.Sprintf("ESCALATION: Polecat %s stuck", polecatName)
|
||||||
|
body := fmt.Sprintf(`Polecat %s in rig %s appears stuck.
|
||||||
|
|
||||||
|
This polecat has been unresponsive for over %d minutes despite nudging.
|
||||||
|
|
||||||
|
Recommended actions:
|
||||||
|
1. Check 'gt session attach %s/%s' to see current state
|
||||||
|
2. If truly stuck, run 'gt session stop %s/%s' to kill the session
|
||||||
|
3. Investigate root cause
|
||||||
|
|
||||||
|
Rig: %s
|
||||||
|
Time: %s
|
||||||
|
`, polecatName, m.rig.Name, StuckThresholdMinutes*2,
|
||||||
|
m.rig.Name, polecatName,
|
||||||
|
m.rig.Name, polecatName,
|
||||||
|
m.rig.Name, time.Now().Format(time.RFC3339))
|
||||||
|
|
||||||
|
cmd := exec.Command("bd", "mail", "send", "mayor/",
|
||||||
|
"-s", subject,
|
||||||
|
"-m", body,
|
||||||
|
)
|
||||||
|
cmd.Dir = m.workDir
|
||||||
|
|
||||||
|
if out, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
return fmt.Errorf("%w: %s", err, string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// processShutdownRequests checks mail for lifecycle requests and handles them.
|
// processShutdownRequests checks mail for lifecycle requests and handles them.
|
||||||
func (m *Manager) processShutdownRequests(w *Witness) error {
|
func (m *Manager) processShutdownRequests(w *Witness) error {
|
||||||
// Get witness mailbox via bd mail inbox
|
// Get witness mailbox via bd mail inbox
|
||||||
@@ -223,6 +405,19 @@ func (m *Manager) processShutdownRequests(w *Witness) error {
|
|||||||
|
|
||||||
fmt.Printf(" Polecat: %s\n", polecatName)
|
fmt.Printf(" Polecat: %s\n", polecatName)
|
||||||
|
|
||||||
|
// Verify polecat state before cleanup
|
||||||
|
if err := m.verifyPolecatState(polecatName); err != nil {
|
||||||
|
fmt.Printf(" Verification failed: %v\n", err)
|
||||||
|
|
||||||
|
// Send nudge to polecat
|
||||||
|
if err := m.sendNudge(polecatName, err.Error()); err != nil {
|
||||||
|
fmt.Printf(" Warning: failed to send nudge: %v\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't ack message - will retry on next check
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Perform cleanup
|
// Perform cleanup
|
||||||
if err := m.cleanupPolecat(polecatName); err != nil {
|
if err := m.cleanupPolecat(polecatName); err != nil {
|
||||||
fmt.Printf(" Cleanup error: %v\n", err)
|
fmt.Printf(" Cleanup error: %v\n", err)
|
||||||
@@ -240,6 +435,63 @@ func (m *Manager) processShutdownRequests(w *Witness) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// verifyPolecatState checks that a polecat is safe to clean up.
|
||||||
|
func (m *Manager) verifyPolecatState(polecatName string) error {
|
||||||
|
polecatPath := filepath.Join(m.rig.Path, "polecats", polecatName)
|
||||||
|
|
||||||
|
// Check if polecat directory exists
|
||||||
|
if _, err := os.Stat(polecatPath); os.IsNotExist(err) {
|
||||||
|
// Already cleaned up, that's fine
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. Check git status is clean
|
||||||
|
polecatGit := git.NewGit(polecatPath)
|
||||||
|
status, err := polecatGit.Status()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("checking git status: %w", err)
|
||||||
|
}
|
||||||
|
if !status.Clean {
|
||||||
|
return fmt.Errorf("git working tree is not clean")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: beads changes would be reflected in git status above,
|
||||||
|
// since beads files are tracked in git.
|
||||||
|
|
||||||
|
// Note: MR submission is now done automatically by polecat's handoff command,
|
||||||
|
// so we don't need to verify it here - the polecat wouldn't have requested
|
||||||
|
// shutdown if that step failed
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// sendNudge sends a message to a polecat asking it to fix its state.
|
||||||
|
func (m *Manager) sendNudge(polecatName, reason string) error {
|
||||||
|
subject := fmt.Sprintf("NUDGE: Cannot shutdown - %s", reason)
|
||||||
|
body := fmt.Sprintf(`Your shutdown request was denied because: %s
|
||||||
|
|
||||||
|
Please fix the issue and run 'gt handoff' again.
|
||||||
|
|
||||||
|
Polecat: %s
|
||||||
|
Rig: %s
|
||||||
|
Time: %s
|
||||||
|
`, reason, polecatName, m.rig.Name, time.Now().Format(time.RFC3339))
|
||||||
|
|
||||||
|
// Send via bd mail
|
||||||
|
recipient := fmt.Sprintf("%s/%s", m.rig.Name, polecatName)
|
||||||
|
cmd := exec.Command("bd", "mail", "send", recipient,
|
||||||
|
"-s", subject,
|
||||||
|
"-m", body,
|
||||||
|
)
|
||||||
|
cmd.Dir = m.workDir
|
||||||
|
|
||||||
|
if out, err := cmd.CombinedOutput(); err != nil {
|
||||||
|
return fmt.Errorf("%w: %s", err, string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// WitnessMessage represents a mail message for the witness.
|
// WitnessMessage represents a mail message for the witness.
|
||||||
type WitnessMessage struct {
|
type WitnessMessage struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
@@ -356,3 +608,172 @@ func processExists(pid int) bool {
|
|||||||
err = proc.Signal(nil)
|
err = proc.Signal(nil)
|
||||||
return err == nil
|
return err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadyIssue is one element of the JSON array printed by `bd ready --json`.
// Only the fields listed here are decoded.
type ReadyIssue struct {
	// ID is the issue identifier (JSON "id").
	ID string `json:"id"`
	// Title is the issue title (JSON "title").
	Title string `json:"title"`
	// Type is the issue type (JSON "issue_type").
	Type string `json:"issue_type"`
	// Status is the issue status (JSON "status").
	Status string `json:"status"`
}
|
||||||
|
|
||||||
|
// autoSpawnForReadyWork spawns polecats for ready work up to capacity.
|
||||||
|
func (m *Manager) autoSpawnForReadyWork(w *Witness) error {
|
||||||
|
// Get current active polecat count
|
||||||
|
activeCount, err := m.getActivePolecatCount()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("counting polecats: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
maxWorkers := w.Config.MaxWorkers
|
||||||
|
if maxWorkers <= 0 {
|
||||||
|
maxWorkers = 4 // Default
|
||||||
|
}
|
||||||
|
|
||||||
|
if activeCount >= maxWorkers {
|
||||||
|
// At capacity, nothing to do
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get ready issues
|
||||||
|
issues, err := m.getReadyIssues()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("getting ready issues: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter issues (exclude merge-requests, epics, and already-spawned issues)
|
||||||
|
var spawnableIssues []ReadyIssue
|
||||||
|
for _, issue := range issues {
|
||||||
|
// Skip merge-requests and epics
|
||||||
|
if issue.Type == "merge-request" || issue.Type == "epic" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if already spawned
|
||||||
|
if m.isAlreadySpawned(w, issue.ID) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter by epic if configured
|
||||||
|
if w.Config.EpicID != "" {
|
||||||
|
// TODO: Check if issue is a child of the configured epic
|
||||||
|
// For now, we skip this filter
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter by prefix if configured
|
||||||
|
if w.Config.IssuePrefix != "" {
|
||||||
|
if !strings.HasPrefix(issue.ID, w.Config.IssuePrefix) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spawnableIssues = append(spawnableIssues, issue)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spawn up to capacity
|
||||||
|
spawnDelay := w.Config.SpawnDelayMs
|
||||||
|
if spawnDelay <= 0 {
|
||||||
|
spawnDelay = 5000 // Default 5 seconds
|
||||||
|
}
|
||||||
|
|
||||||
|
spawned := 0
|
||||||
|
for _, issue := range spawnableIssues {
|
||||||
|
if activeCount+spawned >= maxWorkers {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Auto-spawning for issue %s: %s\n", issue.ID, issue.Title)
|
||||||
|
|
||||||
|
if err := m.spawnPolecat(issue.ID); err != nil {
|
||||||
|
fmt.Printf(" Spawn failed: %v\n", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track that we spawned for this issue
|
||||||
|
w.SpawnedIssues = append(w.SpawnedIssues, issue.ID)
|
||||||
|
spawned++
|
||||||
|
|
||||||
|
// Delay between spawns
|
||||||
|
if spawned < len(spawnableIssues) && activeCount+spawned < maxWorkers {
|
||||||
|
time.Sleep(time.Duration(spawnDelay) * time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if spawned > 0 {
|
||||||
|
// Save state to persist spawned issues list
|
||||||
|
return m.saveState(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getActivePolecatCount returns the number of polecats with active tmux sessions.
|
||||||
|
func (m *Manager) getActivePolecatCount() (int, error) {
|
||||||
|
polecatMgr := polecat.NewManager(m.rig, git.NewGit(m.rig.Path))
|
||||||
|
polecats, err := polecatMgr.List()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
t := tmux.NewTmux()
|
||||||
|
sessMgr := session.NewManager(t, m.rig)
|
||||||
|
|
||||||
|
count := 0
|
||||||
|
for _, p := range polecats {
|
||||||
|
running, _ := sessMgr.IsRunning(p.Name)
|
||||||
|
if running {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return count, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getReadyIssues returns issues ready to work (no blockers).
|
||||||
|
func (m *Manager) getReadyIssues() ([]ReadyIssue, error) {
|
||||||
|
cmd := exec.Command("bd", "ready", "--json")
|
||||||
|
cmd.Dir = m.workDir
|
||||||
|
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
cmd.Stdout = &stdout
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
return nil, fmt.Errorf("%s", stderr.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
if stdout.Len() == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var issues []ReadyIssue
|
||||||
|
if err := json.Unmarshal(stdout.Bytes(), &issues); err != nil {
|
||||||
|
return nil, fmt.Errorf("parsing ready issues: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return issues, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isAlreadySpawned checks if an issue has already been spawned.
|
||||||
|
func (m *Manager) isAlreadySpawned(w *Witness, issueID string) bool {
|
||||||
|
for _, id := range w.SpawnedIssues {
|
||||||
|
if id == issueID {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// spawnPolecat spawns a polecat for an issue using gt spawn.
|
||||||
|
func (m *Manager) spawnPolecat(issueID string) error {
|
||||||
|
cmd := exec.Command("gt", "spawn", "--rig", m.rig.Name, "--issue", issueID)
|
||||||
|
cmd.Dir = m.workDir
|
||||||
|
|
||||||
|
output, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("%s", strings.TrimSpace(string(output)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf(" Spawned: %s\n", strings.TrimSpace(string(output)))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -41,6 +41,30 @@ type Witness struct {
|
|||||||
|
|
||||||
// Stats contains cumulative statistics.
|
// Stats contains cumulative statistics.
|
||||||
Stats WitnessStats `json:"stats"`
|
Stats WitnessStats `json:"stats"`
|
||||||
|
|
||||||
|
// Config contains auto-spawn configuration.
|
||||||
|
Config WitnessConfig `json:"config"`
|
||||||
|
|
||||||
|
// SpawnedIssues tracks which issues have been spawned (to avoid duplicates).
|
||||||
|
SpawnedIssues []string `json:"spawned_issues,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// WitnessConfig holds the auto-spawn settings of a witness.
type WitnessConfig struct {
	// MaxWorkers caps the number of concurrent polecats. Values <= 0 are
	// treated as 4 by the auto-spawn code.
	MaxWorkers int `json:"max_workers"`

	// SpawnDelayMs is the pause between spawns in milliseconds. Values <= 0
	// are treated as 5000 by the auto-spawn code.
	SpawnDelayMs int `json:"spawn_delay_ms"`

	// AutoSpawn turns on automatic spawning for ready issues.
	// NOTE(review): previously documented as "default: true", but the Go zero
	// value is false; confirm where the default is applied.
	AutoSpawn bool `json:"auto_spawn"`

	// EpicID limits spawning to children of this epic (optional).
	EpicID string `json:"epic_id,omitempty"`

	// IssuePrefix limits spawning to issues whose ID starts with this prefix
	// (optional).
	IssuePrefix string `json:"issue_prefix,omitempty"`
}
|
||||||
|
|
||||||
// WitnessStats contains cumulative witness statistics.
|
// WitnessStats contains cumulative witness statistics.
|
||||||
|
|||||||
Reference in New Issue
Block a user