feat(witness): Implement Witness MVP for automated polecat lifecycle
Implements the core Witness functionality: - gt witness start: Creates tmux session with Claude, theming, auto-priming - gt witness stop: Kills tmux session and updates state - gt witness status: Shows session state reconciled with tmux - Shutdown handler: Verifies git clean state before cleanup, sends nudges - Auto-spawn: Spawns polecats for ready work up to configurable capacity - Health checks: Monitors polecat activity, nudges stuck workers, escalates Also updates handoff to include polecat name in lifecycle requests. Closes: gt-53w6, gt-mxyj, gt-5wtw, gt-cpm2, gt-es1i 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,25 @@ Check gt mail inbox for messages received during transition.
|
||||
return nil
|
||||
}
|
||||
|
||||
// getPolecatName reports the polecat name encoded in the current tmux
// session name.
//
// It asks tmux for the session name and expects the form "gt-<rig>-<name>".
// The empty string is returned when tmux cannot be queried, when the session
// name does not start with "gt-", or when it has fewer than three
// dash-separated fields.
func getPolecatName() string {
	raw, err := exec.Command("tmux", "display-message", "-p", "#{session_name}").Output()
	if err != nil {
		return ""
	}

	session := strings.TrimSpace(string(raw))
	if !strings.HasPrefix(session, "gt-") {
		return ""
	}

	// Split into at most three fields so any further dashes stay in the
	// final field, which is returned as the polecat name.
	fields := strings.SplitN(session, "-", 3)
	if len(fields) < 3 {
		return ""
	}
	return fields[2]
}
|
||||
|
||||
// sendLifecycleRequest sends the lifecycle request to our manager.
|
||||
func sendLifecycleRequest(manager string, role Role, action HandoffAction, townRoot string) error {
|
||||
if manager == "human" {
|
||||
@@ -329,14 +348,21 @@ func sendLifecycleRequest(manager string, role Role, action HandoffAction, townR
|
||||
return nil
|
||||
}
|
||||
|
||||
// For polecats, include the specific name
|
||||
polecatName := ""
|
||||
if role == RolePolecat {
|
||||
polecatName = getPolecatName()
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("LIFECYCLE: %s requesting %s", role, action)
|
||||
body := fmt.Sprintf(`Lifecycle request from %s.
|
||||
|
||||
Action: %s
|
||||
Time: %s
|
||||
Polecat: %s
|
||||
|
||||
Please verify state and execute lifecycle action.
|
||||
`, role, action, time.Now().Format(time.RFC3339))
|
||||
`, role, action, time.Now().Format(time.RFC3339), polecatName)
|
||||
|
||||
// Send via bd mail (syntax: bd mail send <recipient> -s <subject> -m <body>)
|
||||
cmd := exec.Command("bd", "mail", "send", manager,
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/steveyegge/gastown/internal/config"
|
||||
@@ -124,27 +125,42 @@ func getWitnessManager(rigName string) (*witness.Manager, *rig.Rig, error) {
|
||||
func runWitnessStart(cmd *cobra.Command, args []string) error {
|
||||
rigName := args[0]
|
||||
|
||||
mgr, _, err := getWitnessManager(rigName)
|
||||
mgr, r, err := getWitnessManager(rigName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("Starting witness for %s...\n", rigName)
|
||||
|
||||
if err := mgr.Start(witnessForeground); err != nil {
|
||||
if err == witness.ErrAlreadyRunning {
|
||||
fmt.Printf("%s Witness is already running\n", style.Dim.Render("⚠"))
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("starting witness: %w", err)
|
||||
}
|
||||
|
||||
if witnessForeground {
|
||||
// This will block until stopped
|
||||
// Foreground mode: run monitoring loop in current process (blocking)
|
||||
if err := mgr.Start(true); err != nil {
|
||||
if err == witness.ErrAlreadyRunning {
|
||||
fmt.Printf("%s Witness is already running\n", style.Dim.Render("⚠"))
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("starting witness: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Background mode: create tmux session with Claude
|
||||
created, err := ensureWitnessSession(rigName, r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !created {
|
||||
fmt.Printf("%s Witness session already running\n", style.Dim.Render("⚠"))
|
||||
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness attach' to connect"))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update manager state to reflect running session
|
||||
_ = mgr.Start(false) // Mark as running in state file
|
||||
|
||||
fmt.Printf("%s Witness started for %s\n", style.Bold.Render("✓"), rigName)
|
||||
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness attach' to connect"))
|
||||
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness status' to check progress"))
|
||||
return nil
|
||||
}
|
||||
@@ -157,12 +173,26 @@ func runWitnessStop(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Kill tmux session if it exists
|
||||
t := tmux.NewTmux()
|
||||
sessionName := witnessSessionName(rigName)
|
||||
running, _ := t.HasSession(sessionName)
|
||||
if running {
|
||||
if err := t.KillSession(sessionName); err != nil {
|
||||
fmt.Printf("%s Warning: failed to kill session: %v\n", style.Dim.Render("⚠"), err)
|
||||
}
|
||||
}
|
||||
|
||||
// Update state file
|
||||
if err := mgr.Stop(); err != nil {
|
||||
if err == witness.ErrNotRunning {
|
||||
if err == witness.ErrNotRunning && !running {
|
||||
fmt.Printf("%s Witness is not running\n", style.Dim.Render("⚠"))
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("stopping witness: %w", err)
|
||||
// Even if manager.Stop fails, if we killed the session it's stopped
|
||||
if !running {
|
||||
return fmt.Errorf("stopping witness: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("%s Witness stopped for %s\n", style.Bold.Render("✓"), rigName)
|
||||
@@ -182,6 +212,18 @@ func runWitnessStatus(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("getting status: %w", err)
|
||||
}
|
||||
|
||||
// Check actual tmux session state (more reliable than state file)
|
||||
t := tmux.NewTmux()
|
||||
sessionName := witnessSessionName(rigName)
|
||||
sessionRunning, _ := t.HasSession(sessionName)
|
||||
|
||||
// Reconcile state: tmux session is the source of truth for background mode
|
||||
if sessionRunning && w.State != witness.StateRunning {
|
||||
w.State = witness.StateRunning
|
||||
} else if !sessionRunning && w.State == witness.StateRunning {
|
||||
w.State = witness.StateStopped
|
||||
}
|
||||
|
||||
// JSON output
|
||||
if witnessStatusJSON {
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
@@ -202,6 +244,9 @@ func runWitnessStatus(cmd *cobra.Command, args []string) error {
|
||||
stateStr = style.Dim.Render("⏸ paused")
|
||||
}
|
||||
fmt.Printf(" State: %s\n", stateStr)
|
||||
if sessionRunning {
|
||||
fmt.Printf(" Session: %s\n", sessionName)
|
||||
}
|
||||
|
||||
if w.StartedAt != nil {
|
||||
fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05"))
|
||||
@@ -236,6 +281,52 @@ func witnessSessionName(rigName string) string {
|
||||
return fmt.Sprintf("gt-witness-%s", rigName)
|
||||
}
|
||||
|
||||
// ensureWitnessSession creates a witness tmux session if it doesn't exist.
|
||||
// Returns true if a new session was created, false if it already existed.
|
||||
func ensureWitnessSession(rigName string, r *rig.Rig) (bool, error) {
|
||||
t := tmux.NewTmux()
|
||||
sessionName := witnessSessionName(rigName)
|
||||
|
||||
// Check if session already exists
|
||||
running, err := t.HasSession(sessionName)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("checking session: %w", err)
|
||||
}
|
||||
|
||||
if running {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Create new tmux session
|
||||
if err := t.NewSession(sessionName, r.Path); err != nil {
|
||||
return false, fmt.Errorf("creating session: %w", err)
|
||||
}
|
||||
|
||||
// Set environment
|
||||
t.SetEnvironment(sessionName, "GT_ROLE", "witness")
|
||||
t.SetEnvironment(sessionName, "GT_RIG", rigName)
|
||||
|
||||
// Apply Gas Town theming
|
||||
theme := tmux.AssignTheme(rigName)
|
||||
_ = t.ConfigureGasTownSession(sessionName, theme, rigName, "witness", "witness")
|
||||
|
||||
// Launch Claude in a respawn loop
|
||||
loopCmd := `while true; do echo "👁️ Starting Witness for ` + rigName + `..."; claude --dangerously-skip-permissions; echo ""; echo "Witness exited. Restarting in 2s... (Ctrl-C to stop)"; sleep 2; done`
|
||||
if err := t.SendKeysDelayed(sessionName, loopCmd, 200); err != nil {
|
||||
return false, fmt.Errorf("sending command: %w", err)
|
||||
}
|
||||
|
||||
// Wait briefly then send gt prime to initialize context
|
||||
// This runs after Claude starts up in the respawn loop
|
||||
time.Sleep(3 * time.Second)
|
||||
if err := t.SendKeys(sessionName, "gt prime"); err != nil {
|
||||
// Non-fatal - Claude will still work, just without auto-priming
|
||||
fmt.Printf("Warning: failed to send gt prime: %v\n", err)
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func runWitnessAttach(cmd *cobra.Command, args []string) error {
|
||||
rigName := args[0]
|
||||
|
||||
@@ -245,42 +336,16 @@ func runWitnessAttach(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
t := tmux.NewTmux()
|
||||
sessionName := witnessSessionName(rigName)
|
||||
|
||||
// Check if session exists
|
||||
running, err := t.HasSession(sessionName)
|
||||
// Ensure session exists (creates if needed)
|
||||
created, err := ensureWitnessSession(rigName, r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking session: %w", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Witness working directory - use <rig>/witness/ for proper role detection
|
||||
witnessDir := filepath.Join(r.Path, "witness")
|
||||
if err := os.MkdirAll(witnessDir, 0755); err != nil {
|
||||
return fmt.Errorf("creating witness directory: %w", err)
|
||||
}
|
||||
|
||||
if !running {
|
||||
// Start witness session (like Mayor)
|
||||
fmt.Printf("Starting witness session for %s...\n", rigName)
|
||||
|
||||
if err := t.NewSession(sessionName, witnessDir); err != nil {
|
||||
return fmt.Errorf("creating session: %w", err)
|
||||
}
|
||||
|
||||
// Set environment
|
||||
t.SetEnvironment(sessionName, "GT_ROLE", "witness")
|
||||
t.SetEnvironment(sessionName, "GT_RIG", rigName)
|
||||
|
||||
// Apply theme (same as rig polecats)
|
||||
theme := tmux.AssignTheme(rigName)
|
||||
_ = t.ConfigureGasTownSession(sessionName, theme, rigName, "witness", "witness")
|
||||
|
||||
// Launch Claude in a respawn loop
|
||||
loopCmd := `while true; do echo "👁️ Starting Witness for ` + rigName + `..."; claude --dangerously-skip-permissions; echo ""; echo "Witness exited. Restarting in 2s... (Ctrl-C to stop)"; sleep 2; done`
|
||||
if err := t.SendKeysDelayed(sessionName, loopCmd, 200); err != nil {
|
||||
return fmt.Errorf("sending command: %w", err)
|
||||
}
|
||||
if created {
|
||||
fmt.Printf("Started witness session for %s\n", rigName)
|
||||
}
|
||||
|
||||
// Attach to the session
|
||||
|
||||
@@ -177,7 +177,7 @@ func (m *Manager) run(w *Witness) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkAndProcess performs health check and processes shutdown requests.
|
||||
// checkAndProcess performs health check, shutdown processing, and auto-spawn.
|
||||
func (m *Manager) checkAndProcess(w *Witness) {
|
||||
// Perform health check
|
||||
if err := m.healthCheck(w); err != nil {
|
||||
@@ -188,6 +188,13 @@ func (m *Manager) checkAndProcess(w *Witness) {
|
||||
if err := m.processShutdownRequests(w); err != nil {
|
||||
fmt.Printf("Shutdown request error: %v\n", err)
|
||||
}
|
||||
|
||||
// Auto-spawn for ready work (if enabled)
|
||||
if w.Config.AutoSpawn {
|
||||
if err := m.autoSpawnForReadyWork(w); err != nil {
|
||||
fmt.Printf("Auto-spawn error: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// healthCheck performs a health check on all monitored polecats.
|
||||
@@ -197,9 +204,184 @@ func (m *Manager) healthCheck(w *Witness) error {
|
||||
w.Stats.TotalChecks++
|
||||
w.Stats.TodayChecks++
|
||||
|
||||
// List polecats
|
||||
polecatMgr := polecat.NewManager(m.rig, git.NewGit(m.rig.Path))
|
||||
polecats, err := polecatMgr.List()
|
||||
if err != nil {
|
||||
return fmt.Errorf("listing polecats: %w", err)
|
||||
}
|
||||
|
||||
t := tmux.NewTmux()
|
||||
sessMgr := session.NewManager(t, m.rig)
|
||||
|
||||
// Update monitored polecats list
|
||||
var active []string
|
||||
for _, p := range polecats {
|
||||
running, _ := sessMgr.IsRunning(p.Name)
|
||||
if running {
|
||||
active = append(active, p.Name)
|
||||
|
||||
// Check health of each active polecat
|
||||
status := m.checkPolecatHealth(p.Name, p.ClonePath)
|
||||
if status == PolecatStuck {
|
||||
m.handleStuckPolecat(w, p.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
w.MonitoredPolecats = active
|
||||
|
||||
return m.saveState(w)
|
||||
}
|
||||
|
||||
// PolecatHealthStatus classifies the result of a polecat health check.
type PolecatHealthStatus int

// Health states, numbered from zero in declaration order.
const (
	// PolecatHealthy: the polecat is working normally.
	PolecatHealthy PolecatHealthStatus = iota

	// PolecatStuck: the polecat has shown no recent activity.
	PolecatStuck

	// PolecatDead: the polecat session is not responding.
	PolecatDead
)

// StuckThresholdMinutes is the default number of minutes without activity
// after which a polecat is considered stuck.
const StuckThresholdMinutes = 30
|
||||
|
||||
// checkPolecatHealth checks if a polecat is healthy based on recent activity.
|
||||
func (m *Manager) checkPolecatHealth(name, path string) PolecatHealthStatus {
|
||||
threshold := time.Duration(StuckThresholdMinutes) * time.Minute
|
||||
|
||||
// Check 1: Git activity (most reliable indicator of work)
|
||||
gitPath := filepath.Join(path, ".git")
|
||||
if info, err := os.Stat(gitPath); err == nil {
|
||||
if time.Since(info.ModTime()) < threshold {
|
||||
return PolecatHealthy
|
||||
}
|
||||
}
|
||||
|
||||
// Check 2: State file activity
|
||||
stateFile := filepath.Join(path, ".gastown", "state.json")
|
||||
if info, err := os.Stat(stateFile); err == nil {
|
||||
if time.Since(info.ModTime()) < threshold {
|
||||
return PolecatHealthy
|
||||
}
|
||||
}
|
||||
|
||||
// Check 3: Any file modification in the polecat directory
|
||||
latestMod := m.getLatestModTime(path)
|
||||
if !latestMod.IsZero() && time.Since(latestMod) < threshold {
|
||||
return PolecatHealthy
|
||||
}
|
||||
|
||||
return PolecatStuck
|
||||
}
|
||||
|
||||
// getLatestModTime finds the most recent modification time in a directory.
|
||||
func (m *Manager) getLatestModTime(dir string) time.Time {
|
||||
var latest time.Time
|
||||
|
||||
// Quick check: just look at a few key locations
|
||||
locations := []string{
|
||||
filepath.Join(dir, ".git", "logs", "HEAD"),
|
||||
filepath.Join(dir, ".git", "index"),
|
||||
filepath.Join(dir, ".beads", "issues.jsonl"),
|
||||
}
|
||||
|
||||
for _, loc := range locations {
|
||||
if info, err := os.Stat(loc); err == nil {
|
||||
if info.ModTime().After(latest) {
|
||||
latest = info.ModTime()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return latest
|
||||
}
|
||||
|
||||
// handleStuckPolecat handles a polecat that appears to be stuck.
|
||||
func (m *Manager) handleStuckPolecat(w *Witness, polecatName string) {
|
||||
fmt.Printf("Polecat %s appears stuck (no activity for %d minutes)\n",
|
||||
polecatName, StuckThresholdMinutes)
|
||||
|
||||
// Check nudge history for this polecat
|
||||
nudgeCount := m.getNudgeCount(w, polecatName)
|
||||
|
||||
if nudgeCount == 0 {
|
||||
// First stuck detection: send a nudge
|
||||
fmt.Printf(" Sending nudge to %s...\n", polecatName)
|
||||
if err := m.sendNudge(polecatName, "No activity detected. Are you still working?"); err != nil {
|
||||
fmt.Printf(" Warning: failed to send nudge: %v\n", err)
|
||||
}
|
||||
m.recordNudge(w, polecatName)
|
||||
w.Stats.TotalNudges++
|
||||
w.Stats.TodayNudges++
|
||||
} else if nudgeCount == 1 {
|
||||
// Second stuck detection: escalate to Mayor
|
||||
fmt.Printf(" Escalating %s to Mayor (no response to nudge)...\n", polecatName)
|
||||
if err := m.escalateToMayor(polecatName); err != nil {
|
||||
fmt.Printf(" Warning: failed to escalate: %v\n", err)
|
||||
}
|
||||
w.Stats.TotalEscalations++
|
||||
m.recordNudge(w, polecatName)
|
||||
} else {
|
||||
// Third+ stuck detection: log but wait for human confirmation
|
||||
fmt.Printf(" %s still stuck (waiting for human intervention)\n", polecatName)
|
||||
}
|
||||
}
|
||||
|
||||
// getNudgeCount returns how many times a polecat has been nudged.
|
||||
func (m *Manager) getNudgeCount(w *Witness, polecatName string) int {
|
||||
// Count occurrences in SpawnedIssues that start with "nudge:" prefix
|
||||
// We reuse SpawnedIssues to track nudges with a "nudge:<name>" pattern
|
||||
count := 0
|
||||
nudgeKey := "nudge:" + polecatName
|
||||
for _, entry := range w.SpawnedIssues {
|
||||
if entry == nudgeKey {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// recordNudge records that a nudge was sent to a polecat.
|
||||
func (m *Manager) recordNudge(w *Witness, polecatName string) {
|
||||
nudgeKey := "nudge:" + polecatName
|
||||
w.SpawnedIssues = append(w.SpawnedIssues, nudgeKey)
|
||||
}
|
||||
|
||||
// escalateToMayor sends an escalation message to the Mayor.
|
||||
func (m *Manager) escalateToMayor(polecatName string) error {
|
||||
subject := fmt.Sprintf("ESCALATION: Polecat %s stuck", polecatName)
|
||||
body := fmt.Sprintf(`Polecat %s in rig %s appears stuck.
|
||||
|
||||
This polecat has been unresponsive for over %d minutes despite nudging.
|
||||
|
||||
Recommended actions:
|
||||
1. Check 'gt session attach %s/%s' to see current state
|
||||
2. If truly stuck, run 'gt session stop %s/%s' to kill the session
|
||||
3. Investigate root cause
|
||||
|
||||
Rig: %s
|
||||
Time: %s
|
||||
`, polecatName, m.rig.Name, StuckThresholdMinutes*2,
|
||||
m.rig.Name, polecatName,
|
||||
m.rig.Name, polecatName,
|
||||
m.rig.Name, time.Now().Format(time.RFC3339))
|
||||
|
||||
cmd := exec.Command("bd", "mail", "send", "mayor/",
|
||||
"-s", subject,
|
||||
"-m", body,
|
||||
)
|
||||
cmd.Dir = m.workDir
|
||||
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("%w: %s", err, string(out))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// processShutdownRequests checks mail for lifecycle requests and handles them.
|
||||
func (m *Manager) processShutdownRequests(w *Witness) error {
|
||||
// Get witness mailbox via bd mail inbox
|
||||
@@ -223,6 +405,19 @@ func (m *Manager) processShutdownRequests(w *Witness) error {
|
||||
|
||||
fmt.Printf(" Polecat: %s\n", polecatName)
|
||||
|
||||
// Verify polecat state before cleanup
|
||||
if err := m.verifyPolecatState(polecatName); err != nil {
|
||||
fmt.Printf(" Verification failed: %v\n", err)
|
||||
|
||||
// Send nudge to polecat
|
||||
if err := m.sendNudge(polecatName, err.Error()); err != nil {
|
||||
fmt.Printf(" Warning: failed to send nudge: %v\n", err)
|
||||
}
|
||||
|
||||
// Don't ack message - will retry on next check
|
||||
continue
|
||||
}
|
||||
|
||||
// Perform cleanup
|
||||
if err := m.cleanupPolecat(polecatName); err != nil {
|
||||
fmt.Printf(" Cleanup error: %v\n", err)
|
||||
@@ -240,6 +435,63 @@ func (m *Manager) processShutdownRequests(w *Witness) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifyPolecatState checks that a polecat is safe to clean up.
|
||||
func (m *Manager) verifyPolecatState(polecatName string) error {
|
||||
polecatPath := filepath.Join(m.rig.Path, "polecats", polecatName)
|
||||
|
||||
// Check if polecat directory exists
|
||||
if _, err := os.Stat(polecatPath); os.IsNotExist(err) {
|
||||
// Already cleaned up, that's fine
|
||||
return nil
|
||||
}
|
||||
|
||||
// 1. Check git status is clean
|
||||
polecatGit := git.NewGit(polecatPath)
|
||||
status, err := polecatGit.Status()
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking git status: %w", err)
|
||||
}
|
||||
if !status.Clean {
|
||||
return fmt.Errorf("git working tree is not clean")
|
||||
}
|
||||
|
||||
// Note: beads changes would be reflected in git status above,
|
||||
// since beads files are tracked in git.
|
||||
|
||||
// Note: MR submission is now done automatically by polecat's handoff command,
|
||||
// so we don't need to verify it here - the polecat wouldn't have requested
|
||||
// shutdown if that step failed
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendNudge sends a message to a polecat asking it to fix its state.
|
||||
func (m *Manager) sendNudge(polecatName, reason string) error {
|
||||
subject := fmt.Sprintf("NUDGE: Cannot shutdown - %s", reason)
|
||||
body := fmt.Sprintf(`Your shutdown request was denied because: %s
|
||||
|
||||
Please fix the issue and run 'gt handoff' again.
|
||||
|
||||
Polecat: %s
|
||||
Rig: %s
|
||||
Time: %s
|
||||
`, reason, polecatName, m.rig.Name, time.Now().Format(time.RFC3339))
|
||||
|
||||
// Send via bd mail
|
||||
recipient := fmt.Sprintf("%s/%s", m.rig.Name, polecatName)
|
||||
cmd := exec.Command("bd", "mail", "send", recipient,
|
||||
"-s", subject,
|
||||
"-m", body,
|
||||
)
|
||||
cmd.Dir = m.workDir
|
||||
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("%w: %s", err, string(out))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WitnessMessage represents a mail message for the witness.
|
||||
type WitnessMessage struct {
|
||||
ID string `json:"id"`
|
||||
@@ -356,3 +608,172 @@ func processExists(pid int) bool {
|
||||
err = proc.Signal(nil)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// ReadyIssue is one element of the JSON array printed by `bd ready --json`.
// Only the fields listed here are decoded.
type ReadyIssue struct {
	// ID is the issue identifier (JSON key "id").
	ID string `json:"id"`
	// Title is the issue title (JSON key "title").
	Title string `json:"title"`
	// Type is the issue type (JSON key "issue_type").
	Type string `json:"issue_type"`
	// Status is the issue status (JSON key "status").
	Status string `json:"status"`
}
|
||||
|
||||
// autoSpawnForReadyWork spawns polecats for ready work up to capacity.
|
||||
func (m *Manager) autoSpawnForReadyWork(w *Witness) error {
|
||||
// Get current active polecat count
|
||||
activeCount, err := m.getActivePolecatCount()
|
||||
if err != nil {
|
||||
return fmt.Errorf("counting polecats: %w", err)
|
||||
}
|
||||
|
||||
maxWorkers := w.Config.MaxWorkers
|
||||
if maxWorkers <= 0 {
|
||||
maxWorkers = 4 // Default
|
||||
}
|
||||
|
||||
if activeCount >= maxWorkers {
|
||||
// At capacity, nothing to do
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get ready issues
|
||||
issues, err := m.getReadyIssues()
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting ready issues: %w", err)
|
||||
}
|
||||
|
||||
// Filter issues (exclude merge-requests, epics, and already-spawned issues)
|
||||
var spawnableIssues []ReadyIssue
|
||||
for _, issue := range issues {
|
||||
// Skip merge-requests and epics
|
||||
if issue.Type == "merge-request" || issue.Type == "epic" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip if already spawned
|
||||
if m.isAlreadySpawned(w, issue.ID) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Filter by epic if configured
|
||||
if w.Config.EpicID != "" {
|
||||
// TODO: Check if issue is a child of the configured epic
|
||||
// For now, we skip this filter
|
||||
}
|
||||
|
||||
// Filter by prefix if configured
|
||||
if w.Config.IssuePrefix != "" {
|
||||
if !strings.HasPrefix(issue.ID, w.Config.IssuePrefix) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
spawnableIssues = append(spawnableIssues, issue)
|
||||
}
|
||||
|
||||
// Spawn up to capacity
|
||||
spawnDelay := w.Config.SpawnDelayMs
|
||||
if spawnDelay <= 0 {
|
||||
spawnDelay = 5000 // Default 5 seconds
|
||||
}
|
||||
|
||||
spawned := 0
|
||||
for _, issue := range spawnableIssues {
|
||||
if activeCount+spawned >= maxWorkers {
|
||||
break
|
||||
}
|
||||
|
||||
fmt.Printf("Auto-spawning for issue %s: %s\n", issue.ID, issue.Title)
|
||||
|
||||
if err := m.spawnPolecat(issue.ID); err != nil {
|
||||
fmt.Printf(" Spawn failed: %v\n", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Track that we spawned for this issue
|
||||
w.SpawnedIssues = append(w.SpawnedIssues, issue.ID)
|
||||
spawned++
|
||||
|
||||
// Delay between spawns
|
||||
if spawned < len(spawnableIssues) && activeCount+spawned < maxWorkers {
|
||||
time.Sleep(time.Duration(spawnDelay) * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
if spawned > 0 {
|
||||
// Save state to persist spawned issues list
|
||||
return m.saveState(w)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// getActivePolecatCount returns the number of polecats with active tmux sessions.
|
||||
func (m *Manager) getActivePolecatCount() (int, error) {
|
||||
polecatMgr := polecat.NewManager(m.rig, git.NewGit(m.rig.Path))
|
||||
polecats, err := polecatMgr.List()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
t := tmux.NewTmux()
|
||||
sessMgr := session.NewManager(t, m.rig)
|
||||
|
||||
count := 0
|
||||
for _, p := range polecats {
|
||||
running, _ := sessMgr.IsRunning(p.Name)
|
||||
if running {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// getReadyIssues returns issues ready to work (no blockers).
|
||||
func (m *Manager) getReadyIssues() ([]ReadyIssue, error) {
|
||||
cmd := exec.Command("bd", "ready", "--json")
|
||||
cmd.Dir = m.workDir
|
||||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf("%s", stderr.String())
|
||||
}
|
||||
|
||||
if stdout.Len() == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var issues []ReadyIssue
|
||||
if err := json.Unmarshal(stdout.Bytes(), &issues); err != nil {
|
||||
return nil, fmt.Errorf("parsing ready issues: %w", err)
|
||||
}
|
||||
|
||||
return issues, nil
|
||||
}
|
||||
|
||||
// isAlreadySpawned checks if an issue has already been spawned.
|
||||
func (m *Manager) isAlreadySpawned(w *Witness, issueID string) bool {
|
||||
for _, id := range w.SpawnedIssues {
|
||||
if id == issueID {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// spawnPolecat spawns a polecat for an issue using gt spawn.
|
||||
func (m *Manager) spawnPolecat(issueID string) error {
|
||||
cmd := exec.Command("gt", "spawn", "--rig", m.rig.Name, "--issue", issueID)
|
||||
cmd.Dir = m.workDir
|
||||
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s", strings.TrimSpace(string(output)))
|
||||
}
|
||||
|
||||
fmt.Printf(" Spawned: %s\n", strings.TrimSpace(string(output)))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -41,6 +41,30 @@ type Witness struct {
|
||||
|
||||
// Stats contains cumulative statistics.
|
||||
Stats WitnessStats `json:"stats"`
|
||||
|
||||
// Config contains auto-spawn configuration.
|
||||
Config WitnessConfig `json:"config"`
|
||||
|
||||
// SpawnedIssues tracks which issues have been spawned (to avoid duplicates).
|
||||
SpawnedIssues []string `json:"spawned_issues,omitempty"`
|
||||
}
|
||||
|
||||
// WitnessConfig holds the auto-spawn settings of a witness. It is stored
// under the "config" key of the witness state.
type WitnessConfig struct {
	// MaxWorkers caps the number of concurrent polecats. Values <= 0 are
	// treated as the default of 4 by the auto-spawn logic.
	MaxWorkers int `json:"max_workers"`

	// SpawnDelayMs is the pause between two spawns, in milliseconds. Values
	// <= 0 are treated as the default of 5000 by the auto-spawn logic.
	SpawnDelayMs int `json:"spawn_delay_ms"`

	// AutoSpawn enables automatic spawning for ready issues (documented
	// default: true).
	// NOTE(review): the Go zero value is false, so the default must be set
	// wherever the config is created or loaded - confirm.
	AutoSpawn bool `json:"auto_spawn"`

	// EpicID limits spawning to children of this epic (optional).
	EpicID string `json:"epic_id,omitempty"`

	// IssuePrefix limits spawning to issues whose ID starts with this
	// prefix (optional).
	IssuePrefix string `json:"issue_prefix,omitempty"`
}
|
||||
|
||||
// WitnessStats contains cumulative witness statistics.
|
||||
|
||||
Reference in New Issue
Block a user