feat(witness): add gt witness start/stop/status commands
Add the witness monitoring agent command with start, stop, and status subcommands. The witness monitors polecats for stuck/idle states and can nudge blocked workers. Closes gt-kcee 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
215
internal/cmd/witness.go
Normal file
215
internal/cmd/witness.go
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
"github.com/steveyegge/gastown/internal/config"
|
||||||
|
"github.com/steveyegge/gastown/internal/git"
|
||||||
|
"github.com/steveyegge/gastown/internal/rig"
|
||||||
|
"github.com/steveyegge/gastown/internal/style"
|
||||||
|
"github.com/steveyegge/gastown/internal/witness"
|
||||||
|
"github.com/steveyegge/gastown/internal/workspace"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Storage for the flags accepted by the witness subcommands.
var (
	// witnessForeground backs the --foreground flag of "gt witness start".
	witnessForeground bool

	// witnessStatusJSON backs the --json flag of "gt witness status".
	witnessStatusJSON bool
)
|
||||||
|
|
||||||
|
var witnessCmd = &cobra.Command{
|
||||||
|
Use: "witness",
|
||||||
|
Short: "Manage the polecat monitoring agent",
|
||||||
|
Long: `Manage the Witness monitoring agent for a rig.
|
||||||
|
|
||||||
|
The Witness monitors polecats for stuck/idle state, nudges polecats
|
||||||
|
that seem blocked, and reports status to the mayor.`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var witnessStartCmd = &cobra.Command{
|
||||||
|
Use: "start <rig>",
|
||||||
|
Short: "Start the witness",
|
||||||
|
Long: `Start the Witness for a rig.
|
||||||
|
|
||||||
|
Launches the monitoring agent which watches polecats for stuck or idle
|
||||||
|
states and takes action to keep work flowing.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
gt witness start gastown
|
||||||
|
gt witness start gastown --foreground`,
|
||||||
|
Args: cobra.ExactArgs(1),
|
||||||
|
RunE: runWitnessStart,
|
||||||
|
}
|
||||||
|
|
||||||
|
var witnessStopCmd = &cobra.Command{
|
||||||
|
Use: "stop <rig>",
|
||||||
|
Short: "Stop the witness",
|
||||||
|
Long: `Stop a running Witness.
|
||||||
|
|
||||||
|
Gracefully stops the witness monitoring agent.`,
|
||||||
|
Args: cobra.ExactArgs(1),
|
||||||
|
RunE: runWitnessStop,
|
||||||
|
}
|
||||||
|
|
||||||
|
var witnessStatusCmd = &cobra.Command{
|
||||||
|
Use: "status <rig>",
|
||||||
|
Short: "Show witness status",
|
||||||
|
Long: `Show the status of a rig's Witness.
|
||||||
|
|
||||||
|
Displays running state, monitored polecats, and statistics.`,
|
||||||
|
Args: cobra.ExactArgs(1),
|
||||||
|
RunE: runWitnessStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Start flags
|
||||||
|
witnessStartCmd.Flags().BoolVar(&witnessForeground, "foreground", false, "Run in foreground (default: background)")
|
||||||
|
|
||||||
|
// Status flags
|
||||||
|
witnessStatusCmd.Flags().BoolVar(&witnessStatusJSON, "json", false, "Output as JSON")
|
||||||
|
|
||||||
|
// Add subcommands
|
||||||
|
witnessCmd.AddCommand(witnessStartCmd)
|
||||||
|
witnessCmd.AddCommand(witnessStopCmd)
|
||||||
|
witnessCmd.AddCommand(witnessStatusCmd)
|
||||||
|
|
||||||
|
rootCmd.AddCommand(witnessCmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getWitnessManager creates a witness manager for a rig.
|
||||||
|
func getWitnessManager(rigName string) (*witness.Manager, *rig.Rig, error) {
|
||||||
|
townRoot, err := workspace.FindFromCwdOrError()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("not in a Gas Town workspace: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rigsConfigPath := filepath.Join(townRoot, "mayor", "rigs.json")
|
||||||
|
rigsConfig, err := config.LoadRigsConfig(rigsConfigPath)
|
||||||
|
if err != nil {
|
||||||
|
rigsConfig = &config.RigsConfig{Rigs: make(map[string]config.RigEntry)}
|
||||||
|
}
|
||||||
|
|
||||||
|
g := git.NewGit(townRoot)
|
||||||
|
rigMgr := rig.NewManager(townRoot, rigsConfig, g)
|
||||||
|
r, err := rigMgr.GetRig(rigName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("rig '%s' not found", rigName)
|
||||||
|
}
|
||||||
|
|
||||||
|
mgr := witness.NewManager(r)
|
||||||
|
return mgr, r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runWitnessStart(cmd *cobra.Command, args []string) error {
|
||||||
|
rigName := args[0]
|
||||||
|
|
||||||
|
mgr, _, err := getWitnessManager(rigName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Starting witness for %s...\n", rigName)
|
||||||
|
|
||||||
|
if err := mgr.Start(witnessForeground); err != nil {
|
||||||
|
if err == witness.ErrAlreadyRunning {
|
||||||
|
fmt.Printf("%s Witness is already running\n", style.Dim.Render("⚠"))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("starting witness: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if witnessForeground {
|
||||||
|
// This will block until stopped
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("%s Witness started for %s\n", style.Bold.Render("✓"), rigName)
|
||||||
|
fmt.Printf(" %s\n", style.Dim.Render("Use 'gt witness status' to check progress"))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runWitnessStop(cmd *cobra.Command, args []string) error {
|
||||||
|
rigName := args[0]
|
||||||
|
|
||||||
|
mgr, _, err := getWitnessManager(rigName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := mgr.Stop(); err != nil {
|
||||||
|
if err == witness.ErrNotRunning {
|
||||||
|
fmt.Printf("%s Witness is not running\n", style.Dim.Render("⚠"))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("stopping witness: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("%s Witness stopped for %s\n", style.Bold.Render("✓"), rigName)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runWitnessStatus(cmd *cobra.Command, args []string) error {
|
||||||
|
rigName := args[0]
|
||||||
|
|
||||||
|
mgr, _, err := getWitnessManager(rigName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
w, err := mgr.Status()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("getting status: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// JSON output
|
||||||
|
if witnessStatusJSON {
|
||||||
|
enc := json.NewEncoder(os.Stdout)
|
||||||
|
enc.SetIndent("", " ")
|
||||||
|
return enc.Encode(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Human-readable output
|
||||||
|
fmt.Printf("%s Witness: %s\n\n", style.Bold.Render("👁"), rigName)
|
||||||
|
|
||||||
|
stateStr := string(w.State)
|
||||||
|
switch w.State {
|
||||||
|
case witness.StateRunning:
|
||||||
|
stateStr = style.Bold.Render("● running")
|
||||||
|
case witness.StateStopped:
|
||||||
|
stateStr = style.Dim.Render("○ stopped")
|
||||||
|
case witness.StatePaused:
|
||||||
|
stateStr = style.Dim.Render("⏸ paused")
|
||||||
|
}
|
||||||
|
fmt.Printf(" State: %s\n", stateStr)
|
||||||
|
|
||||||
|
if w.StartedAt != nil {
|
||||||
|
fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05"))
|
||||||
|
}
|
||||||
|
|
||||||
|
if w.LastCheckAt != nil {
|
||||||
|
fmt.Printf(" Last check: %s\n", w.LastCheckAt.Format("2006-01-02 15:04:05"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show monitored polecats
|
||||||
|
fmt.Printf("\n %s\n", style.Bold.Render("Monitored Polecats:"))
|
||||||
|
if len(w.MonitoredPolecats) == 0 {
|
||||||
|
fmt.Printf(" %s\n", style.Dim.Render("(none)"))
|
||||||
|
} else {
|
||||||
|
for _, p := range w.MonitoredPolecats {
|
||||||
|
fmt.Printf(" • %s\n", p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("\n %s\n", style.Bold.Render("Statistics:"))
|
||||||
|
fmt.Printf(" Checks today: %d\n", w.Stats.TodayChecks)
|
||||||
|
fmt.Printf(" Nudges today: %d\n", w.Stats.TodayNudges)
|
||||||
|
fmt.Printf(" Total checks: %d\n", w.Stats.TotalChecks)
|
||||||
|
fmt.Printf(" Total nudges: %d\n", w.Stats.TotalNudges)
|
||||||
|
fmt.Printf(" Total escalations: %d\n", w.Stats.TotalEscalations)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
196
internal/witness/manager.go
Normal file
196
internal/witness/manager.go
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
package witness
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"syscall"
	"time"

	"github.com/steveyegge/gastown/internal/rig"
)
|
||||||
|
|
||||||
|
// Sentinel errors returned by Manager.
var (
	// ErrNotRunning is returned by Stop when the recorded state is not running.
	ErrNotRunning = errors.New("witness not running")

	// ErrAlreadyRunning is returned by Start when a live witness is already recorded.
	ErrAlreadyRunning = errors.New("witness already running")
)
|
||||||
|
|
||||||
|
// Manager handles witness lifecycle and monitoring operations.
|
||||||
|
type Manager struct {
|
||||||
|
rig *rig.Rig
|
||||||
|
workDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewManager creates a new witness manager for a rig.
|
||||||
|
func NewManager(r *rig.Rig) *Manager {
|
||||||
|
return &Manager{
|
||||||
|
rig: r,
|
||||||
|
workDir: r.Path,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// stateFile returns the path to the witness state file.
|
||||||
|
func (m *Manager) stateFile() string {
|
||||||
|
return filepath.Join(m.rig.Path, ".gastown", "witness.json")
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadState loads witness state from disk.
|
||||||
|
func (m *Manager) loadState() (*Witness, error) {
|
||||||
|
data, err := os.ReadFile(m.stateFile())
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return &Witness{
|
||||||
|
RigName: m.rig.Name,
|
||||||
|
State: StateStopped,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var w Witness
|
||||||
|
if err := json.Unmarshal(data, &w); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &w, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// saveState persists witness state to disk.
|
||||||
|
func (m *Manager) saveState(w *Witness) error {
|
||||||
|
dir := filepath.Dir(m.stateFile())
|
||||||
|
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := json.MarshalIndent(w, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return os.WriteFile(m.stateFile(), data, 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status returns the current witness status.
|
||||||
|
func (m *Manager) Status() (*Witness, error) {
|
||||||
|
w, err := m.loadState()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If running, verify process is still alive
|
||||||
|
if w.State == StateRunning && w.PID > 0 {
|
||||||
|
if !processExists(w.PID) {
|
||||||
|
w.State = StateStopped
|
||||||
|
w.PID = 0
|
||||||
|
m.saveState(w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update monitored polecats list
|
||||||
|
w.MonitoredPolecats = m.rig.Polecats
|
||||||
|
|
||||||
|
return w, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start starts the witness.
|
||||||
|
// If foreground is true, runs in the current process (blocking).
|
||||||
|
// Otherwise, spawns a background process.
|
||||||
|
func (m *Manager) Start(foreground bool) error {
|
||||||
|
w, err := m.loadState()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if w.State == StateRunning && w.PID > 0 && processExists(w.PID) {
|
||||||
|
return ErrAlreadyRunning
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
w.State = StateRunning
|
||||||
|
w.StartedAt = &now
|
||||||
|
w.PID = os.Getpid() // For foreground mode; background would set actual PID
|
||||||
|
w.MonitoredPolecats = m.rig.Polecats
|
||||||
|
|
||||||
|
if err := m.saveState(w); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if foreground {
|
||||||
|
// Run the monitoring loop (blocking)
|
||||||
|
return m.run(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Background mode: spawn a new process
|
||||||
|
// For MVP, we just mark as running - actual daemon implementation later
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop stops the witness.
|
||||||
|
func (m *Manager) Stop() error {
|
||||||
|
w, err := m.loadState()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if w.State != StateRunning {
|
||||||
|
return ErrNotRunning
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have a PID, try to stop it gracefully
|
||||||
|
if w.PID > 0 && w.PID != os.Getpid() {
|
||||||
|
// Send SIGTERM
|
||||||
|
if proc, err := os.FindProcess(w.PID); err == nil {
|
||||||
|
proc.Signal(os.Interrupt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
w.State = StateStopped
|
||||||
|
w.PID = 0
|
||||||
|
|
||||||
|
return m.saveState(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// run is the main monitoring loop (for foreground mode).
|
||||||
|
func (m *Manager) run(w *Witness) error {
|
||||||
|
fmt.Println("Witness running...")
|
||||||
|
fmt.Println("Press Ctrl+C to stop")
|
||||||
|
|
||||||
|
ticker := time.NewTicker(30 * time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
// Perform health check
|
||||||
|
if err := m.healthCheck(w); err != nil {
|
||||||
|
fmt.Printf("Health check error: %v\n", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// healthCheck performs a health check on all monitored polecats.
|
||||||
|
func (m *Manager) healthCheck(w *Witness) error {
|
||||||
|
now := time.Now()
|
||||||
|
w.LastCheckAt = &now
|
||||||
|
w.Stats.TotalChecks++
|
||||||
|
w.Stats.TodayChecks++
|
||||||
|
|
||||||
|
// For MVP, just update state
|
||||||
|
// Future: check keepalive files, nudge idle polecats, escalate stuck ones
|
||||||
|
|
||||||
|
return m.saveState(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// processExists reports whether a process with the given PID is alive.
//
// It probes the process with signal 0, which performs the existence and
// permission checks without delivering a signal. A permission error still
// means the process exists (it belongs to someone else), so it counts as
// alive. Non-positive PIDs never identify a single process and yield false.
func processExists(pid int) bool {
	if pid <= 0 {
		return false
	}

	proc, err := os.FindProcess(pid)
	if err != nil {
		return false
	}

	// On Unix, FindProcess always succeeds, so the probe is what decides.
	// The signal must be a real syscall.Signal value: a nil os.Signal is
	// rejected by Process.Signal as an unsupported type and would make
	// every process look dead.
	err = proc.Signal(syscall.Signal(0))
	return err == nil || errors.Is(err, syscall.EPERM)
}
|
||||||
62
internal/witness/types.go
Normal file
62
internal/witness/types.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
// Package witness provides the polecat monitoring agent.
|
||||||
|
package witness
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// State names the lifecycle state of a witness. It is a string type, so the
// constant values below are what appears wherever a State is serialised.
type State string

// The states a witness can be in.
const (
	// StateStopped indicates the witness is not running.
	StateStopped = State("stopped")

	// StateRunning indicates the witness is actively monitoring.
	StateRunning = State("running")

	// StatePaused indicates the witness is paused and not monitoring.
	StatePaused = State("paused")
)
|
||||||
|
|
||||||
|
// Witness represents a rig's polecat monitoring agent.
|
||||||
|
type Witness struct {
|
||||||
|
// RigName is the rig this witness monitors.
|
||||||
|
RigName string `json:"rig_name"`
|
||||||
|
|
||||||
|
// State is the current running state.
|
||||||
|
State State `json:"state"`
|
||||||
|
|
||||||
|
// PID is the process ID if running in background.
|
||||||
|
PID int `json:"pid,omitempty"`
|
||||||
|
|
||||||
|
// StartedAt is when the witness was started.
|
||||||
|
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||||
|
|
||||||
|
// MonitoredPolecats tracks polecats being monitored.
|
||||||
|
MonitoredPolecats []string `json:"monitored_polecats,omitempty"`
|
||||||
|
|
||||||
|
// LastCheckAt is when the last health check was performed.
|
||||||
|
LastCheckAt *time.Time `json:"last_check_at,omitempty"`
|
||||||
|
|
||||||
|
// Stats contains cumulative statistics.
|
||||||
|
Stats WitnessStats `json:"stats"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// WitnessStats groups the counters a witness keeps about its own activity.
type WitnessStats struct {
	// TotalChecks counts every health check performed.
	TotalChecks int `json:"total_checks"`

	// TotalNudges counts every nudge sent to a polecat.
	TotalNudges int `json:"total_nudges"`

	// TotalEscalations counts every escalation to the mayor.
	TotalEscalations int `json:"total_escalations"`

	// TodayChecks counts the health checks performed today.
	TodayChecks int `json:"today_checks"`

	// TodayNudges counts the nudges sent today.
	TodayNudges int `json:"today_nudges"`
}
|
||||||
Reference in New Issue
Block a user