refactor(witness,refinery): ZFC-compliant state management

Remove state files from witness and refinery managers, following
the "Discover, Don't Track" principle. Tmux session existence is
now the source of truth for running state (like deacon).

Changes:
- Add IsRunning() that checks tmux HasSession
- Change Status() to return *tmux.SessionInfo
- Remove loadState/saveState/stateManager
- Simplify Start()/Stop() to not use state files
- Update CLI commands (witness/refinery/rig) for new API
- Update tests to be ZFC-compliant

This fixes state-file divergence, where witness/refinery could
show "running" even though the actual tmux session was dead.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gastown/crew/mel
2026-01-20 20:00:43 -08:00
committed by Steve Yegge
parent 126ec84bb3
commit 5218102f49
6 changed files with 237 additions and 513 deletions

View File

@@ -337,6 +337,14 @@ func runRefineryStop(cmd *cobra.Command, args []string) error {
return nil
}
// RefineryStatusOutput is the JSON output format for refinery status.
// runRefineryStatus fills it in and encodes it to stdout when the
// refineryStatusJSON flag is set.
type RefineryStatusOutput struct {
// Running is the result of the refinery manager's IsRunning() check.
Running bool `json:"running"`
// RigName is the name of the rig whose refinery is being reported.
RigName string `json:"rig_name"`
// Session is the session name taken from the manager's Status() result.
// It is left empty (and therefore omitted from the JSON) when Status()
// returns no session info.
Session string `json:"session,omitempty"`
// QueueLength is the number of items returned by the manager's Queue().
QueueLength int `json:"queue_length"`
}
func runRefineryStatus(cmd *cobra.Command, args []string) error {
rigName := ""
if len(args) > 0 {
@@ -348,58 +356,42 @@ func runRefineryStatus(cmd *cobra.Command, args []string) error {
return err
}
ref, err := mgr.Status()
if err != nil {
return fmt.Errorf("getting status: %w", err)
}
// ZFC: tmux is source of truth for running state
running, _ := mgr.IsRunning()
sessionInfo, _ := mgr.Status() // may be nil if not running
// Get queue from beads
queue, _ := mgr.Queue()
queueLen := len(queue)
// JSON output
if refineryStatusJSON {
output := RefineryStatusOutput{
Running: running,
RigName: rigName,
QueueLength: queueLen,
}
if sessionInfo != nil {
output.Session = sessionInfo.Name
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
return enc.Encode(ref)
return enc.Encode(output)
}
// Human-readable output
fmt.Printf("%s Refinery: %s\n\n", style.Bold.Render("⚙"), rigName)
stateStr := string(ref.State)
switch ref.State {
case refinery.StateRunning:
stateStr = style.Bold.Render("● running")
case refinery.StateStopped:
stateStr = style.Dim.Render("○ stopped")
case refinery.StatePaused:
stateStr = style.Dim.Render("⏸ paused")
}
fmt.Printf(" State: %s\n", stateStr)
if ref.StartedAt != nil {
fmt.Printf(" Started: %s\n", ref.StartedAt.Format("2006-01-02 15:04:05"))
}
if ref.CurrentMR != nil {
fmt.Printf("\n %s\n", style.Bold.Render("Currently Processing:"))
fmt.Printf(" Branch: %s\n", ref.CurrentMR.Branch)
fmt.Printf(" Worker: %s\n", ref.CurrentMR.Worker)
if ref.CurrentMR.IssueID != "" {
fmt.Printf(" Issue: %s\n", ref.CurrentMR.IssueID)
if running {
fmt.Printf(" State: %s\n", style.Bold.Render("● running"))
if sessionInfo != nil {
fmt.Printf(" Session: %s\n", sessionInfo.Name)
}
} else {
fmt.Printf(" State: %s\n", style.Dim.Render("○ stopped"))
}
// Get queue length
queue, _ := mgr.Queue()
pendingCount := 0
for _, item := range queue {
if item.Position > 0 { // Not currently processing
pendingCount++
}
}
fmt.Printf("\n Queue: %d pending\n", pendingCount)
if ref.LastMergeAt != nil {
fmt.Printf(" Last merge: %s\n", ref.LastMergeAt.Format("2006-01-02 15:04:05"))
}
fmt.Printf("\n Queue: %d pending\n", queueLen)
return nil
}

View File

@@ -977,8 +977,7 @@ func runRigShutdown(cmd *cobra.Command, args []string) error {
// 2. Stop the refinery
refMgr := refinery.NewManager(r)
refStatus, err := refMgr.Status()
if err == nil && refStatus.State == refinery.StateRunning {
if running, _ := refMgr.IsRunning(); running {
fmt.Printf(" Stopping refinery...\n")
if err := refMgr.Stop(); err != nil {
errors = append(errors, fmt.Sprintf("refinery: %v", err))
@@ -987,8 +986,7 @@ func runRigShutdown(cmd *cobra.Command, args []string) error {
// 3. Stop the witness
witMgr := witness.NewManager(r)
witStatus, err := witMgr.Status()
if err == nil && witStatus.State == witness.StateRunning {
if running, _ := witMgr.IsRunning(); running {
fmt.Printf(" Stopping witness...\n")
if err := witMgr.Stop(); err != nil {
errors = append(errors, fmt.Sprintf("witness: %v", err))
@@ -1077,14 +1075,9 @@ func runRigStatus(cmd *cobra.Command, args []string) error {
fmt.Printf("%s\n", style.Bold.Render("Witness"))
witnessSession := fmt.Sprintf("gt-%s-witness", rigName)
witnessRunning, _ := t.HasSession(witnessSession)
witMgr := witness.NewManager(r)
witStatus, _ := witMgr.Status()
_ = witness.NewManager(r) // silence unused warning, manager created for consistency
if witnessRunning {
fmt.Printf(" %s running", style.Success.Render("●"))
if witStatus != nil && witStatus.StartedAt != nil {
fmt.Printf(" (uptime: %s)", formatDuration(time.Since(*witStatus.StartedAt)))
}
fmt.Printf("\n")
fmt.Printf(" %s running\n", style.Success.Render("●"))
} else {
fmt.Printf(" %s stopped\n", style.Dim.Render("○"))
}
@@ -1092,16 +1085,10 @@ func runRigStatus(cmd *cobra.Command, args []string) error {
// Refinery status
fmt.Printf("%s\n", style.Bold.Render("Refinery"))
refinerySession := fmt.Sprintf("gt-%s-refinery", rigName)
refineryRunning, _ := t.HasSession(refinerySession)
refMgr := refinery.NewManager(r)
refStatus, _ := refMgr.Status()
refineryRunning, _ := refMgr.IsRunning()
if refineryRunning {
fmt.Printf(" %s running", style.Success.Render("●"))
if refStatus != nil && refStatus.StartedAt != nil {
fmt.Printf(" (uptime: %s)", formatDuration(time.Since(*refStatus.StartedAt)))
}
fmt.Printf("\n")
fmt.Printf(" %s running\n", style.Success.Render("●"))
// Show queue size
queue, err := refMgr.Queue()
if err == nil && len(queue) > 0 {
@@ -1254,8 +1241,7 @@ func runRigStop(cmd *cobra.Command, args []string) error {
// 2. Stop the refinery
refMgr := refinery.NewManager(r)
refStatus, err := refMgr.Status()
if err == nil && refStatus.State == refinery.StateRunning {
if running, _ := refMgr.IsRunning(); running {
fmt.Printf(" Stopping refinery...\n")
if err := refMgr.Stop(); err != nil {
errors = append(errors, fmt.Sprintf("refinery: %v", err))
@@ -1264,8 +1250,7 @@ func runRigStop(cmd *cobra.Command, args []string) error {
// 3. Stop the witness
witMgr := witness.NewManager(r)
witStatus, err := witMgr.Status()
if err == nil && witStatus.State == witness.StateRunning {
if running, _ := witMgr.IsRunning(); running {
fmt.Printf(" Stopping witness...\n")
if err := witMgr.Stop(); err != nil {
errors = append(errors, fmt.Sprintf("witness: %v", err))
@@ -1387,8 +1372,7 @@ func runRigRestart(cmd *cobra.Command, args []string) error {
// 2. Stop the refinery
refMgr := refinery.NewManager(r)
refStatus, err := refMgr.Status()
if err == nil && refStatus.State == refinery.StateRunning {
if running, _ := refMgr.IsRunning(); running {
fmt.Printf(" Stopping refinery...\n")
if err := refMgr.Stop(); err != nil {
stopErrors = append(stopErrors, fmt.Sprintf("refinery: %v", err))
@@ -1397,8 +1381,7 @@ func runRigRestart(cmd *cobra.Command, args []string) error {
// 3. Stop the witness
witMgr := witness.NewManager(r)
witStatus, err := witMgr.Status()
if err == nil && witStatus.State == witness.StateRunning {
if running, _ := witMgr.IsRunning(); running {
fmt.Printf(" Stopping witness...\n")
if err := witMgr.Stop(); err != nil {
stopErrors = append(stopErrors, fmt.Sprintf("witness: %v", err))

View File

@@ -218,65 +218,65 @@ func runWitnessStop(cmd *cobra.Command, args []string) error {
return nil
}
// WitnessStatusOutput is the JSON output format for witness status.
// runWitnessStatus fills it in and encodes it to stdout when the
// witnessStatusJSON flag is set.
type WitnessStatusOutput struct {
// Running is the result of the witness manager's IsRunning() check.
Running bool `json:"running"`
// RigName is the name of the rig whose witness is being reported.
RigName string `json:"rig_name"`
// Session is the session name taken from the manager's Status() result.
// It is left empty (and therefore omitted from the JSON) when Status()
// returns no session info.
Session string `json:"session,omitempty"`
// MonitoredPolecats is copied from the rig's Polecats list; it is omitted
// from the JSON when empty.
MonitoredPolecats []string `json:"monitored_polecats,omitempty"`
}
func runWitnessStatus(cmd *cobra.Command, args []string) error {
rigName := args[0]
mgr, err := getWitnessManager(rigName)
// Get rig for polecat info
_, r, err := getRig(rigName)
if err != nil {
return err
}
w, err := mgr.Status()
if err != nil {
return fmt.Errorf("getting status: %w", err)
}
mgr := witness.NewManager(r)
// Check actual tmux session state (more reliable than state file)
t := tmux.NewTmux()
sessionName := witnessSessionName(rigName)
sessionRunning, _ := t.HasSession(sessionName)
// ZFC: tmux is source of truth for running state
running, _ := mgr.IsRunning()
sessionInfo, _ := mgr.Status() // may be nil if not running
// Reconcile state: tmux session is the source of truth for background mode
if sessionRunning && w.State != witness.StateRunning {
w.State = witness.StateRunning
} else if !sessionRunning && w.State == witness.StateRunning {
w.State = witness.StateStopped
}
// Polecats come from rig config, not state file
polecats := r.Polecats
// JSON output
if witnessStatusJSON {
output := WitnessStatusOutput{
Running: running,
RigName: rigName,
MonitoredPolecats: polecats,
}
if sessionInfo != nil {
output.Session = sessionInfo.Name
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
return enc.Encode(w)
return enc.Encode(output)
}
// Human-readable output
fmt.Printf("%s Witness: %s\n\n", style.Bold.Render(AgentTypeIcons[AgentWitness]), rigName)
stateStr := string(w.State)
switch w.State {
case witness.StateRunning:
stateStr = style.Bold.Render("● running")
case witness.StateStopped:
stateStr = style.Dim.Render("○ stopped")
case witness.StatePaused:
stateStr = style.Dim.Render("⏸ paused")
}
fmt.Printf(" State: %s\n", stateStr)
if sessionRunning {
fmt.Printf(" Session: %s\n", sessionName)
}
if w.StartedAt != nil {
fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05"))
if running {
fmt.Printf(" State: %s\n", style.Bold.Render("● running"))
if sessionInfo != nil {
fmt.Printf(" Session: %s\n", sessionInfo.Name)
}
} else {
fmt.Printf(" State: %s\n", style.Dim.Render("○ stopped"))
}
// Show monitored polecats
fmt.Printf("\n %s\n", style.Bold.Render("Monitored Polecats:"))
if len(w.MonitoredPolecats) == 0 {
if len(polecats) == 0 {
fmt.Printf(" %s\n", style.Dim.Render("(none)"))
} else {
for _, p := range w.MonitoredPolecats {
for _, p := range polecats {
fmt.Printf(" • %s\n", p)
}
}