refactor(witness,refinery): ZFC-compliant state management
Remove state files from witness and refinery managers, following the "Discover, Don't Track" principle. Tmux session existence is now the source of truth for running state (like deacon).

Changes:
- Add IsRunning() that checks tmux HasSession
- Change Status() to return *tmux.SessionInfo
- Remove loadState/saveState/stateManager
- Simplify Start()/Stop() to not use state files
- Update CLI commands (witness/refinery/rig) for new API
- Update tests to be ZFC-compliant

This fixes state file divergence issues where witness/refinery could show "running" when the actual tmux session was dead.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Steve Yegge
parent
126ec84bb3
commit
5218102f49
@@ -337,6 +337,14 @@ func runRefineryStop(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// RefineryStatusOutput is the JSON output format for refinery status.
// It is populated by runRefineryStatus and written to stdout with an
// indented JSON encoder when the JSON status flag is set.
|
||||
type RefineryStatusOutput struct {
|
||||
// Running is the value reported by the refinery manager's IsRunning
// (the tmux session is the source of truth for running state).
Running bool `json:"running"`
|
||||
// RigName is the rig name the status was requested for.
RigName string `json:"rig_name"`
|
||||
// Session is the session name taken from the manager's Status() result.
// It is left empty, and therefore omitted from the JSON, when Status()
// returns no session info.
Session string `json:"session,omitempty"`
|
||||
// QueueLength is the number of items returned by the manager's Queue().
// The Queue() error is ignored by the caller, so a failed lookup
// presumably reports 0 here rather than an error.
QueueLength int `json:"queue_length"`
|
||||
}
|
||||
|
||||
func runRefineryStatus(cmd *cobra.Command, args []string) error {
|
||||
rigName := ""
|
||||
if len(args) > 0 {
|
||||
@@ -348,58 +356,42 @@ func runRefineryStatus(cmd *cobra.Command, args []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
ref, err := mgr.Status()
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting status: %w", err)
|
||||
}
|
||||
// ZFC: tmux is source of truth for running state
|
||||
running, _ := mgr.IsRunning()
|
||||
sessionInfo, _ := mgr.Status() // may be nil if not running
|
||||
|
||||
// Get queue from beads
|
||||
queue, _ := mgr.Queue()
|
||||
queueLen := len(queue)
|
||||
|
||||
// JSON output
|
||||
if refineryStatusJSON {
|
||||
output := RefineryStatusOutput{
|
||||
Running: running,
|
||||
RigName: rigName,
|
||||
QueueLength: queueLen,
|
||||
}
|
||||
if sessionInfo != nil {
|
||||
output.Session = sessionInfo.Name
|
||||
}
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(ref)
|
||||
return enc.Encode(output)
|
||||
}
|
||||
|
||||
// Human-readable output
|
||||
fmt.Printf("%s Refinery: %s\n\n", style.Bold.Render("⚙"), rigName)
|
||||
|
||||
stateStr := string(ref.State)
|
||||
switch ref.State {
|
||||
case refinery.StateRunning:
|
||||
stateStr = style.Bold.Render("● running")
|
||||
case refinery.StateStopped:
|
||||
stateStr = style.Dim.Render("○ stopped")
|
||||
case refinery.StatePaused:
|
||||
stateStr = style.Dim.Render("⏸ paused")
|
||||
}
|
||||
fmt.Printf(" State: %s\n", stateStr)
|
||||
|
||||
if ref.StartedAt != nil {
|
||||
fmt.Printf(" Started: %s\n", ref.StartedAt.Format("2006-01-02 15:04:05"))
|
||||
}
|
||||
|
||||
if ref.CurrentMR != nil {
|
||||
fmt.Printf("\n %s\n", style.Bold.Render("Currently Processing:"))
|
||||
fmt.Printf(" Branch: %s\n", ref.CurrentMR.Branch)
|
||||
fmt.Printf(" Worker: %s\n", ref.CurrentMR.Worker)
|
||||
if ref.CurrentMR.IssueID != "" {
|
||||
fmt.Printf(" Issue: %s\n", ref.CurrentMR.IssueID)
|
||||
if running {
|
||||
fmt.Printf(" State: %s\n", style.Bold.Render("● running"))
|
||||
if sessionInfo != nil {
|
||||
fmt.Printf(" Session: %s\n", sessionInfo.Name)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf(" State: %s\n", style.Dim.Render("○ stopped"))
|
||||
}
|
||||
|
||||
// Get queue length
|
||||
queue, _ := mgr.Queue()
|
||||
pendingCount := 0
|
||||
for _, item := range queue {
|
||||
if item.Position > 0 { // Not currently processing
|
||||
pendingCount++
|
||||
}
|
||||
}
|
||||
fmt.Printf("\n Queue: %d pending\n", pendingCount)
|
||||
|
||||
if ref.LastMergeAt != nil {
|
||||
fmt.Printf(" Last merge: %s\n", ref.LastMergeAt.Format("2006-01-02 15:04:05"))
|
||||
}
|
||||
fmt.Printf("\n Queue: %d pending\n", queueLen)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -977,8 +977,7 @@ func runRigShutdown(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// 2. Stop the refinery
|
||||
refMgr := refinery.NewManager(r)
|
||||
refStatus, err := refMgr.Status()
|
||||
if err == nil && refStatus.State == refinery.StateRunning {
|
||||
if running, _ := refMgr.IsRunning(); running {
|
||||
fmt.Printf(" Stopping refinery...\n")
|
||||
if err := refMgr.Stop(); err != nil {
|
||||
errors = append(errors, fmt.Sprintf("refinery: %v", err))
|
||||
@@ -987,8 +986,7 @@ func runRigShutdown(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// 3. Stop the witness
|
||||
witMgr := witness.NewManager(r)
|
||||
witStatus, err := witMgr.Status()
|
||||
if err == nil && witStatus.State == witness.StateRunning {
|
||||
if running, _ := witMgr.IsRunning(); running {
|
||||
fmt.Printf(" Stopping witness...\n")
|
||||
if err := witMgr.Stop(); err != nil {
|
||||
errors = append(errors, fmt.Sprintf("witness: %v", err))
|
||||
@@ -1077,14 +1075,9 @@ func runRigStatus(cmd *cobra.Command, args []string) error {
|
||||
fmt.Printf("%s\n", style.Bold.Render("Witness"))
|
||||
witnessSession := fmt.Sprintf("gt-%s-witness", rigName)
|
||||
witnessRunning, _ := t.HasSession(witnessSession)
|
||||
witMgr := witness.NewManager(r)
|
||||
witStatus, _ := witMgr.Status()
|
||||
_ = witness.NewManager(r) // silence unused warning, manager created for consistency
|
||||
if witnessRunning {
|
||||
fmt.Printf(" %s running", style.Success.Render("●"))
|
||||
if witStatus != nil && witStatus.StartedAt != nil {
|
||||
fmt.Printf(" (uptime: %s)", formatDuration(time.Since(*witStatus.StartedAt)))
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
fmt.Printf(" %s running\n", style.Success.Render("●"))
|
||||
} else {
|
||||
fmt.Printf(" %s stopped\n", style.Dim.Render("○"))
|
||||
}
|
||||
@@ -1092,16 +1085,10 @@ func runRigStatus(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// Refinery status
|
||||
fmt.Printf("%s\n", style.Bold.Render("Refinery"))
|
||||
refinerySession := fmt.Sprintf("gt-%s-refinery", rigName)
|
||||
refineryRunning, _ := t.HasSession(refinerySession)
|
||||
refMgr := refinery.NewManager(r)
|
||||
refStatus, _ := refMgr.Status()
|
||||
refineryRunning, _ := refMgr.IsRunning()
|
||||
if refineryRunning {
|
||||
fmt.Printf(" %s running", style.Success.Render("●"))
|
||||
if refStatus != nil && refStatus.StartedAt != nil {
|
||||
fmt.Printf(" (uptime: %s)", formatDuration(time.Since(*refStatus.StartedAt)))
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
fmt.Printf(" %s running\n", style.Success.Render("●"))
|
||||
// Show queue size
|
||||
queue, err := refMgr.Queue()
|
||||
if err == nil && len(queue) > 0 {
|
||||
@@ -1254,8 +1241,7 @@ func runRigStop(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// 2. Stop the refinery
|
||||
refMgr := refinery.NewManager(r)
|
||||
refStatus, err := refMgr.Status()
|
||||
if err == nil && refStatus.State == refinery.StateRunning {
|
||||
if running, _ := refMgr.IsRunning(); running {
|
||||
fmt.Printf(" Stopping refinery...\n")
|
||||
if err := refMgr.Stop(); err != nil {
|
||||
errors = append(errors, fmt.Sprintf("refinery: %v", err))
|
||||
@@ -1264,8 +1250,7 @@ func runRigStop(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// 3. Stop the witness
|
||||
witMgr := witness.NewManager(r)
|
||||
witStatus, err := witMgr.Status()
|
||||
if err == nil && witStatus.State == witness.StateRunning {
|
||||
if running, _ := witMgr.IsRunning(); running {
|
||||
fmt.Printf(" Stopping witness...\n")
|
||||
if err := witMgr.Stop(); err != nil {
|
||||
errors = append(errors, fmt.Sprintf("witness: %v", err))
|
||||
@@ -1387,8 +1372,7 @@ func runRigRestart(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// 2. Stop the refinery
|
||||
refMgr := refinery.NewManager(r)
|
||||
refStatus, err := refMgr.Status()
|
||||
if err == nil && refStatus.State == refinery.StateRunning {
|
||||
if running, _ := refMgr.IsRunning(); running {
|
||||
fmt.Printf(" Stopping refinery...\n")
|
||||
if err := refMgr.Stop(); err != nil {
|
||||
stopErrors = append(stopErrors, fmt.Sprintf("refinery: %v", err))
|
||||
@@ -1397,8 +1381,7 @@ func runRigRestart(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// 3. Stop the witness
|
||||
witMgr := witness.NewManager(r)
|
||||
witStatus, err := witMgr.Status()
|
||||
if err == nil && witStatus.State == witness.StateRunning {
|
||||
if running, _ := witMgr.IsRunning(); running {
|
||||
fmt.Printf(" Stopping witness...\n")
|
||||
if err := witMgr.Stop(); err != nil {
|
||||
stopErrors = append(stopErrors, fmt.Sprintf("witness: %v", err))
|
||||
|
||||
@@ -218,65 +218,65 @@ func runWitnessStop(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// WitnessStatusOutput is the JSON output format for witness status.
// It is populated by runWitnessStatus and written to stdout with an
// indented JSON encoder when the JSON status flag is set.
|
||||
type WitnessStatusOutput struct {
|
||||
// Running is the value reported by the witness manager's IsRunning
// (the tmux session is the source of truth for running state).
Running bool `json:"running"`
|
||||
// RigName is the rig name the status was requested for.
RigName string `json:"rig_name"`
|
||||
// Session is the session name taken from the manager's Status() result.
// It is left empty, and therefore omitted from the JSON, when Status()
// returns no session info.
Session string `json:"session,omitempty"`
|
||||
// MonitoredPolecats is copied from the rig's Polecats list (rig config,
// not a state file). It is omitted from the JSON when empty.
MonitoredPolecats []string `json:"monitored_polecats,omitempty"`
|
||||
}
|
||||
|
||||
func runWitnessStatus(cmd *cobra.Command, args []string) error {
|
||||
rigName := args[0]
|
||||
|
||||
mgr, err := getWitnessManager(rigName)
|
||||
// Get rig for polecat info
|
||||
_, r, err := getRig(rigName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w, err := mgr.Status()
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting status: %w", err)
|
||||
}
|
||||
mgr := witness.NewManager(r)
|
||||
|
||||
// Check actual tmux session state (more reliable than state file)
|
||||
t := tmux.NewTmux()
|
||||
sessionName := witnessSessionName(rigName)
|
||||
sessionRunning, _ := t.HasSession(sessionName)
|
||||
// ZFC: tmux is source of truth for running state
|
||||
running, _ := mgr.IsRunning()
|
||||
sessionInfo, _ := mgr.Status() // may be nil if not running
|
||||
|
||||
// Reconcile state: tmux session is the source of truth for background mode
|
||||
if sessionRunning && w.State != witness.StateRunning {
|
||||
w.State = witness.StateRunning
|
||||
} else if !sessionRunning && w.State == witness.StateRunning {
|
||||
w.State = witness.StateStopped
|
||||
}
|
||||
// Polecats come from rig config, not state file
|
||||
polecats := r.Polecats
|
||||
|
||||
// JSON output
|
||||
if witnessStatusJSON {
|
||||
output := WitnessStatusOutput{
|
||||
Running: running,
|
||||
RigName: rigName,
|
||||
MonitoredPolecats: polecats,
|
||||
}
|
||||
if sessionInfo != nil {
|
||||
output.Session = sessionInfo.Name
|
||||
}
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(w)
|
||||
return enc.Encode(output)
|
||||
}
|
||||
|
||||
// Human-readable output
|
||||
fmt.Printf("%s Witness: %s\n\n", style.Bold.Render(AgentTypeIcons[AgentWitness]), rigName)
|
||||
|
||||
stateStr := string(w.State)
|
||||
switch w.State {
|
||||
case witness.StateRunning:
|
||||
stateStr = style.Bold.Render("● running")
|
||||
case witness.StateStopped:
|
||||
stateStr = style.Dim.Render("○ stopped")
|
||||
case witness.StatePaused:
|
||||
stateStr = style.Dim.Render("⏸ paused")
|
||||
}
|
||||
fmt.Printf(" State: %s\n", stateStr)
|
||||
if sessionRunning {
|
||||
fmt.Printf(" Session: %s\n", sessionName)
|
||||
}
|
||||
|
||||
if w.StartedAt != nil {
|
||||
fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05"))
|
||||
if running {
|
||||
fmt.Printf(" State: %s\n", style.Bold.Render("● running"))
|
||||
if sessionInfo != nil {
|
||||
fmt.Printf(" Session: %s\n", sessionInfo.Name)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf(" State: %s\n", style.Dim.Render("○ stopped"))
|
||||
}
|
||||
|
||||
// Show monitored polecats
|
||||
fmt.Printf("\n %s\n", style.Bold.Render("Monitored Polecats:"))
|
||||
if len(w.MonitoredPolecats) == 0 {
|
||||
if len(polecats) == 0 {
|
||||
fmt.Printf(" %s\n", style.Dim.Render("(none)"))
|
||||
} else {
|
||||
for _, p := range w.MonitoredPolecats {
|
||||
for _, p := range polecats {
|
||||
fmt.Printf(" • %s\n", p)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user