fix: Remove PID/tmux state inference (gt-psuw7)
ZFC compliance: daemon becomes pure transport layer, trusting agent beads.

Changes:
- refinery Status(): Simply returns loaded state, no PID/tmux reconciliation
- witness Status(): Simply returns loaded state, no PID inference
- daemon ensureDeaconRunning(): Trusts agent bead state, no tmux fallback
- daemon pokeDeacon(): Trusts agent bead state, no HasSession check

Removed:
- 78 lines of state inference code (PID checks, tmux session parsing)
- "Reconciliation" logic that overwrote agent-reported state

Note: Timeout fallback for dead agents is gt-2hzl4 (separate issue).

Reference: ~/gt/docs/zfc-violations-audit.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -238,54 +238,21 @@ func (d *Daemon) nextMOTD() string {
|
|||||||
return deaconMOTDMessages[nextIdx]
|
return deaconMOTDMessages[nextIdx]
|
||||||
}
|
}
|
||||||
|
|
||||||
// ensureDeaconRunning checks if the Deacon session exists and Claude is running.
|
// ensureDeaconRunning ensures the Deacon is running.
|
||||||
// If the session exists but Claude has exited, it restarts Claude.
|
// ZFC-compliant: trusts agent bead state, no tmux inference (gt-psuw7).
|
||||||
// If the session doesn't exist, it creates it and starts Claude.
|
|
||||||
// The Deacon is the system's heartbeat - it must always be running.
|
// The Deacon is the system's heartbeat - it must always be running.
|
||||||
func (d *Daemon) ensureDeaconRunning() {
|
func (d *Daemon) ensureDeaconRunning() {
|
||||||
// Check agent bead state (ZFC: trust what agent reports)
|
// Check agent bead state (ZFC: trust what agent reports)
|
||||||
// This is the preferred state source per gt-39ttg
|
|
||||||
beadState, beadErr := d.getAgentBeadState("gt-deacon")
|
beadState, beadErr := d.getAgentBeadState("gt-deacon")
|
||||||
if beadErr == nil {
|
if beadErr == nil {
|
||||||
// Agent bead exists - check its state
|
|
||||||
if beadState == "running" || beadState == "working" {
|
if beadState == "running" || beadState == "working" {
|
||||||
// Agent reports it's running - trust it
|
// Agent reports it's running - trust it
|
||||||
// (Future: gt-2hzl4 will add timeout fallback for stale state)
|
// Note: gt-2hzl4 will add timeout fallback for stale state
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Agent reports not running - fall through to tmux check
|
|
||||||
}
|
}
|
||||||
// If agent bead not found, fall through to legacy tmux detection
|
// Agent not running (or bead not found) - start it
|
||||||
|
d.logger.Println("Deacon not running per agent bead, starting...")
|
||||||
sessionExists, err := d.tmux.HasSession(DeaconSessionName)
|
|
||||||
if err != nil {
|
|
||||||
d.logger.Printf("Error checking Deacon session: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if sessionExists {
|
|
||||||
// Session exists - check if Claude is actually running
|
|
||||||
cmd, err := d.tmux.GetPaneCommand(DeaconSessionName)
|
|
||||||
if err != nil {
|
|
||||||
d.logger.Printf("Error checking Deacon pane command: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// If Claude is running (node process), we're good
|
|
||||||
if cmd == "node" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Claude has exited (shell is showing) - restart it
|
|
||||||
d.logger.Printf("Deacon session exists but Claude exited (cmd=%s), restarting...", cmd)
|
|
||||||
if err := d.tmux.SendKeys(DeaconSessionName, "export GT_ROLE=deacon BD_ACTOR=deacon && claude --dangerously-skip-permissions"); err != nil {
|
|
||||||
d.logger.Printf("Error restarting Claude in Deacon session: %v", err)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Session doesn't exist - create it and start Claude
|
|
||||||
d.logger.Println("Deacon session not running, starting...")
|
|
||||||
|
|
||||||
// Create session in deacon directory (ensures correct CLAUDE.md is loaded)
|
// Create session in deacon directory (ensures correct CLAUDE.md is loaded)
|
||||||
deaconDir := filepath.Join(d.config.TownRoot, "deacon")
|
deaconDir := filepath.Join(d.config.TownRoot, "deacon")
|
||||||
@@ -310,21 +277,17 @@ func (d *Daemon) ensureDeaconRunning() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// pokeDeacon sends a heartbeat message to the Deacon session.
|
// pokeDeacon sends a heartbeat message to the Deacon session.
|
||||||
// Simple notification - no staleness checking or backoff logic.
|
// ZFC-compliant: trusts agent bead state, no tmux inference (gt-psuw7).
|
||||||
// The Deacon molecule decides what to do with heartbeats.
|
// The Deacon molecule decides what to do with heartbeats.
|
||||||
func (d *Daemon) pokeDeacon() {
|
func (d *Daemon) pokeDeacon() {
|
||||||
running, err := d.tmux.HasSession(DeaconSessionName)
|
// Check agent bead state (ZFC: trust what agent reports)
|
||||||
if err != nil {
|
beadState, beadErr := d.getAgentBeadState("gt-deacon")
|
||||||
d.logger.Printf("Error checking Deacon session: %v", err)
|
if beadErr != nil || (beadState != "running" && beadState != "working") {
|
||||||
|
// Agent not running per bead - don't poke (ensureDeaconRunning should start it)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if !running {
|
// Agent reports running - send heartbeat
|
||||||
d.logger.Println("Deacon session not running after ensure, skipping poke")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send heartbeat message with rotating MOTD
|
|
||||||
motd := d.nextMOTD()
|
motd := d.nextMOTD()
|
||||||
msg := fmt.Sprintf("HEARTBEAT: %s", motd)
|
msg := fmt.Sprintf("HEARTBEAT: %s", motd)
|
||||||
if err := d.tmux.SendKeysReplace(DeaconSessionName, msg, 50); err != nil {
|
if err := d.tmux.SendKeysReplace(DeaconSessionName, msg, 50); err != nil {
|
||||||
|
|||||||
@@ -96,44 +96,10 @@ func (m *Manager) saveState(ref *Refinery) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Status returns the current refinery status.
|
// Status returns the current refinery status.
|
||||||
|
// ZFC-compliant: trusts agent-reported state, no PID/tmux inference.
|
||||||
|
// The daemon reads agent bead state for liveness checks.
|
||||||
func (m *Manager) Status() (*Refinery, error) {
|
func (m *Manager) Status() (*Refinery, error) {
|
||||||
ref, err := m.loadState()
|
return m.loadState()
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if tmux session exists
|
|
||||||
t := tmux.NewTmux()
|
|
||||||
sessionID := m.sessionName()
|
|
||||||
sessionRunning, _ := t.HasSession(sessionID)
|
|
||||||
|
|
||||||
// If tmux session is running, refinery is running
|
|
||||||
if sessionRunning {
|
|
||||||
if ref.State != StateRunning {
|
|
||||||
// Update state to match reality (non-fatal: state file update)
|
|
||||||
now := time.Now()
|
|
||||||
ref.State = StateRunning
|
|
||||||
if ref.StartedAt == nil {
|
|
||||||
ref.StartedAt = &now
|
|
||||||
}
|
|
||||||
_ = m.saveState(ref)
|
|
||||||
}
|
|
||||||
return ref, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// If state says running but tmux session doesn't exist, check PID
|
|
||||||
if ref.State == StateRunning {
|
|
||||||
if ref.PID > 0 && processExists(ref.PID) {
|
|
||||||
// Process is still running (foreground mode without tmux)
|
|
||||||
return ref, nil
|
|
||||||
}
|
|
||||||
// Neither session nor process exists - mark as stopped (non-fatal: state file update)
|
|
||||||
ref.State = StateStopped
|
|
||||||
ref.PID = 0
|
|
||||||
_ = m.saveState(ref)
|
|
||||||
}
|
|
||||||
|
|
||||||
return ref, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start starts the refinery.
|
// Start starts the refinery.
|
||||||
|
|||||||
@@ -72,22 +72,15 @@ func (m *Manager) saveState(w *Witness) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Status returns the current witness status.
|
// Status returns the current witness status.
|
||||||
|
// ZFC-compliant: trusts agent-reported state, no PID inference.
|
||||||
|
// The daemon reads agent bead state for liveness checks.
|
||||||
func (m *Manager) Status() (*Witness, error) {
|
func (m *Manager) Status() (*Witness, error) {
|
||||||
w, err := m.loadState()
|
w, err := m.loadState()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// If running, verify process is still alive
|
// Update monitored polecats list (still useful for display)
|
||||||
if w.State == StateRunning && w.PID > 0 {
|
|
||||||
if !processExists(w.PID) {
|
|
||||||
w.State = StateStopped
|
|
||||||
w.PID = 0
|
|
||||||
_ = m.saveState(w) // non-fatal: state file update
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update monitored polecats list
|
|
||||||
w.MonitoredPolecats = m.rig.Polecats
|
w.MonitoredPolecats = m.rig.Polecats
|
||||||
|
|
||||||
return w, nil
|
return w, nil
|
||||||
|
|||||||
Reference in New Issue
Block a user