From 0f633be4b1075cf514ac09a2520347efe25069f7 Mon Sep 17 00:00:00 2001 From: dennis Date: Fri, 9 Jan 2026 22:02:34 -0800 Subject: [PATCH] fix(zfc): remove PID-based agent liveness detection Replace ProcessExists() checks in witness and refinery managers with tmux session detection. Agent liveness should be derived from tmux session state, not PID probing (per ZFC tracking principles). - Remove util.ProcessExists() from witness/manager.go and refinery/manager.go - Delete internal/util/process.go and process_test.go (now unused) - Foreground mode and Stop() now rely solely on tmux session state (HasSession plus an agent-running check on start; KillSession on stop) Closes: hq-yxkdr (recentDeaths already removed) Closes: hq-1sd4o (ProcessExists removed) Co-Authored-By: Claude Opus 4.5 --- internal/refinery/manager.go | 22 +++++++--------------- internal/util/process.go | 24 ------------------------ internal/util/process_test.go | 25 ------------------------- internal/witness/manager.go | 19 +++++-------------- 4 files changed, 12 insertions(+), 78 deletions(-) delete mode 100644 internal/util/process.go delete mode 100644 internal/util/process_test.go diff --git a/internal/refinery/manager.go b/internal/refinery/manager.go index e1e02d81..fd87486a 100644 --- a/internal/refinery/manager.go +++ b/internal/refinery/manager.go @@ -114,9 +114,10 @@ func (m *Manager) Start(foreground bool) error { sessionID := m.SessionName() if foreground { - // In foreground mode, we're likely running inside the tmux session - // that background mode created. Only check PID to avoid self-detection. - if ref.State == StateRunning && ref.PID > 0 && util.ProcessExists(ref.PID) { + // In foreground mode, check tmux session (no PID inference per ZFC) + townRoot := filepath.Dir(m.rig.Path) + agentCfg := config.ResolveAgentConfig(townRoot, m.rig.Path) + if running, _ := t.HasSession(sessionID); running && t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) 
{ return ErrAlreadyRunning } @@ -124,7 +125,7 @@ func (m *Manager) Start(foreground bool) error { now := time.Now() ref.State = StateRunning ref.StartedAt = &now - ref.PID = os.Getpid() + ref.PID = 0 // No longer track PID (ZFC) if err := m.saveState(ref); err != nil { return err @@ -151,10 +152,7 @@ func (m *Manager) Start(foreground bool) error { } } - // Also check via PID for backwards compatibility - if ref.State == StateRunning && ref.PID > 0 && util.ProcessExists(ref.PID) { - return ErrAlreadyRunning - } + // Note: No PID check per ZFC - tmux session is the source of truth // Background mode: spawn a Claude agent in a tmux session // The Claude agent handles MR processing using git commands and beads @@ -270,13 +268,7 @@ func (m *Manager) Stop() error { _ = t.KillSession(sessionID) } - // If we have a PID and it's a different process, try to stop it gracefully - if ref.PID > 0 && ref.PID != os.Getpid() && util.ProcessExists(ref.PID) { - // Send SIGTERM (best-effort graceful stop) - if proc, err := os.FindProcess(ref.PID); err == nil { - _ = proc.Signal(os.Interrupt) - } - } + // Note: No PID-based stop per ZFC - tmux session kill is sufficient ref.State = StateStopped ref.PID = 0 diff --git a/internal/util/process.go b/internal/util/process.go deleted file mode 100644 index ede918c4..00000000 --- a/internal/util/process.go +++ /dev/null @@ -1,24 +0,0 @@ -// Package util provides utility functions for Gas Town. -// This file was created as part of an E2E polecat workflow test. -package util - -import ( - "os" - "syscall" -) - -// ProcessExists checks if a process with the given PID exists. -// It sends signal 0 to the process, which doesn't actually send a signal -// but does perform error checking to see if the process exists. 
-func ProcessExists(pid int) bool { - if pid <= 0 { - return false - } - process, err := os.FindProcess(pid) - if err != nil { - return false - } - // Signal 0 checks if process exists without sending a real signal - err = process.Signal(syscall.Signal(0)) - return err == nil -} diff --git a/internal/util/process_test.go b/internal/util/process_test.go deleted file mode 100644 index 1618d1d0..00000000 --- a/internal/util/process_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package util - -import ( - "testing" -) - -func TestProcessExistsNonExistent(t *testing.T) { - // Using a very high PID that's unlikely to exist - pid := 999999999 - if ProcessExists(pid) { - t.Errorf("ProcessExists(%d) = true, want false for non-existent process", pid) - } -} - -func TestProcessExistsNegativePID(t *testing.T) { - // Negative PIDs are invalid and should return false or may cause errors - // depending on the platform, so just test that it doesn't panic - _ = ProcessExists(-1) -} - -func TestProcessExistsZero(t *testing.T) { - // PID 0 is special (kernel process on Unix) - // Test that we can call it without panicking - _ = ProcessExists(0) -} diff --git a/internal/witness/manager.go b/internal/witness/manager.go index e85c7ae4..942259cc 100644 --- a/internal/witness/manager.go +++ b/internal/witness/manager.go @@ -16,7 +16,6 @@ import ( "github.com/steveyegge/gastown/internal/rig" "github.com/steveyegge/gastown/internal/session" "github.com/steveyegge/gastown/internal/tmux" - "github.com/steveyegge/gastown/internal/util" "github.com/steveyegge/gastown/internal/workspace" ) @@ -114,14 +113,15 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st if foreground { // Foreground mode is deprecated - patrol logic moved to mol-witness-patrol - if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) { + // Just check tmux session (no PID inference per ZFC) + if running, _ := t.HasSession(sessionID); running && t.IsClaudeRunning(sessionID) { return 
ErrAlreadyRunning } now := time.Now() w.State = StateRunning w.StartedAt = &now - w.PID = os.Getpid() + w.PID = 0 // No longer track PID (ZFC) w.MonitoredPolecats = m.rig.Polecats return m.saveState(w) @@ -141,10 +141,7 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st } } - // Also check via PID for backwards compatibility - if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) { - return ErrAlreadyRunning - } + // Note: No PID check per ZFC - tmux session is the source of truth // Working directory witnessDir := m.witnessDir() @@ -320,13 +317,7 @@ func (m *Manager) Stop() error { _ = t.KillSession(sessionID) } - // If we have a PID and it's a different process, try to stop it gracefully - if w.PID > 0 && w.PID != os.Getpid() && util.ProcessExists(w.PID) { - // Send SIGTERM (best-effort graceful stop) - if proc, err := os.FindProcess(w.PID); err == nil { - _ = proc.Signal(os.Interrupt) - } - } + // Note: No PID-based stop per ZFC - tmux session kill is sufficient w.State = StateStopped w.PID = 0