fix(zfc): remove PID-based agent liveness detection

Replace ProcessExists() checks in witness and refinery managers with
tmux session detection. Agent liveness should be derived from tmux
session state, not PID probing (per ZFC tracking principles).

- Remove util.ProcessExists() from witness/manager.go and refinery/manager.go
- Delete internal/util/process.go and process_test.go (now unused)
- Foreground mode and Stop() now rely solely on tmux HasSession/KillSession

Closes: hq-yxkdr (recentDeaths already removed)
Closes: hq-1sd4o (ProcessExists removed)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
dennis
2026-01-09 22:02:34 -08:00
committed by Steve Yegge
parent 593185873d
commit 0f633be4b1
4 changed files with 12 additions and 78 deletions

View File

@@ -114,9 +114,10 @@ func (m *Manager) Start(foreground bool) error {
sessionID := m.SessionName()
if foreground {
// In foreground mode, we're likely running inside the tmux session
// that background mode created. Only check PID to avoid self-detection.
if ref.State == StateRunning && ref.PID > 0 && util.ProcessExists(ref.PID) {
// In foreground mode, check tmux session (no PID inference per ZFC)
townRoot := filepath.Dir(m.rig.Path)
agentCfg := config.ResolveAgentConfig(townRoot, m.rig.Path)
if running, _ := t.HasSession(sessionID); running && t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
return ErrAlreadyRunning
}
@@ -124,7 +125,7 @@ func (m *Manager) Start(foreground bool) error {
now := time.Now()
ref.State = StateRunning
ref.StartedAt = &now
ref.PID = os.Getpid()
ref.PID = 0 // No longer track PID (ZFC)
if err := m.saveState(ref); err != nil {
return err
@@ -151,10 +152,7 @@ func (m *Manager) Start(foreground bool) error {
}
}
// Also check via PID for backwards compatibility
if ref.State == StateRunning && ref.PID > 0 && util.ProcessExists(ref.PID) {
return ErrAlreadyRunning
}
// Note: No PID check per ZFC - tmux session is the source of truth
// Background mode: spawn a Claude agent in a tmux session
// The Claude agent handles MR processing using git commands and beads
@@ -270,13 +268,7 @@ func (m *Manager) Stop() error {
_ = t.KillSession(sessionID)
}
// If we have a PID and it's a different process, try to stop it gracefully
if ref.PID > 0 && ref.PID != os.Getpid() && util.ProcessExists(ref.PID) {
// Send an interrupt signal (os.Interrupt, i.e. SIGINT) as a best-effort graceful stop
if proc, err := os.FindProcess(ref.PID); err == nil {
_ = proc.Signal(os.Interrupt)
}
}
// Note: No PID-based stop per ZFC - tmux session kill is sufficient
ref.State = StateStopped
ref.PID = 0

View File

@@ -1,24 +0,0 @@
// Package util provides utility functions for Gas Town.
// This file was created as part of an E2E polecat workflow test.
package util
import (
	"errors"
	"os"
	"syscall"
)
// ProcessExists reports whether a process with the given PID exists.
//
// It sends signal 0 to the process, which delivers nothing but still makes
// the operating system perform its existence and permission checks.
// Non-positive PIDs are rejected up front and always yield false.
func ProcessExists(pid int) bool {
	if pid <= 0 {
		return false
	}
	process, err := os.FindProcess(pid)
	if err != nil {
		return false
	}
	// Signal 0 checks if the process exists without sending a real signal.
	err = process.Signal(syscall.Signal(0))
	if err == nil {
		return true
	}
	// EPERM means the target exists but we are not allowed to signal it
	// (for example, it is owned by another user). That is still a live
	// process, so report it as existing. errors.Is is used because the
	// errno may arrive wrapped in another error type.
	return errors.Is(err, syscall.EPERM)
}

View File

@@ -1,25 +0,0 @@
package util
import (
"testing"
)
// TestProcessExistsNonExistent checks that ProcessExists returns false for a
// PID that is very unlikely to belong to a running process.
func TestProcessExistsNonExistent(t *testing.T) {
	// A very high PID that should not be in use on the test machine.
	const absentPID = 999999999

	if !ProcessExists(absentPID) {
		return
	}
	t.Errorf("ProcessExists(%d) = true, want false for non-existent process", absentPID)
}
// TestProcessExistsNegativePID checks that a negative PID is reported as
// non-existent and that the call does not panic.
func TestProcessExistsNegativePID(t *testing.T) {
	// ProcessExists returns false for every pid <= 0 before consulting the
	// operating system, so the result is the same on every platform and can
	// be asserted rather than discarded.
	if ProcessExists(-1) {
		t.Errorf("ProcessExists(-1) = true, want false for negative PID")
	}
}
// TestProcessExistsZero checks that PID 0 is reported as non-existent and
// that the call does not panic.
func TestProcessExistsZero(t *testing.T) {
	// PID 0 is special on Unix, but ProcessExists returns false for every
	// pid <= 0 without probing the system, so the result can be asserted.
	if ProcessExists(0) {
		t.Errorf("ProcessExists(0) = true, want false for PID 0")
	}
}

View File

@@ -16,7 +16,6 @@ import (
"github.com/steveyegge/gastown/internal/rig"
"github.com/steveyegge/gastown/internal/session"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/util"
"github.com/steveyegge/gastown/internal/workspace"
)
@@ -114,14 +113,15 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st
if foreground {
// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
// Just check tmux session (no PID inference per ZFC)
if running, _ := t.HasSession(sessionID); running && t.IsClaudeRunning(sessionID) {
return ErrAlreadyRunning
}
now := time.Now()
w.State = StateRunning
w.StartedAt = &now
w.PID = os.Getpid()
w.PID = 0 // No longer track PID (ZFC)
w.MonitoredPolecats = m.rig.Polecats
return m.saveState(w)
@@ -141,10 +141,7 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st
}
}
// Also check via PID for backwards compatibility
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
return ErrAlreadyRunning
}
// Note: No PID check per ZFC - tmux session is the source of truth
// Working directory
witnessDir := m.witnessDir()
@@ -320,13 +317,7 @@ func (m *Manager) Stop() error {
_ = t.KillSession(sessionID)
}
// If we have a PID and it's a different process, try to stop it gracefully
if w.PID > 0 && w.PID != os.Getpid() && util.ProcessExists(w.PID) {
// Send an interrupt signal (os.Interrupt, i.e. SIGINT) as a best-effort graceful stop
if proc, err := os.FindProcess(w.PID); err == nil {
_ = proc.Signal(os.Interrupt)
}
}
// Note: No PID-based stop per ZFC - tmux session kill is sufficient
w.State = StateStopped
w.PID = 0