fix(zfc): remove PID-based agent liveness detection
Replace ProcessExists() checks in witness and refinery managers with tmux session detection. Agent liveness should be derived from tmux session state, not PID probing (per ZFC tracking principles).

- Remove util.ProcessExists() from witness/manager.go and refinery/manager.go
- Delete internal/util/process.go and process_test.go (now unused)
- Foreground mode and Stop() now rely solely on tmux HasSession/KillSession

Closes: hq-yxkdr (recentDeaths already removed)
Closes: hq-1sd4o (ProcessExists removed)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -114,9 +114,10 @@ func (m *Manager) Start(foreground bool) error {
|
|||||||
sessionID := m.SessionName()
|
sessionID := m.SessionName()
|
||||||
|
|
||||||
if foreground {
|
if foreground {
|
||||||
// In foreground mode, we're likely running inside the tmux session
|
// In foreground mode, check tmux session (no PID inference per ZFC)
|
||||||
// that background mode created. Only check PID to avoid self-detection.
|
townRoot := filepath.Dir(m.rig.Path)
|
||||||
if ref.State == StateRunning && ref.PID > 0 && util.ProcessExists(ref.PID) {
|
agentCfg := config.ResolveAgentConfig(townRoot, m.rig.Path)
|
||||||
|
if running, _ := t.HasSession(sessionID); running && t.IsAgentRunning(sessionID, config.ExpectedPaneCommands(agentCfg)...) {
|
||||||
return ErrAlreadyRunning
|
return ErrAlreadyRunning
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,7 +125,7 @@ func (m *Manager) Start(foreground bool) error {
|
|||||||
now := time.Now()
|
now := time.Now()
|
||||||
ref.State = StateRunning
|
ref.State = StateRunning
|
||||||
ref.StartedAt = &now
|
ref.StartedAt = &now
|
||||||
ref.PID = os.Getpid()
|
ref.PID = 0 // No longer track PID (ZFC)
|
||||||
|
|
||||||
if err := m.saveState(ref); err != nil {
|
if err := m.saveState(ref); err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -151,10 +152,7 @@ func (m *Manager) Start(foreground bool) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also check via PID for backwards compatibility
|
// Note: No PID check per ZFC - tmux session is the source of truth
|
||||||
if ref.State == StateRunning && ref.PID > 0 && util.ProcessExists(ref.PID) {
|
|
||||||
return ErrAlreadyRunning
|
|
||||||
}
|
|
||||||
|
|
||||||
// Background mode: spawn a Claude agent in a tmux session
|
// Background mode: spawn a Claude agent in a tmux session
|
||||||
// The Claude agent handles MR processing using git commands and beads
|
// The Claude agent handles MR processing using git commands and beads
|
||||||
@@ -270,13 +268,7 @@ func (m *Manager) Stop() error {
|
|||||||
_ = t.KillSession(sessionID)
|
_ = t.KillSession(sessionID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have a PID and it's a different process, try to stop it gracefully
|
// Note: No PID-based stop per ZFC - tmux session kill is sufficient
|
||||||
if ref.PID > 0 && ref.PID != os.Getpid() && util.ProcessExists(ref.PID) {
|
|
||||||
// Send SIGTERM (best-effort graceful stop)
|
|
||||||
if proc, err := os.FindProcess(ref.PID); err == nil {
|
|
||||||
_ = proc.Signal(os.Interrupt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ref.State = StateStopped
|
ref.State = StateStopped
|
||||||
ref.PID = 0
|
ref.PID = 0
|
||||||
|
|||||||
@@ -1,24 +0,0 @@
|
|||||||
// Package util provides utility functions for Gas Town.
|
|
||||||
// This file was created as part of an E2E polecat workflow test.
|
|
||||||
package util
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
"syscall"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ProcessExists checks if a process with the given PID exists.
|
|
||||||
// It sends signal 0 to the process, which doesn't actually send a signal
|
|
||||||
// but does perform error checking to see if the process exists.
|
|
||||||
func ProcessExists(pid int) bool {
|
|
||||||
if pid <= 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
process, err := os.FindProcess(pid)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
// Signal 0 checks if process exists without sending a real signal
|
|
||||||
err = process.Signal(syscall.Signal(0))
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
package util
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestProcessExistsNonExistent(t *testing.T) {
|
|
||||||
// Using a very high PID that's unlikely to exist
|
|
||||||
pid := 999999999
|
|
||||||
if ProcessExists(pid) {
|
|
||||||
t.Errorf("ProcessExists(%d) = true, want false for non-existent process", pid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProcessExistsNegativePID(t *testing.T) {
|
|
||||||
// Negative PIDs are invalid and should return false or may cause errors
|
|
||||||
// depending on the platform, so just test that it doesn't panic
|
|
||||||
_ = ProcessExists(-1)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProcessExistsZero(t *testing.T) {
|
|
||||||
// PID 0 is special (kernel process on Unix)
|
|
||||||
// Test that we can call it without panicking
|
|
||||||
_ = ProcessExists(0)
|
|
||||||
}
|
|
||||||
@@ -16,7 +16,6 @@ import (
|
|||||||
"github.com/steveyegge/gastown/internal/rig"
|
"github.com/steveyegge/gastown/internal/rig"
|
||||||
"github.com/steveyegge/gastown/internal/session"
|
"github.com/steveyegge/gastown/internal/session"
|
||||||
"github.com/steveyegge/gastown/internal/tmux"
|
"github.com/steveyegge/gastown/internal/tmux"
|
||||||
"github.com/steveyegge/gastown/internal/util"
|
|
||||||
"github.com/steveyegge/gastown/internal/workspace"
|
"github.com/steveyegge/gastown/internal/workspace"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -114,14 +113,15 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st
|
|||||||
|
|
||||||
if foreground {
|
if foreground {
|
||||||
// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
|
// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
|
||||||
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
|
// Just check tmux session (no PID inference per ZFC)
|
||||||
|
if running, _ := t.HasSession(sessionID); running && t.IsClaudeRunning(sessionID) {
|
||||||
return ErrAlreadyRunning
|
return ErrAlreadyRunning
|
||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
w.State = StateRunning
|
w.State = StateRunning
|
||||||
w.StartedAt = &now
|
w.StartedAt = &now
|
||||||
w.PID = os.Getpid()
|
w.PID = 0 // No longer track PID (ZFC)
|
||||||
w.MonitoredPolecats = m.rig.Polecats
|
w.MonitoredPolecats = m.rig.Polecats
|
||||||
|
|
||||||
return m.saveState(w)
|
return m.saveState(w)
|
||||||
@@ -141,10 +141,7 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also check via PID for backwards compatibility
|
// Note: No PID check per ZFC - tmux session is the source of truth
|
||||||
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
|
|
||||||
return ErrAlreadyRunning
|
|
||||||
}
|
|
||||||
|
|
||||||
// Working directory
|
// Working directory
|
||||||
witnessDir := m.witnessDir()
|
witnessDir := m.witnessDir()
|
||||||
@@ -320,13 +317,7 @@ func (m *Manager) Stop() error {
|
|||||||
_ = t.KillSession(sessionID)
|
_ = t.KillSession(sessionID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have a PID and it's a different process, try to stop it gracefully
|
// Note: No PID-based stop per ZFC - tmux session kill is sufficient
|
||||||
if w.PID > 0 && w.PID != os.Getpid() && util.ProcessExists(w.PID) {
|
|
||||||
// Send SIGTERM (best-effort graceful stop)
|
|
||||||
if proc, err := os.FindProcess(w.PID); err == nil {
|
|
||||||
_ = proc.Signal(os.Interrupt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
w.State = StateStopped
|
w.State = StateStopped
|
||||||
w.PID = 0
|
w.PID = 0
|
||||||
|
|||||||
Reference in New Issue
Block a user