Agent sessions would fail on startup because send-keys arrived before the shell was ready, causing 'bad pattern' and 'command not found' errors. Fix: Create sessions with the command directly using tmux new-session's command argument. This runs the agent as the pane's initial process, avoiding shell readiness timing issues entirely. Updated all agent managers: mayor, deacon, witness, refinery, polecat, crew. Also fixes pre-existing build error in polecat/manager.go (polecatPath → clonePath/newClonePath). Closes #280 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
248 lines
7.2 KiB
Go
248 lines
7.2 KiB
Go
package witness
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"time"
|
|
|
|
"github.com/steveyegge/gastown/internal/agent"
|
|
"github.com/steveyegge/gastown/internal/claude"
|
|
"github.com/steveyegge/gastown/internal/config"
|
|
"github.com/steveyegge/gastown/internal/constants"
|
|
"github.com/steveyegge/gastown/internal/rig"
|
|
"github.com/steveyegge/gastown/internal/session"
|
|
"github.com/steveyegge/gastown/internal/tmux"
|
|
"github.com/steveyegge/gastown/internal/util"
|
|
)
|
|
|
|
// Common errors
|
|
var (
|
|
ErrNotRunning = errors.New("witness not running")
|
|
ErrAlreadyRunning = errors.New("witness already running")
|
|
)
|
|
|
|
// Manager handles witness lifecycle and monitoring operations.
|
|
type Manager struct {
|
|
rig *rig.Rig
|
|
workDir string
|
|
stateManager *agent.StateManager[Witness]
|
|
}
|
|
|
|
// NewManager creates a new witness manager for a rig.
|
|
func NewManager(r *rig.Rig) *Manager {
|
|
return &Manager{
|
|
rig: r,
|
|
workDir: r.Path,
|
|
stateManager: agent.NewStateManager[Witness](r.Path, "witness.json", func() *Witness {
|
|
return &Witness{
|
|
RigName: r.Name,
|
|
State: StateStopped,
|
|
}
|
|
}),
|
|
}
|
|
}
|
|
|
|
// stateFile returns the path to the witness state file.
|
|
func (m *Manager) stateFile() string {
|
|
return m.stateManager.StateFile()
|
|
}
|
|
|
|
// loadState loads witness state from disk.
|
|
func (m *Manager) loadState() (*Witness, error) {
|
|
return m.stateManager.Load()
|
|
}
|
|
|
|
// saveState persists witness state to disk using atomic write.
|
|
func (m *Manager) saveState(w *Witness) error {
|
|
return m.stateManager.Save(w)
|
|
}
|
|
|
|
// SessionName returns the tmux session name for this witness.
|
|
func (m *Manager) SessionName() string {
|
|
return fmt.Sprintf("gt-%s-witness", m.rig.Name)
|
|
}
|
|
|
|
// Status returns the current witness status.
|
|
// ZFC-compliant: trusts agent-reported state, no PID inference.
|
|
// The daemon reads agent bead state for liveness checks.
|
|
func (m *Manager) Status() (*Witness, error) {
|
|
w, err := m.loadState()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Update monitored polecats list (still useful for display)
|
|
w.MonitoredPolecats = m.rig.Polecats
|
|
|
|
return w, nil
|
|
}
|
|
|
|
// witnessDir returns the working directory for the witness.
|
|
// Prefers witness/rig/, falls back to witness/, then rig root.
|
|
func (m *Manager) witnessDir() string {
|
|
witnessRigDir := filepath.Join(m.rig.Path, "witness", "rig")
|
|
if _, err := os.Stat(witnessRigDir); err == nil {
|
|
return witnessRigDir
|
|
}
|
|
|
|
witnessDir := filepath.Join(m.rig.Path, "witness")
|
|
if _, err := os.Stat(witnessDir); err == nil {
|
|
return witnessDir
|
|
}
|
|
|
|
return m.rig.Path
|
|
}
|
|
|
|
// Start starts the witness.
|
|
// If foreground is true, only updates state (no tmux session - deprecated).
|
|
// Otherwise, spawns a Claude agent in a tmux session.
|
|
func (m *Manager) Start(foreground bool) error {
|
|
w, err := m.loadState()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName()
|
|
|
|
if foreground {
|
|
// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
|
|
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
|
|
return ErrAlreadyRunning
|
|
}
|
|
|
|
now := time.Now()
|
|
w.State = StateRunning
|
|
w.StartedAt = &now
|
|
w.PID = os.Getpid()
|
|
w.MonitoredPolecats = m.rig.Polecats
|
|
|
|
return m.saveState(w)
|
|
}
|
|
|
|
// Background mode: check if session already exists
|
|
running, _ := t.HasSession(sessionID)
|
|
if running {
|
|
// Session exists - check if Claude is actually running (healthy vs zombie)
|
|
if t.IsClaudeRunning(sessionID) {
|
|
// Healthy - Claude is running
|
|
return ErrAlreadyRunning
|
|
}
|
|
// Zombie - tmux alive but Claude dead. Kill and recreate.
|
|
if err := t.KillSession(sessionID); err != nil {
|
|
return fmt.Errorf("killing zombie session: %w", err)
|
|
}
|
|
}
|
|
|
|
// Also check via PID for backwards compatibility
|
|
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
|
|
return ErrAlreadyRunning
|
|
}
|
|
|
|
// Working directory
|
|
witnessDir := m.witnessDir()
|
|
|
|
// Ensure Claude settings exist in witness/ (not witness/rig/) so we don't
|
|
// write into the source repo. Claude walks up the tree to find settings.
|
|
witnessParentDir := filepath.Join(m.rig.Path, "witness")
|
|
if err := claude.EnsureSettingsForRole(witnessParentDir, "witness"); err != nil {
|
|
return fmt.Errorf("ensuring Claude settings: %w", err)
|
|
}
|
|
|
|
// Build startup command first
|
|
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
|
|
bdActor := fmt.Sprintf("%s/witness", m.rig.Name)
|
|
command := config.BuildAgentStartupCommand("witness", bdActor, m.rig.Path, "")
|
|
runtimeConfig := config.LoadRuntimeConfig(m.rig.Path)
|
|
|
|
// Create session with command directly to avoid send-keys race condition.
|
|
// See: https://github.com/anthropics/gastown/issues/280
|
|
if err := t.NewSessionWithCommand(sessionID, witnessDir, command); err != nil {
|
|
return fmt.Errorf("creating tmux session: %w", err)
|
|
}
|
|
|
|
// Set environment variables (non-fatal: session works without these)
|
|
_ = t.SetEnvironment(sessionID, "GT_ROLE", "witness")
|
|
_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
|
|
_ = t.SetEnvironment(sessionID, "BD_ACTOR", bdActor)
|
|
|
|
// Apply Gas Town theming (non-fatal: theming failure doesn't affect operation)
|
|
theme := tmux.AssignTheme(m.rig.Name)
|
|
_ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, "witness", "witness")
|
|
|
|
// Update state to running
|
|
now := time.Now()
|
|
w.State = StateRunning
|
|
w.StartedAt = &now
|
|
w.PID = 0 // Claude agent doesn't have a PID we track
|
|
w.MonitoredPolecats = m.rig.Polecats
|
|
if err := m.saveState(w); err != nil {
|
|
_ = t.KillSession(sessionID) // best-effort cleanup on state save failure
|
|
return fmt.Errorf("saving state: %w", err)
|
|
}
|
|
|
|
// Wait for runtime to start and show its prompt (non-fatal)
|
|
if err := t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout); err != nil {
|
|
// Non-fatal - try to continue anyway
|
|
}
|
|
|
|
// Accept bypass permissions warning dialog if it appears.
|
|
_ = t.AcceptBypassPermissionsWarning(sessionID)
|
|
|
|
time.Sleep(constants.ShutdownNotifyDelay)
|
|
|
|
// Inject startup nudge for predecessor discovery via /resume
|
|
address := fmt.Sprintf("%s/witness", m.rig.Name)
|
|
_ = session.StartupNudge(t, sessionID, session.StartupNudgeConfig{
|
|
Recipient: address,
|
|
Sender: "deacon",
|
|
Topic: "patrol",
|
|
}) // Non-fatal
|
|
|
|
// GUPP: Gas Town Universal Propulsion Principle
|
|
// Send the propulsion nudge to trigger autonomous patrol execution.
|
|
// Wait for beacon to be fully processed (needs to be separate prompt)
|
|
time.Sleep(2 * time.Second)
|
|
_ = t.NudgeSession(sessionID, session.PropulsionNudgeForRole("witness", witnessDir)) // Non-fatal
|
|
|
|
return nil
|
|
}
|
|
|
|
// Stop stops the witness.
|
|
func (m *Manager) Stop() error {
|
|
w, err := m.loadState()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check if tmux session exists
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName()
|
|
sessionRunning, _ := t.HasSession(sessionID)
|
|
|
|
// If neither state nor session indicates running, it's not running
|
|
if w.State != StateRunning && !sessionRunning {
|
|
return ErrNotRunning
|
|
}
|
|
|
|
// Kill tmux session if it exists (best-effort: may already be dead)
|
|
if sessionRunning {
|
|
_ = t.KillSession(sessionID)
|
|
}
|
|
|
|
// If we have a PID and it's a different process, try to stop it gracefully
|
|
if w.PID > 0 && w.PID != os.Getpid() && util.ProcessExists(w.PID) {
|
|
// Send SIGTERM (best-effort graceful stop)
|
|
if proc, err := os.FindProcess(w.PID); err == nil {
|
|
_ = proc.Signal(os.Interrupt)
|
|
}
|
|
}
|
|
|
|
w.State = StateStopped
|
|
w.PID = 0
|
|
|
|
return m.saveState(w)
|
|
}
|