Files
gastown/internal/witness/manager.go
jack afff85cdff fix(tmux): use NewSessionWithCommand to avoid send-keys race condition
Agent sessions would fail on startup because send-keys arrived before the
shell was ready, causing 'bad pattern' and 'command not found' errors.

Fix: Create sessions with the command directly using tmux new-session's
command argument. This runs the agent as the pane's initial process,
avoiding shell readiness timing issues entirely.

Updated all agent managers: mayor, deacon, witness, refinery, polecat, crew.

Also fixes pre-existing build error in polecat/manager.go (polecatPath →
clonePath/newClonePath).

Closes #280

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-08 23:35:31 -08:00

248 lines
7.2 KiB
Go

package witness
import (
"errors"
"fmt"
"os"
"path/filepath"
"time"
"github.com/steveyegge/gastown/internal/agent"
"github.com/steveyegge/gastown/internal/claude"
"github.com/steveyegge/gastown/internal/config"
"github.com/steveyegge/gastown/internal/constants"
"github.com/steveyegge/gastown/internal/rig"
"github.com/steveyegge/gastown/internal/session"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/util"
)
// Sentinel errors returned by the witness Manager. Callers should compare
// against them with errors.Is.
var (
	// ErrAlreadyRunning is returned by Start when a witness is already active.
	ErrAlreadyRunning = errors.New("witness already running")

	// ErrNotRunning is returned by Stop when there is no witness to stop.
	ErrNotRunning = errors.New("witness not running")
)
// Manager handles witness lifecycle and monitoring operations.
// One Manager is built per rig by NewManager; it starts and stops the witness
// and reads/writes the witness state through stateManager.
type Manager struct {
// rig is the rig this witness belongs to. Its Name feeds the tmux session
// name and its Path is the root for the working-directory lookup.
rig *rig.Rig
// workDir is initialised to the rig's path by NewManager.
workDir string
// stateManager loads and saves the Witness state for this rig.
stateManager *agent.StateManager[Witness]
}
// NewManager builds the witness Manager for rig r.
//
// The manager's working directory is the rig path. Its state manager is
// created with the rig path and the file name "witness.json", and is handed a
// factory that yields a Witness named after the rig in StateStopped
// (presumably used when no state has been saved yet).
func NewManager(r *rig.Rig) *Manager {
	// Factory for the default (never-started) witness state.
	defaultWitness := func() *Witness {
		return &Witness{
			RigName: r.Name,
			State:   StateStopped,
		}
	}
	states := agent.NewStateManager[Witness](r.Path, "witness.json", defaultWitness)

	return &Manager{
		rig:          r,
		workDir:      r.Path,
		stateManager: states,
	}
}
// stateFile reports the location of the witness state file, as given by the
// underlying state manager.
func (m *Manager) stateFile() string {
	path := m.stateManager.StateFile()
	return path
}
// loadState reads the witness state through the state manager and returns
// whatever state and error the manager produced.
func (m *Manager) loadState() (*Witness, error) {
	state, err := m.stateManager.Load()
	return state, err
}
// saveState writes w through the state manager. The write itself (described
// upstream as an atomic write) is entirely delegated to the state manager.
func (m *Manager) saveState(w *Witness) error {
	err := m.stateManager.Save(w)
	return err
}
// SessionName gives the name of the tmux session that hosts this rig's
// witness: "gt-<rig name>-witness".
func (m *Manager) SessionName() string {
	const pattern = "gt-%s-witness"
	return fmt.Sprintf(pattern, m.rig.Name)
}
// Status reports the current witness state.
//
// ZFC-compliant: the stored, agent-reported state is returned as-is; this
// method does no PID or tmux probing of its own. Liveness checks are left to
// the daemon, which reads agent bead state.
//
// The only field touched is MonitoredPolecats, which is overwritten with the
// rig's current polecat list. A load failure is returned unchanged with a nil
// Witness.
func (m *Manager) Status() (*Witness, error) {
	state, err := m.loadState()
	if err != nil {
		return nil, err
	}

	// Keep the displayed polecat list in sync with the rig.
	state.MonitoredPolecats = m.rig.Polecats
	return state, nil
}
// witnessDir picks the directory the witness should run in.
//
// Candidates are tried in order of preference and the first one that can be
// stat'ed wins:
//
//  1. <rig>/witness/rig
//  2. <rig>/witness
//
// If neither can be stat'ed, the rig root itself is returned.
func (m *Manager) witnessDir() string {
	root := m.rig.Path
	candidates := []string{
		filepath.Join(root, "witness", "rig"),
		filepath.Join(root, "witness"),
	}
	for _, dir := range candidates {
		if _, err := os.Stat(dir); err == nil {
			return dir
		}
	}
	return root
}
// Start starts the witness.
//
// If foreground is true (deprecated mode), no tmux session is created: the
// saved state is marked running with the current process's PID and the call
// returns. Otherwise a Claude agent is spawned in a tmux session named by
// SessionName, the state is saved as running, and the startup and propulsion
// nudges are sent.
//
// It returns ErrAlreadyRunning when a live witness is detected, either a tmux
// session in which Claude is running or a saved PID that still exists. Errors
// from loading state, killing a zombie session, ensuring Claude settings,
// creating the session, or saving state are returned (wrapped with context
// where the step is named). Everything after the state save is best-effort
// and never fails the call.
func (m *Manager) Start(foreground bool) error {
	w, err := m.loadState()
	if err != nil {
		return err
	}

	t := tmux.NewTmux()
	sessionID := m.SessionName()

	if foreground {
		// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
		if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
			return ErrAlreadyRunning
		}

		now := time.Now()
		w.State = StateRunning
		w.StartedAt = &now
		w.PID = os.Getpid()
		w.MonitoredPolecats = m.rig.Polecats
		return m.saveState(w)
	}

	// Background mode: check if session already exists.
	// The lookup error is discarded; only the boolean is consulted.
	running, _ := t.HasSession(sessionID)
	if running {
		// Session exists - check if Claude is actually running (healthy vs zombie)
		if t.IsClaudeRunning(sessionID) {
			// Healthy - Claude is running
			return ErrAlreadyRunning
		}
		// Zombie - tmux alive but Claude dead. Kill and recreate.
		if err := t.KillSession(sessionID); err != nil {
			return fmt.Errorf("killing zombie session: %w", err)
		}
	}

	// Also check via PID for backwards compatibility
	if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
		return ErrAlreadyRunning
	}

	// Working directory
	witnessDir := m.witnessDir()

	// Ensure Claude settings exist in witness/ (not witness/rig/) so we don't
	// write into the source repo. Claude walks up the tree to find settings.
	witnessParentDir := filepath.Join(m.rig.Path, "witness")
	if err := claude.EnsureSettingsForRole(witnessParentDir, "witness"); err != nil {
		return fmt.Errorf("ensuring Claude settings: %w", err)
	}

	// Build startup command first.
	// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
	bdActor := fmt.Sprintf("%s/witness", m.rig.Name)
	command := config.BuildAgentStartupCommand("witness", bdActor, m.rig.Path, "")
	runtimeConfig := config.LoadRuntimeConfig(m.rig.Path)

	// Create session with command directly to avoid send-keys race condition.
	// See: https://github.com/steveyegge/gastown/issues/280
	if err := t.NewSessionWithCommand(sessionID, witnessDir, command); err != nil {
		return fmt.Errorf("creating tmux session: %w", err)
	}

	// Set environment variables (non-fatal: session works without these)
	_ = t.SetEnvironment(sessionID, "GT_ROLE", "witness")
	_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
	_ = t.SetEnvironment(sessionID, "BD_ACTOR", bdActor)

	// Apply Gas Town theming (non-fatal: theming failure doesn't affect operation)
	theme := tmux.AssignTheme(m.rig.Name)
	_ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, "witness", "witness")

	// Update state to running
	now := time.Now()
	w.State = StateRunning
	w.StartedAt = &now
	w.PID = 0 // Claude agent doesn't have a PID we track
	w.MonitoredPolecats = m.rig.Polecats
	if err := m.saveState(w); err != nil {
		_ = t.KillSession(sessionID) // best-effort cleanup on state save failure
		return fmt.Errorf("saving state: %w", err)
	}

	// Wait for runtime to start and show its prompt. Non-fatal: on timeout or
	// error we try to continue anyway, so the result is deliberately discarded.
	_ = t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout)

	// Accept bypass permissions warning dialog if it appears.
	_ = t.AcceptBypassPermissionsWarning(sessionID)

	// NOTE(review): the shutdown-notify delay constant is reused here as a
	// settle pause before nudging; presumably intentional - confirm.
	time.Sleep(constants.ShutdownNotifyDelay)

	// Inject startup nudge for predecessor discovery via /resume.
	// The recipient address is the same "<rig>/witness" string as bdActor.
	_ = session.StartupNudge(t, sessionID, session.StartupNudgeConfig{
		Recipient: bdActor,
		Sender:    "deacon",
		Topic:     "patrol",
	}) // Non-fatal

	// GUPP: Gas Town Universal Propulsion Principle
	// Send the propulsion nudge to trigger autonomous patrol execution.
	// Wait for beacon to be fully processed (needs to be separate prompt)
	time.Sleep(2 * time.Second)
	_ = t.NudgeSession(sessionID, session.PropulsionNudgeForRole("witness", witnessDir)) // Non-fatal

	return nil
}
// Stop stops the witness.
//
// It returns ErrNotRunning when the saved state is not StateRunning and no
// tmux session with the witness's name exists. Otherwise it kills the tmux
// session if present, sends an interrupt to a recorded witness process that
// is still alive and is not the current process, and then saves the state as
// stopped with the PID cleared. Errors from loading or saving state are
// returned; the kill and the signal are best-effort.
func (m *Manager) Stop() error {
	state, err := m.loadState()
	if err != nil {
		return err
	}

	// Look up the tmux session; the lookup error is discarded and only the
	// boolean is consulted.
	tm := tmux.NewTmux()
	name := m.SessionName()
	sessionAlive, _ := tm.HasSession(name)

	// Nothing to stop unless the saved state or tmux says a witness is up.
	if !(state.State == StateRunning || sessionAlive) {
		return ErrNotRunning
	}

	// Best-effort: the session may already be dead by the time we kill it.
	if sessionAlive {
		_ = tm.KillSession(name)
	}

	// A recorded PID that belongs to another, still-living process gets a
	// best-effort graceful stop. The signal sent is os.Interrupt (SIGINT on
	// Unix-like systems), not SIGTERM.
	if pid := state.PID; pid > 0 && pid != os.Getpid() && util.ProcessExists(pid) {
		proc, findErr := os.FindProcess(pid)
		if findErr == nil {
			_ = proc.Signal(os.Interrupt)
		}
	}

	state.State = StateStopped
	state.PID = 0
	return m.saveState(state)
}