Files
gastown/internal/witness/manager.go
gastown/crew/gus 86751e1ea5 feat(witness): add --env flag for environment variable overrides
Extends the --agent flag with a more general --env flag that allows
setting arbitrary environment variables when starting a witness.

Precedence (highest to lowest):
1. CLI --env overrides
2. Role bead env_vars
3. config.AgentEnv() defaults

Examples:
  gt witness start greenplace --env ANTHROPIC_MODEL=claude-3-haiku
  gt witness restart greenplace --env DEBUG=1 --env VERBOSE=true

Co-authored-by: joshuavial <git@codewithjv.com>
2026-01-09 22:00:43 -08:00

336 lines
10 KiB
Go

package witness
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/steveyegge/gastown/internal/agent"
"github.com/steveyegge/gastown/internal/beads"
"github.com/steveyegge/gastown/internal/claude"
"github.com/steveyegge/gastown/internal/config"
"github.com/steveyegge/gastown/internal/constants"
"github.com/steveyegge/gastown/internal/rig"
"github.com/steveyegge/gastown/internal/session"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/util"
"github.com/steveyegge/gastown/internal/workspace"
)
// Common errors
var (
ErrNotRunning = errors.New("witness not running")
ErrAlreadyRunning = errors.New("witness already running")
)
// Manager handles witness lifecycle and monitoring operations.
type Manager struct {
rig *rig.Rig
workDir string
stateManager *agent.StateManager[Witness]
}
// NewManager creates a new witness manager for a rig.
func NewManager(r *rig.Rig) *Manager {
return &Manager{
rig: r,
workDir: r.Path,
stateManager: agent.NewStateManager[Witness](r.Path, "witness.json", func() *Witness {
return &Witness{
RigName: r.Name,
State: StateStopped,
}
}),
}
}
// stateFile returns the path to the witness state file.
func (m *Manager) stateFile() string {
return m.stateManager.StateFile()
}
// loadState loads witness state from disk.
func (m *Manager) loadState() (*Witness, error) {
return m.stateManager.Load()
}
// saveState persists witness state to disk using atomic write.
func (m *Manager) saveState(w *Witness) error {
return m.stateManager.Save(w)
}
// SessionName returns the tmux session name for this witness.
func (m *Manager) SessionName() string {
return fmt.Sprintf("gt-%s-witness", m.rig.Name)
}
// Status returns the current witness status.
// ZFC-compliant: trusts agent-reported state, no PID inference.
// The daemon reads agent bead state for liveness checks.
func (m *Manager) Status() (*Witness, error) {
w, err := m.loadState()
if err != nil {
return nil, err
}
// Update monitored polecats list (still useful for display)
w.MonitoredPolecats = m.rig.Polecats
return w, nil
}
// witnessDir returns the working directory for the witness.
// Prefers witness/rig/, falls back to witness/, then rig root.
func (m *Manager) witnessDir() string {
witnessRigDir := filepath.Join(m.rig.Path, "witness", "rig")
if _, err := os.Stat(witnessRigDir); err == nil {
return witnessRigDir
}
witnessDir := filepath.Join(m.rig.Path, "witness")
if _, err := os.Stat(witnessDir); err == nil {
return witnessDir
}
return m.rig.Path
}
// Start starts the witness.
// If foreground is true, only updates state (no tmux session - deprecated).
// Otherwise, spawns a Claude agent in a tmux session.
// agentOverride optionally specifies a different agent alias to use.
// envOverrides are KEY=VALUE pairs that override all other env var sources.
func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []string) error {
w, err := m.loadState()
if err != nil {
return err
}
t := tmux.NewTmux()
sessionID := m.SessionName()
if foreground {
// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
return ErrAlreadyRunning
}
now := time.Now()
w.State = StateRunning
w.StartedAt = &now
w.PID = os.Getpid()
w.MonitoredPolecats = m.rig.Polecats
return m.saveState(w)
}
// Background mode: check if session already exists
running, _ := t.HasSession(sessionID)
if running {
// Session exists - check if Claude is actually running (healthy vs zombie)
if t.IsClaudeRunning(sessionID) {
// Healthy - Claude is running
return ErrAlreadyRunning
}
// Zombie - tmux alive but Claude dead. Kill and recreate.
if err := t.KillSession(sessionID); err != nil {
return fmt.Errorf("killing zombie session: %w", err)
}
}
// Also check via PID for backwards compatibility
if w.State == StateRunning && w.PID > 0 && util.ProcessExists(w.PID) {
return ErrAlreadyRunning
}
// Working directory
witnessDir := m.witnessDir()
// Ensure Claude settings exist in witness/ (not witness/rig/) so we don't
// write into the source repo. Claude walks up the tree to find settings.
witnessParentDir := filepath.Join(m.rig.Path, "witness")
if err := claude.EnsureSettingsForRole(witnessParentDir, "witness"); err != nil {
return fmt.Errorf("ensuring Claude settings: %w", err)
}
// Create new tmux session
if err := t.NewSession(sessionID, witnessDir); err != nil {
return fmt.Errorf("creating tmux session: %w", err)
}
// Apply Gas Town theming (non-fatal: theming failure doesn't affect operation)
theme := tmux.AssignTheme(m.rig.Name)
_ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, "witness", "witness")
roleConfig, err := m.roleConfig()
if err != nil {
_ = t.KillSession(sessionID)
return err
}
townRoot := m.townRoot()
// Set environment variables (non-fatal: session works without these)
// Use centralized AgentEnv for consistency across all role startup paths
envVars := config.AgentEnv(config.AgentEnvConfig{
Role: "witness",
Rig: m.rig.Name,
TownRoot: townRoot,
BeadsDir: beads.ResolveBeadsDir(m.rig.Path),
})
for k, v := range envVars {
_ = t.SetEnvironment(sessionID, k, v)
}
// Apply role config env vars if present (non-fatal).
for key, value := range roleConfigEnvVars(roleConfig, townRoot, m.rig.Name) {
_ = t.SetEnvironment(sessionID, key, value)
}
// Apply CLI env overrides (highest priority, non-fatal).
for _, override := range envOverrides {
if key, value, ok := strings.Cut(override, "="); ok {
_ = t.SetEnvironment(sessionID, key, value)
}
}
// Update state to running
now := time.Now()
w.State = StateRunning
w.StartedAt = &now
w.PID = 0 // Claude agent doesn't have a PID we track
w.MonitoredPolecats = m.rig.Polecats
if err := m.saveState(w); err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup on state save failure
return fmt.Errorf("saving state: %w", err)
}
// Launch Claude directly (no shell respawn loop)
// Restarts are handled by daemon via LIFECYCLE mail or deacon health-scan
// NOTE: No gt prime injection needed - SessionStart hook handles it automatically
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
command, err := buildWitnessStartCommand(m.rig.Path, m.rig.Name, townRoot, agentOverride, roleConfig)
if err != nil {
_ = t.KillSession(sessionID)
return err
}
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("waiting for shell: %w", err)
}
if err := t.SendKeys(sessionID, command); err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup
return fmt.Errorf("starting Claude agent: %w", err)
}
// Wait for Claude to start (non-fatal).
if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
// Non-fatal - try to continue anyway
}
// Accept bypass permissions warning dialog if it appears.
_ = t.AcceptBypassPermissionsWarning(sessionID)
time.Sleep(constants.ShutdownNotifyDelay)
// Inject startup nudge for predecessor discovery via /resume
address := fmt.Sprintf("%s/witness", m.rig.Name)
_ = session.StartupNudge(t, sessionID, session.StartupNudgeConfig{
Recipient: address,
Sender: "deacon",
Topic: "patrol",
}) // Non-fatal
// GUPP: Gas Town Universal Propulsion Principle
// Send the propulsion nudge to trigger autonomous patrol execution.
// Wait for beacon to be fully processed (needs to be separate prompt)
time.Sleep(2 * time.Second)
_ = t.NudgeSession(sessionID, session.PropulsionNudgeForRole("witness", witnessDir)) // Non-fatal
return nil
}
func (m *Manager) roleConfig() (*beads.RoleConfig, error) {
beadsPath := m.rig.BeadsPath()
beadsDir := beads.ResolveBeadsDir(beadsPath)
bd := beads.NewWithBeadsDir(beadsPath, beadsDir)
roleConfig, err := bd.GetRoleConfig(beads.RoleBeadIDTown("witness"))
if err != nil {
return nil, fmt.Errorf("loading witness role config: %w", err)
}
return roleConfig, nil
}
func (m *Manager) townRoot() string {
townRoot, err := workspace.Find(m.rig.Path)
if err != nil || townRoot == "" {
return m.rig.Path
}
return townRoot
}
func roleConfigEnvVars(roleConfig *beads.RoleConfig, townRoot, rigName string) map[string]string {
if roleConfig == nil || len(roleConfig.EnvVars) == 0 {
return nil
}
expanded := make(map[string]string, len(roleConfig.EnvVars))
for key, value := range roleConfig.EnvVars {
expanded[key] = beads.ExpandRolePattern(value, townRoot, rigName, "", "witness")
}
return expanded
}
func buildWitnessStartCommand(rigPath, rigName, townRoot, agentOverride string, roleConfig *beads.RoleConfig) (string, error) {
if agentOverride != "" {
roleConfig = nil
}
if roleConfig != nil && roleConfig.StartCommand != "" {
return beads.ExpandRolePattern(roleConfig.StartCommand, townRoot, rigName, "", "witness"), nil
}
bdActor := fmt.Sprintf("%s/witness", rigName)
command, err := config.BuildAgentStartupCommandWithAgentOverride("witness", bdActor, rigPath, "", agentOverride)
if err != nil {
return "", fmt.Errorf("building startup command: %w", err)
}
return command, nil
}
// Stop stops the witness.
func (m *Manager) Stop() error {
w, err := m.loadState()
if err != nil {
return err
}
// Check if tmux session exists
t := tmux.NewTmux()
sessionID := m.SessionName()
sessionRunning, _ := t.HasSession(sessionID)
// If neither state nor session indicates running, it's not running
if w.State != StateRunning && !sessionRunning {
return ErrNotRunning
}
// Kill tmux session if it exists (best-effort: may already be dead)
if sessionRunning {
_ = t.KillSession(sessionID)
}
// If we have a PID and it's a different process, try to stop it gracefully
if w.PID > 0 && w.PID != os.Getpid() && util.ProcessExists(w.PID) {
// Send SIGTERM (best-effort graceful stop)
if proc, err := os.FindProcess(w.PID); err == nil {
_ = proc.Signal(os.Interrupt)
}
}
w.State = StateStopped
w.PID = 0
return m.saveState(w)
}