* fix(sling_test): update test for cook dir change
The cook command no longer needs database context and runs from cwd,
not the target rig directory. Update test to match this behavior
change from bd2a5ab5.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* fix(tests): skip tests requiring missing binaries, handle --allow-stale
- Add skipIfAgentBinaryMissing helper to skip tests when codex/gemini
binaries aren't available in the test environment
- Update rig manager test stub to handle --allow-stale flag
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* refactor(config): remove BEADS_DIR from agent environment
Stop exporting BEADS_DIR in AgentEnv - agents should use beads redirect
mechanism instead of relying on environment variable. This prevents
prefix mismatches when agents operate across different beads databases.
Changes:
- Remove BeadsDir field from AgentEnvConfig
- Remove BEADS_DIR from env vars set on agent sessions
- Update doctor env_check to not expect BEADS_DIR
- Update all manager Start() calls to not pass BeadsDir
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* fix(doctor): detect BEADS_DIR in tmux session environment
Add a doctor check that warns when BEADS_DIR is set in any Gas Town
tmux session. BEADS_DIR in the environment overrides prefix-based
routing and breaks multi-rig lookups - agents should use the beads
redirect mechanism instead.
The check:
- Iterates over all Gas Town tmux sessions (gt-* and hq-*)
- Checks if BEADS_DIR is set in the session environment
- Returns a warning with fix hint to restart sessions
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---------
Co-authored-by: julianknutsen <julianknutsen@users.noreply.github>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
314 lines
9.5 KiB
Go
314 lines
9.5 KiB
Go
package witness
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/steveyegge/gastown/internal/agent"
|
|
"github.com/steveyegge/gastown/internal/beads"
|
|
"github.com/steveyegge/gastown/internal/claude"
|
|
"github.com/steveyegge/gastown/internal/config"
|
|
"github.com/steveyegge/gastown/internal/constants"
|
|
"github.com/steveyegge/gastown/internal/rig"
|
|
"github.com/steveyegge/gastown/internal/session"
|
|
"github.com/steveyegge/gastown/internal/tmux"
|
|
"github.com/steveyegge/gastown/internal/workspace"
|
|
)
|
|
|
|
// Sentinel errors returned by Manager lifecycle operations.
var (
	// ErrNotRunning is returned by Stop when neither the saved state nor
	// the tmux session indicates a running witness.
	ErrNotRunning = errors.New("witness not running")

	// ErrAlreadyRunning is returned by Start when a witness session is
	// already up with Claude running in it.
	ErrAlreadyRunning = errors.New("witness already running")
)
|
|
|
|
// Manager handles witness lifecycle and monitoring operations.
|
|
type Manager struct {
|
|
rig *rig.Rig
|
|
workDir string
|
|
stateManager *agent.StateManager[Witness]
|
|
}
|
|
|
|
// NewManager creates a new witness manager for a rig.
|
|
func NewManager(r *rig.Rig) *Manager {
|
|
return &Manager{
|
|
rig: r,
|
|
workDir: r.Path,
|
|
stateManager: agent.NewStateManager[Witness](r.Path, "witness.json", func() *Witness {
|
|
return &Witness{
|
|
RigName: r.Name,
|
|
State: StateStopped,
|
|
}
|
|
}),
|
|
}
|
|
}
|
|
|
|
// stateFile returns the path to the witness state file.
|
|
func (m *Manager) stateFile() string {
|
|
return m.stateManager.StateFile()
|
|
}
|
|
|
|
// loadState loads witness state from disk.
|
|
func (m *Manager) loadState() (*Witness, error) {
|
|
return m.stateManager.Load()
|
|
}
|
|
|
|
// saveState persists witness state to disk using atomic write.
|
|
func (m *Manager) saveState(w *Witness) error {
|
|
return m.stateManager.Save(w)
|
|
}
|
|
|
|
// SessionName returns the tmux session name for this witness.
|
|
func (m *Manager) SessionName() string {
|
|
return fmt.Sprintf("gt-%s-witness", m.rig.Name)
|
|
}
|
|
|
|
// Status returns the current witness status.
|
|
// ZFC-compliant: trusts agent-reported state, no PID inference.
|
|
// The daemon reads agent bead state for liveness checks.
|
|
func (m *Manager) Status() (*Witness, error) {
|
|
w, err := m.loadState()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Update monitored polecats list (still useful for display)
|
|
w.MonitoredPolecats = m.rig.Polecats
|
|
|
|
return w, nil
|
|
}
|
|
|
|
// witnessDir returns the working directory for the witness.
|
|
// Prefers witness/rig/, falls back to witness/, then rig root.
|
|
func (m *Manager) witnessDir() string {
|
|
witnessRigDir := filepath.Join(m.rig.Path, "witness", "rig")
|
|
if _, err := os.Stat(witnessRigDir); err == nil {
|
|
return witnessRigDir
|
|
}
|
|
|
|
witnessDir := filepath.Join(m.rig.Path, "witness")
|
|
if _, err := os.Stat(witnessDir); err == nil {
|
|
return witnessDir
|
|
}
|
|
|
|
return m.rig.Path
|
|
}
|
|
|
|
// Start starts the witness.
|
|
// If foreground is true, only updates state (no tmux session - deprecated).
|
|
// Otherwise, spawns a Claude agent in a tmux session.
|
|
// agentOverride optionally specifies a different agent alias to use.
|
|
// envOverrides are KEY=VALUE pairs that override all other env var sources.
|
|
func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []string) error {
|
|
w, err := m.loadState()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName()
|
|
|
|
if foreground {
|
|
// Foreground mode is deprecated - patrol logic moved to mol-witness-patrol
|
|
// Just check tmux session (no PID inference per ZFC)
|
|
if running, _ := t.HasSession(sessionID); running && t.IsClaudeRunning(sessionID) {
|
|
return ErrAlreadyRunning
|
|
}
|
|
|
|
now := time.Now()
|
|
w.State = StateRunning
|
|
w.StartedAt = &now
|
|
w.PID = 0 // No longer track PID (ZFC)
|
|
w.MonitoredPolecats = m.rig.Polecats
|
|
|
|
return m.saveState(w)
|
|
}
|
|
|
|
// Background mode: check if session already exists
|
|
running, _ := t.HasSession(sessionID)
|
|
if running {
|
|
// Session exists - check if Claude is actually running (healthy vs zombie)
|
|
if t.IsClaudeRunning(sessionID) {
|
|
// Healthy - Claude is running
|
|
return ErrAlreadyRunning
|
|
}
|
|
// Zombie - tmux alive but Claude dead. Kill and recreate.
|
|
if err := t.KillSession(sessionID); err != nil {
|
|
return fmt.Errorf("killing zombie session: %w", err)
|
|
}
|
|
}
|
|
|
|
// Note: No PID check per ZFC - tmux session is the source of truth
|
|
|
|
// Working directory
|
|
witnessDir := m.witnessDir()
|
|
|
|
// Ensure Claude settings exist in witness/ (not witness/rig/) so we don't
|
|
// write into the source repo. Claude walks up the tree to find settings.
|
|
witnessParentDir := filepath.Join(m.rig.Path, "witness")
|
|
if err := claude.EnsureSettingsForRole(witnessParentDir, "witness"); err != nil {
|
|
return fmt.Errorf("ensuring Claude settings: %w", err)
|
|
}
|
|
|
|
roleConfig, err := m.roleConfig()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
townRoot := m.townRoot()
|
|
|
|
// Build startup command first
|
|
// NOTE: No gt prime injection needed - SessionStart hook handles it automatically
|
|
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
|
|
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
|
|
command, err := buildWitnessStartCommand(m.rig.Path, m.rig.Name, townRoot, agentOverride, roleConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Create session with command directly to avoid send-keys race condition.
|
|
// See: https://github.com/anthropics/gastown/issues/280
|
|
if err := t.NewSessionWithCommand(sessionID, witnessDir, command); err != nil {
|
|
return fmt.Errorf("creating tmux session: %w", err)
|
|
}
|
|
|
|
// Set environment variables (non-fatal: session works without these)
|
|
// Use centralized AgentEnv for consistency across all role startup paths
|
|
envVars := config.AgentEnv(config.AgentEnvConfig{
|
|
Role: "witness",
|
|
Rig: m.rig.Name,
|
|
TownRoot: townRoot,
|
|
})
|
|
for k, v := range envVars {
|
|
_ = t.SetEnvironment(sessionID, k, v)
|
|
}
|
|
// Apply role config env vars if present (non-fatal).
|
|
for key, value := range roleConfigEnvVars(roleConfig, townRoot, m.rig.Name) {
|
|
_ = t.SetEnvironment(sessionID, key, value)
|
|
}
|
|
// Apply CLI env overrides (highest priority, non-fatal).
|
|
for _, override := range envOverrides {
|
|
if key, value, ok := strings.Cut(override, "="); ok {
|
|
_ = t.SetEnvironment(sessionID, key, value)
|
|
}
|
|
}
|
|
|
|
// Apply Gas Town theming (non-fatal: theming failure doesn't affect operation)
|
|
theme := tmux.AssignTheme(m.rig.Name)
|
|
_ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, "witness", "witness")
|
|
|
|
// Update state to running
|
|
now := time.Now()
|
|
w.State = StateRunning
|
|
w.StartedAt = &now
|
|
w.PID = 0 // Claude agent doesn't have a PID we track
|
|
w.MonitoredPolecats = m.rig.Polecats
|
|
if err := m.saveState(w); err != nil {
|
|
_ = t.KillSession(sessionID) // best-effort cleanup on state save failure
|
|
return fmt.Errorf("saving state: %w", err)
|
|
}
|
|
|
|
// Wait for Claude to start (non-fatal).
|
|
if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
|
|
// Non-fatal - try to continue anyway
|
|
}
|
|
|
|
// Accept bypass permissions warning dialog if it appears.
|
|
_ = t.AcceptBypassPermissionsWarning(sessionID)
|
|
|
|
time.Sleep(constants.ShutdownNotifyDelay)
|
|
|
|
// Inject startup nudge for predecessor discovery via /resume
|
|
address := fmt.Sprintf("%s/witness", m.rig.Name)
|
|
_ = session.StartupNudge(t, sessionID, session.StartupNudgeConfig{
|
|
Recipient: address,
|
|
Sender: "deacon",
|
|
Topic: "patrol",
|
|
}) // Non-fatal
|
|
|
|
// GUPP: Gas Town Universal Propulsion Principle
|
|
// Send the propulsion nudge to trigger autonomous patrol execution.
|
|
// Wait for beacon to be fully processed (needs to be separate prompt)
|
|
time.Sleep(2 * time.Second)
|
|
_ = t.NudgeSession(sessionID, session.PropulsionNudgeForRole("witness", witnessDir)) // Non-fatal
|
|
|
|
return nil
|
|
}
|
|
|
|
func (m *Manager) roleConfig() (*beads.RoleConfig, error) {
|
|
// Role beads use hq- prefix and live in town-level beads, not rig beads
|
|
townRoot := m.townRoot()
|
|
bd := beads.NewWithBeadsDir(townRoot, beads.ResolveBeadsDir(townRoot))
|
|
roleConfig, err := bd.GetRoleConfig(beads.RoleBeadIDTown("witness"))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("loading witness role config: %w", err)
|
|
}
|
|
return roleConfig, nil
|
|
}
|
|
|
|
func (m *Manager) townRoot() string {
|
|
townRoot, err := workspace.Find(m.rig.Path)
|
|
if err != nil || townRoot == "" {
|
|
return m.rig.Path
|
|
}
|
|
return townRoot
|
|
}
|
|
|
|
func roleConfigEnvVars(roleConfig *beads.RoleConfig, townRoot, rigName string) map[string]string {
|
|
if roleConfig == nil || len(roleConfig.EnvVars) == 0 {
|
|
return nil
|
|
}
|
|
expanded := make(map[string]string, len(roleConfig.EnvVars))
|
|
for key, value := range roleConfig.EnvVars {
|
|
expanded[key] = beads.ExpandRolePattern(value, townRoot, rigName, "", "witness")
|
|
}
|
|
return expanded
|
|
}
|
|
|
|
func buildWitnessStartCommand(rigPath, rigName, townRoot, agentOverride string, roleConfig *beads.RoleConfig) (string, error) {
|
|
if agentOverride != "" {
|
|
roleConfig = nil
|
|
}
|
|
if roleConfig != nil && roleConfig.StartCommand != "" {
|
|
return beads.ExpandRolePattern(roleConfig.StartCommand, townRoot, rigName, "", "witness"), nil
|
|
}
|
|
command, err := config.BuildAgentStartupCommandWithAgentOverride("witness", rigName, townRoot, rigPath, "", agentOverride)
|
|
if err != nil {
|
|
return "", fmt.Errorf("building startup command: %w", err)
|
|
}
|
|
return command, nil
|
|
}
|
|
|
|
// Stop stops the witness.
|
|
func (m *Manager) Stop() error {
|
|
w, err := m.loadState()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check if tmux session exists
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName()
|
|
sessionRunning, _ := t.HasSession(sessionID)
|
|
|
|
// If neither state nor session indicates running, it's not running
|
|
if w.State != StateRunning && !sessionRunning {
|
|
return ErrNotRunning
|
|
}
|
|
|
|
// Kill tmux session if it exists (best-effort: may already be dead)
|
|
if sessionRunning {
|
|
_ = t.KillSession(sessionID)
|
|
}
|
|
|
|
// Note: No PID-based stop per ZFC - tmux session kill is sufficient
|
|
|
|
w.State = StateStopped
|
|
w.PID = 0
|
|
|
|
return m.saveState(w)
|
|
}
|