The manager refactors (ea8bef2,72544cc0) conflicted with the agent override feature, causing regressions: Deacon (ea8bef2): - Lost agentOverride parameter - Re-added respawn loop (removed in5f2e16f) - Lost GUPP (startup + propulsion nudges) Crew (72544cc0): - Lost agentOverride wiring to StartOptions - --agent flag had no effect on crew refresh/restart This fix restores agent override support and GUPP while keeping improvements from the manager refactors (zombie detection, etc). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
573 lines
16 KiB
Go
573 lines
16 KiB
Go
package crew
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/steveyegge/gastown/internal/beads"
|
|
"github.com/steveyegge/gastown/internal/claude"
|
|
"github.com/steveyegge/gastown/internal/config"
|
|
"github.com/steveyegge/gastown/internal/constants"
|
|
"github.com/steveyegge/gastown/internal/git"
|
|
"github.com/steveyegge/gastown/internal/rig"
|
|
"github.com/steveyegge/gastown/internal/session"
|
|
"github.com/steveyegge/gastown/internal/tmux"
|
|
"github.com/steveyegge/gastown/internal/util"
|
|
)
|
|
|
|
// Common errors
|
|
var (
|
|
ErrCrewExists = errors.New("crew worker already exists")
|
|
ErrCrewNotFound = errors.New("crew worker not found")
|
|
ErrHasChanges = errors.New("crew worker has uncommitted changes")
|
|
ErrInvalidCrewName = errors.New("invalid crew name")
|
|
ErrSessionRunning = errors.New("session already running")
|
|
ErrSessionNotFound = errors.New("session not found")
|
|
)
|
|
|
|
// StartOptions configures crew session startup.
|
|
type StartOptions struct {
|
|
// Account specifies the account handle to use (overrides default).
|
|
Account string
|
|
|
|
// ClaudeConfigDir is resolved CLAUDE_CONFIG_DIR for the account.
|
|
// If set, this is injected as an environment variable.
|
|
ClaudeConfigDir string
|
|
|
|
// KillExisting kills any existing session before starting (for restart operations).
|
|
// If false and a session is running, Start() returns ErrSessionRunning.
|
|
KillExisting bool
|
|
|
|
// Topic is the startup nudge topic (e.g., "start", "restart", "refresh").
|
|
// Defaults to "start" if empty.
|
|
Topic string
|
|
|
|
// Interactive removes --dangerously-skip-permissions for interactive/refresh mode.
|
|
Interactive bool
|
|
|
|
// AgentOverride specifies an alternate agent alias (e.g., for testing).
|
|
AgentOverride string
|
|
}
|
|
|
|
// validateCrewName checks that a crew name is safe and valid.
|
|
// Rejects path traversal attempts and characters that break agent ID parsing.
|
|
func validateCrewName(name string) error {
|
|
if name == "" {
|
|
return fmt.Errorf("%w: name cannot be empty", ErrInvalidCrewName)
|
|
}
|
|
if name == "." || name == ".." {
|
|
return fmt.Errorf("%w: %q is not allowed", ErrInvalidCrewName, name)
|
|
}
|
|
if strings.ContainsAny(name, "/\\") {
|
|
return fmt.Errorf("%w: %q contains path separators", ErrInvalidCrewName, name)
|
|
}
|
|
if strings.Contains(name, "..") {
|
|
return fmt.Errorf("%w: %q contains path traversal sequence", ErrInvalidCrewName, name)
|
|
}
|
|
// Reject characters that break agent ID parsing (same as rig names)
|
|
if strings.ContainsAny(name, "-. ") {
|
|
sanitized := strings.NewReplacer("-", "_", ".", "_", " ", "_").Replace(name)
|
|
sanitized = strings.ToLower(sanitized)
|
|
return fmt.Errorf("%w: %q contains invalid characters; hyphens, dots, and spaces are reserved for agent ID parsing. Try %q instead", ErrInvalidCrewName, name, sanitized)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Manager handles crew worker lifecycle.
|
|
type Manager struct {
|
|
rig *rig.Rig
|
|
git *git.Git
|
|
}
|
|
|
|
// NewManager creates a new crew manager.
|
|
func NewManager(r *rig.Rig, g *git.Git) *Manager {
|
|
return &Manager{
|
|
rig: r,
|
|
git: g,
|
|
}
|
|
}
|
|
|
|
// crewDir returns the directory for a crew worker.
|
|
func (m *Manager) crewDir(name string) string {
|
|
return filepath.Join(m.rig.Path, "crew", name)
|
|
}
|
|
|
|
// stateFile returns the state file path for a crew worker.
|
|
func (m *Manager) stateFile(name string) string {
|
|
return filepath.Join(m.crewDir(name), "state.json")
|
|
}
|
|
|
|
// mailDir returns the mail directory path for a crew worker.
|
|
func (m *Manager) mailDir(name string) string {
|
|
return filepath.Join(m.crewDir(name), "mail")
|
|
}
|
|
|
|
// exists checks if a crew worker exists.
|
|
func (m *Manager) exists(name string) bool {
|
|
_, err := os.Stat(m.crewDir(name))
|
|
return err == nil
|
|
}
|
|
|
|
// Add creates a new crew worker with a clone of the rig.
|
|
func (m *Manager) Add(name string, createBranch bool) (*CrewWorker, error) {
|
|
if err := validateCrewName(name); err != nil {
|
|
return nil, err
|
|
}
|
|
if m.exists(name) {
|
|
return nil, ErrCrewExists
|
|
}
|
|
|
|
crewPath := m.crewDir(name)
|
|
|
|
// Create crew directory if needed
|
|
crewBaseDir := filepath.Join(m.rig.Path, "crew")
|
|
if err := os.MkdirAll(crewBaseDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating crew dir: %w", err)
|
|
}
|
|
|
|
// Clone the rig repo
|
|
if m.rig.LocalRepo != "" {
|
|
if err := m.git.CloneWithReference(m.rig.GitURL, crewPath, m.rig.LocalRepo); err != nil {
|
|
fmt.Printf("Warning: could not clone with local repo reference: %v\n", err)
|
|
if err := m.git.Clone(m.rig.GitURL, crewPath); err != nil {
|
|
return nil, fmt.Errorf("cloning rig: %w", err)
|
|
}
|
|
}
|
|
} else {
|
|
if err := m.git.Clone(m.rig.GitURL, crewPath); err != nil {
|
|
return nil, fmt.Errorf("cloning rig: %w", err)
|
|
}
|
|
}
|
|
|
|
crewGit := git.NewGit(crewPath)
|
|
branchName := m.rig.DefaultBranch()
|
|
|
|
// Optionally create a working branch
|
|
if createBranch {
|
|
branchName = fmt.Sprintf("crew/%s", name)
|
|
if err := crewGit.CreateBranch(branchName); err != nil {
|
|
_ = os.RemoveAll(crewPath) // best-effort cleanup
|
|
return nil, fmt.Errorf("creating branch: %w", err)
|
|
}
|
|
if err := crewGit.Checkout(branchName); err != nil {
|
|
_ = os.RemoveAll(crewPath) // best-effort cleanup
|
|
return nil, fmt.Errorf("checking out branch: %w", err)
|
|
}
|
|
}
|
|
|
|
// Create mail directory for mail delivery
|
|
mailPath := m.mailDir(name)
|
|
if err := os.MkdirAll(mailPath, 0755); err != nil {
|
|
_ = os.RemoveAll(crewPath) // best-effort cleanup
|
|
return nil, fmt.Errorf("creating mail dir: %w", err)
|
|
}
|
|
|
|
// Set up shared beads: crew uses rig's shared beads via redirect file
|
|
if err := m.setupSharedBeads(crewPath); err != nil {
|
|
// Non-fatal - crew can still work, warn but don't fail
|
|
fmt.Printf("Warning: could not set up shared beads: %v\n", err)
|
|
}
|
|
|
|
// NOTE: Slash commands (.claude/commands/) are provisioned at town level by gt install.
|
|
// All agents inherit them via Claude's directory traversal - no per-workspace copies needed.
|
|
|
|
// NOTE: We intentionally do NOT write to CLAUDE.md here.
|
|
// Gas Town context is injected ephemerally via SessionStart hook (gt prime).
|
|
// Writing to CLAUDE.md would overwrite project instructions and leak
|
|
// Gas Town internals into the project repo when workers commit/push.
|
|
|
|
// Create crew worker state
|
|
now := time.Now()
|
|
crew := &CrewWorker{
|
|
Name: name,
|
|
Rig: m.rig.Name,
|
|
ClonePath: crewPath,
|
|
Branch: branchName,
|
|
CreatedAt: now,
|
|
UpdatedAt: now,
|
|
}
|
|
|
|
// Save state
|
|
if err := m.saveState(crew); err != nil {
|
|
_ = os.RemoveAll(crewPath) // best-effort cleanup
|
|
return nil, fmt.Errorf("saving state: %w", err)
|
|
}
|
|
|
|
return crew, nil
|
|
}
|
|
|
|
// Remove deletes a crew worker.
|
|
func (m *Manager) Remove(name string, force bool) error {
|
|
if err := validateCrewName(name); err != nil {
|
|
return err
|
|
}
|
|
if !m.exists(name) {
|
|
return ErrCrewNotFound
|
|
}
|
|
|
|
crewPath := m.crewDir(name)
|
|
|
|
if !force {
|
|
crewGit := git.NewGit(crewPath)
|
|
hasChanges, err := crewGit.HasUncommittedChanges()
|
|
if err == nil && hasChanges {
|
|
return ErrHasChanges
|
|
}
|
|
}
|
|
|
|
// Remove directory
|
|
if err := os.RemoveAll(crewPath); err != nil {
|
|
return fmt.Errorf("removing crew dir: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// List returns all crew workers in the rig.
|
|
func (m *Manager) List() ([]*CrewWorker, error) {
|
|
crewBaseDir := filepath.Join(m.rig.Path, "crew")
|
|
|
|
entries, err := os.ReadDir(crewBaseDir)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, fmt.Errorf("reading crew dir: %w", err)
|
|
}
|
|
|
|
var workers []*CrewWorker
|
|
for _, entry := range entries {
|
|
if !entry.IsDir() {
|
|
continue
|
|
}
|
|
|
|
worker, err := m.Get(entry.Name())
|
|
if err != nil {
|
|
continue // Skip invalid workers
|
|
}
|
|
workers = append(workers, worker)
|
|
}
|
|
|
|
return workers, nil
|
|
}
|
|
|
|
// Get returns a specific crew worker by name.
|
|
func (m *Manager) Get(name string) (*CrewWorker, error) {
|
|
if err := validateCrewName(name); err != nil {
|
|
return nil, err
|
|
}
|
|
if !m.exists(name) {
|
|
return nil, ErrCrewNotFound
|
|
}
|
|
|
|
return m.loadState(name)
|
|
}
|
|
|
|
// saveState persists crew worker state to disk using atomic write.
|
|
func (m *Manager) saveState(crew *CrewWorker) error {
|
|
stateFile := m.stateFile(crew.Name)
|
|
if err := util.AtomicWriteJSON(stateFile, crew); err != nil {
|
|
return fmt.Errorf("writing state: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// loadState reads crew worker state from disk.
|
|
func (m *Manager) loadState(name string) (*CrewWorker, error) {
|
|
stateFile := m.stateFile(name)
|
|
|
|
data, err := os.ReadFile(stateFile)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
// Return minimal crew worker if state file missing
|
|
return &CrewWorker{
|
|
Name: name,
|
|
Rig: m.rig.Name,
|
|
ClonePath: m.crewDir(name),
|
|
}, nil
|
|
}
|
|
return nil, fmt.Errorf("reading state: %w", err)
|
|
}
|
|
|
|
var crew CrewWorker
|
|
if err := json.Unmarshal(data, &crew); err != nil {
|
|
return nil, fmt.Errorf("parsing state: %w", err)
|
|
}
|
|
|
|
// Backfill essential fields if missing (handles empty or incomplete state.json)
|
|
if crew.Name == "" {
|
|
crew.Name = name
|
|
}
|
|
if crew.Rig == "" {
|
|
crew.Rig = m.rig.Name
|
|
}
|
|
if crew.ClonePath == "" {
|
|
crew.ClonePath = m.crewDir(name)
|
|
}
|
|
|
|
return &crew, nil
|
|
}
|
|
|
|
// Rename renames a crew worker from oldName to newName.
|
|
func (m *Manager) Rename(oldName, newName string) error {
|
|
if !m.exists(oldName) {
|
|
return ErrCrewNotFound
|
|
}
|
|
if m.exists(newName) {
|
|
return ErrCrewExists
|
|
}
|
|
|
|
oldPath := m.crewDir(oldName)
|
|
newPath := m.crewDir(newName)
|
|
|
|
// Rename directory
|
|
if err := os.Rename(oldPath, newPath); err != nil {
|
|
return fmt.Errorf("renaming crew dir: %w", err)
|
|
}
|
|
|
|
// Update state file with new name and path
|
|
crew, err := m.loadState(newName)
|
|
if err != nil {
|
|
// Rollback on error (best-effort)
|
|
_ = os.Rename(newPath, oldPath)
|
|
return fmt.Errorf("loading state: %w", err)
|
|
}
|
|
|
|
crew.Name = newName
|
|
crew.ClonePath = newPath
|
|
crew.UpdatedAt = time.Now()
|
|
|
|
if err := m.saveState(crew); err != nil {
|
|
// Rollback on error (best-effort)
|
|
_ = os.Rename(newPath, oldPath)
|
|
return fmt.Errorf("saving state: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Pristine ensures a crew worker is up-to-date with remote.
|
|
// It runs git pull --rebase and bd sync.
|
|
func (m *Manager) Pristine(name string) (*PristineResult, error) {
|
|
if err := validateCrewName(name); err != nil {
|
|
return nil, err
|
|
}
|
|
if !m.exists(name) {
|
|
return nil, ErrCrewNotFound
|
|
}
|
|
|
|
crewPath := m.crewDir(name)
|
|
crewGit := git.NewGit(crewPath)
|
|
|
|
result := &PristineResult{
|
|
Name: name,
|
|
}
|
|
|
|
// Check for uncommitted changes
|
|
hasChanges, err := crewGit.HasUncommittedChanges()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("checking changes: %w", err)
|
|
}
|
|
result.HadChanges = hasChanges
|
|
|
|
// Pull latest (use origin and current branch)
|
|
if err := crewGit.Pull("origin", ""); err != nil {
|
|
result.PullError = err.Error()
|
|
} else {
|
|
result.Pulled = true
|
|
}
|
|
|
|
// Run bd sync
|
|
if err := m.runBdSync(crewPath); err != nil {
|
|
result.SyncError = err.Error()
|
|
} else {
|
|
result.Synced = true
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// runBdSync runs bd sync in the given directory.
|
|
func (m *Manager) runBdSync(dir string) error {
|
|
cmd := exec.Command("bd", "sync")
|
|
cmd.Dir = dir
|
|
return cmd.Run()
|
|
}
|
|
|
|
// PristineResult captures the results of a pristine operation.
|
|
type PristineResult struct {
|
|
Name string `json:"name"`
|
|
HadChanges bool `json:"had_changes"`
|
|
Pulled bool `json:"pulled"`
|
|
PullError string `json:"pull_error,omitempty"`
|
|
Synced bool `json:"synced"`
|
|
SyncError string `json:"sync_error,omitempty"`
|
|
}
|
|
|
|
// setupSharedBeads creates a redirect file so the crew worker uses the rig's shared .beads database.
|
|
// This eliminates the need for git sync between crew clones - all crew members share one database.
|
|
func (m *Manager) setupSharedBeads(crewPath string) error {
|
|
townRoot := filepath.Dir(m.rig.Path)
|
|
return beads.SetupRedirect(townRoot, crewPath)
|
|
}
|
|
|
|
// SessionName returns the tmux session name for a crew member.
|
|
func (m *Manager) SessionName(name string) string {
|
|
return fmt.Sprintf("gt-%s-crew-%s", m.rig.Name, name)
|
|
}
|
|
|
|
// Start creates and starts a tmux session for a crew member.
|
|
// If the crew member doesn't exist, it will be created first.
|
|
func (m *Manager) Start(name string, opts StartOptions) error {
|
|
if err := validateCrewName(name); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Get or create the crew worker
|
|
worker, err := m.Get(name)
|
|
if err == ErrCrewNotFound {
|
|
worker, err = m.Add(name, false) // No feature branch for crew
|
|
if err != nil {
|
|
return fmt.Errorf("creating crew workspace: %w", err)
|
|
}
|
|
} else if err != nil {
|
|
return fmt.Errorf("getting crew worker: %w", err)
|
|
}
|
|
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName(name)
|
|
|
|
// Check if session already exists
|
|
running, err := t.HasSession(sessionID)
|
|
if err != nil {
|
|
return fmt.Errorf("checking session: %w", err)
|
|
}
|
|
if running {
|
|
if opts.KillExisting {
|
|
// Restart mode - kill existing session
|
|
if err := t.KillSession(sessionID); err != nil {
|
|
return fmt.Errorf("killing existing session: %w", err)
|
|
}
|
|
} else {
|
|
// Normal start - session exists, check if Claude is actually running
|
|
if t.IsClaudeRunning(sessionID) {
|
|
return fmt.Errorf("%w: %s", ErrSessionRunning, sessionID)
|
|
}
|
|
// Zombie session - kill and recreate
|
|
if err := t.KillSession(sessionID); err != nil {
|
|
return fmt.Errorf("killing zombie session: %w", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Ensure Claude settings exist in crew/ (not crew/<name>/) so we don't
|
|
// write into the source repo. Claude walks up the tree to find settings.
|
|
// All crew members share the same settings file.
|
|
crewBaseDir := filepath.Join(m.rig.Path, "crew")
|
|
if err := claude.EnsureSettingsForRole(crewBaseDir, "crew"); err != nil {
|
|
return fmt.Errorf("ensuring Claude settings: %w", err)
|
|
}
|
|
|
|
// Create tmux session
|
|
if err := t.NewSession(sessionID, worker.ClonePath); err != nil {
|
|
return fmt.Errorf("creating session: %w", err)
|
|
}
|
|
|
|
// Set environment variables (non-fatal: session works without these)
|
|
_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
|
|
_ = t.SetEnvironment(sessionID, "GT_CREW", name)
|
|
_ = t.SetEnvironment(sessionID, "GT_ROLE", "crew")
|
|
|
|
// Set CLAUDE_CONFIG_DIR for account selection (non-fatal)
|
|
if opts.ClaudeConfigDir != "" {
|
|
_ = t.SetEnvironment(sessionID, "CLAUDE_CONFIG_DIR", opts.ClaudeConfigDir)
|
|
}
|
|
|
|
// Apply rig-based theming (non-fatal: theming failure doesn't affect operation)
|
|
theme := tmux.AssignTheme(m.rig.Name)
|
|
_ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, name, "crew")
|
|
|
|
// Set up C-b n/p keybindings for crew session cycling (non-fatal)
|
|
_ = t.SetCrewCycleBindings(sessionID)
|
|
|
|
// Wait for shell to be ready
|
|
if err := t.WaitForShellReady(sessionID, constants.ShellReadyTimeout); err != nil {
|
|
return fmt.Errorf("waiting for shell: %w", err)
|
|
}
|
|
|
|
// Build the startup beacon for predecessor discovery via /resume
|
|
// Pass it as Claude's initial prompt - processed when Claude is ready
|
|
address := fmt.Sprintf("%s/crew/%s", m.rig.Name, name)
|
|
topic := opts.Topic
|
|
if topic == "" {
|
|
topic = "start"
|
|
}
|
|
beacon := session.FormatStartupNudge(session.StartupNudgeConfig{
|
|
Recipient: address,
|
|
Sender: "human",
|
|
Topic: topic,
|
|
})
|
|
|
|
// Start claude with environment exports and beacon as initial prompt
|
|
// SessionStart hook handles context loading (gt prime --hook)
|
|
claudeCmd, err := config.BuildCrewStartupCommandWithAgentOverride(m.rig.Name, name, m.rig.Path, beacon, opts.AgentOverride)
|
|
if err != nil {
|
|
_ = t.KillSession(sessionID)
|
|
return fmt.Errorf("building startup command: %w", err)
|
|
}
|
|
|
|
// For interactive/refresh mode, remove --dangerously-skip-permissions
|
|
if opts.Interactive {
|
|
claudeCmd = strings.Replace(claudeCmd, " --dangerously-skip-permissions", "", 1)
|
|
}
|
|
if err := t.SendKeys(sessionID, claudeCmd); err != nil {
|
|
_ = t.KillSession(sessionID) // best-effort cleanup
|
|
return fmt.Errorf("starting claude: %w", err)
|
|
}
|
|
|
|
// Wait for Claude to start (non-fatal: session continues even if this times out)
|
|
_ = t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout)
|
|
|
|
return nil
|
|
}
|
|
|
|
// Stop terminates a crew member's tmux session.
|
|
func (m *Manager) Stop(name string) error {
|
|
if err := validateCrewName(name); err != nil {
|
|
return err
|
|
}
|
|
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName(name)
|
|
|
|
// Check if session exists
|
|
running, err := t.HasSession(sessionID)
|
|
if err != nil {
|
|
return fmt.Errorf("checking session: %w", err)
|
|
}
|
|
if !running {
|
|
return ErrSessionNotFound
|
|
}
|
|
|
|
// Kill the session
|
|
if err := t.KillSession(sessionID); err != nil {
|
|
return fmt.Errorf("killing session: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// IsRunning checks if a crew member's session is active.
|
|
func (m *Manager) IsRunning(name string) (bool, error) {
|
|
t := tmux.NewTmux()
|
|
sessionID := m.SessionName(name)
|
|
return t.HasSession(sessionID)
|
|
}
|