Files
gastown/internal/cmd/up.go
gastown/crew/max 4ef93e1d8a fix(rig): respect parked/docked status in gt up and gt rig start
Previously, `gt up` and `gt rig start` would start witnesses and
refineries for parked/docked rigs, bypassing the operational status
protection. Only the daemon respected the wisp config status.

Now both commands check wisp config status before starting agents:
- `gt up` shows "skipped (rig parked)" for parked/docked rigs
- `gt rig start` warns and skips parked/docked rigs

This prevents accidentally bringing parked/docked rigs back online
when running routine commands.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 00:50:46 -08:00

716 lines
19 KiB
Go

package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/beads"
"github.com/steveyegge/gastown/internal/config"
"github.com/steveyegge/gastown/internal/crew"
"github.com/steveyegge/gastown/internal/daemon"
"github.com/steveyegge/gastown/internal/deacon"
"github.com/steveyegge/gastown/internal/events"
"github.com/steveyegge/gastown/internal/mayor"
"github.com/steveyegge/gastown/internal/polecat"
"github.com/steveyegge/gastown/internal/refinery"
"github.com/steveyegge/gastown/internal/rig"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/wisp"
"github.com/steveyegge/gastown/internal/witness"
"github.com/steveyegge/gastown/internal/workspace"
)
// agentStartResult holds the outcome of attempting to start one agent.
// An already-running agent is reported as a success (ok=true).
type agentStartResult struct {
	name   string // Display name like "Witness (gastown)"
	ok     bool   // Whether start succeeded (already-running counts as success)
	detail string // Status detail (tmux session name, skip reason, or error text)
}
// maxConcurrentAgentStarts limits parallel agent startups to avoid
// resource exhaustion when a town has many rigs (two tasks per rig:
// witness + refinery).
const maxConcurrentAgentStarts = 10
// upCmd implements `gt up`, the idempotent boot command that brings up
// every long-lived Gas Town service (daemon, deacon, mayor, witnesses,
// refineries); --restore additionally starts crew and working polecats.
var upCmd = &cobra.Command{
	Use:     "up",
	GroupID: GroupServices,
	Short:   "Bring up all Gas Town services",
	Long: `Start all Gas Town long-lived services.
This is the idempotent "boot" command for Gas Town. It ensures all
infrastructure agents are running:
• Daemon - Go background process that pokes agents
• Deacon - Health orchestrator (monitors Mayor/Witnesses)
• Mayor - Global work coordinator
• Witnesses - Per-rig polecat managers
• Refineries - Per-rig merge queue processors
Polecats are NOT started by this command - they are transient workers
spawned on demand by the Mayor or Witnesses.
Use --restore to also start:
• Crew - Per rig settings (settings/config.json crew.startup)
• Polecats - Those with pinned beads (work attached)
Running 'gt up' multiple times is safe - it only starts services that
aren't already running.`,
	RunE: runUp,
}
// Flag values for `gt up`, bound in init below.
var (
	upQuiet   bool // --quiet: suppress success lines, print only errors
	upRestore bool // --restore: also start crew (from settings) and polecats with pinned work
)
// init registers the up command and its flags on the root command.
func init() {
	upCmd.Flags().BoolVarP(&upQuiet, "quiet", "q", false, "Only show errors")
	upCmd.Flags().BoolVar(&upRestore, "restore", false, "Also restore crew (from settings) and polecats (from hooks)")
	rootCmd.AddCommand(upCmd)
}
// runUp implements `gt up`. It is idempotent: each service is only
// started if it is not already running.
//
// Startup phases:
//  1. Daemon, Deacon, Mayor, and rig-config prefetch run in parallel.
//  2. Witnesses and Refineries start through a bounded worker pool,
//     using the prefetched rig configs.
//  3. With --restore, crew (per rig settings) and polecats with pinned
//     work are also started.
//
// Returns an error if any service failed to start.
func runUp(cmd *cobra.Command, args []string) error {
	townRoot, err := workspace.FindFromCwdOrError()
	if err != nil {
		return fmt.Errorf("not in a Gas Town workspace: %w", err)
	}
	allOK := true
	// Discover rigs early so we can prefetch while daemon/deacon/mayor start
	rigs := discoverRigs(townRoot)
	// Start daemon, deacon, mayor, and rig prefetch in parallel.
	// Each goroutine writes only its own result variable(s); the
	// startupWg.Wait() below establishes the happens-before edge before
	// anything is read, so no mutex is needed.
	var daemonErr error
	var daemonPID int
	var deaconResult, mayorResult agentStartResult
	var prefetchedRigs map[string]*rig.Rig
	var rigErrors map[string]error
	var startupWg sync.WaitGroup
	startupWg.Add(4)
	// 1. Daemon (Go process)
	go func() {
		defer startupWg.Done()
		if err := ensureDaemon(townRoot); err != nil {
			daemonErr = err
		} else {
			running, pid, _ := daemon.IsRunning(townRoot)
			if running {
				daemonPID = pid
			}
		}
	}()
	// 2. Deacon (already-running counts as success)
	go func() {
		defer startupWg.Done()
		deaconMgr := deacon.NewManager(townRoot)
		if err := deaconMgr.Start(""); err != nil {
			if err == deacon.ErrAlreadyRunning {
				deaconResult = agentStartResult{name: "Deacon", ok: true, detail: deaconMgr.SessionName()}
			} else {
				deaconResult = agentStartResult{name: "Deacon", ok: false, detail: err.Error()}
			}
		} else {
			deaconResult = agentStartResult{name: "Deacon", ok: true, detail: deaconMgr.SessionName()}
		}
	}()
	// 3. Mayor (already-running counts as success)
	go func() {
		defer startupWg.Done()
		mayorMgr := mayor.NewManager(townRoot)
		if err := mayorMgr.Start(""); err != nil {
			if err == mayor.ErrAlreadyRunning {
				mayorResult = agentStartResult{name: "Mayor", ok: true, detail: mayorMgr.SessionName()}
			} else {
				mayorResult = agentStartResult{name: "Mayor", ok: false, detail: err.Error()}
			}
		} else {
			mayorResult = agentStartResult{name: "Mayor", ok: true, detail: mayorMgr.SessionName()}
		}
	}()
	// 4. Prefetch rig configs (overlaps with daemon/deacon/mayor startup)
	go func() {
		defer startupWg.Done()
		prefetchedRigs, rigErrors = prefetchRigs(rigs)
	}()
	startupWg.Wait()
	// Print daemon/deacon/mayor results
	if daemonErr != nil {
		printStatus("Daemon", false, daemonErr.Error())
		allOK = false
	} else if daemonPID > 0 {
		printStatus("Daemon", true, fmt.Sprintf("PID %d", daemonPID))
	}
	printStatus(deaconResult.name, deaconResult.ok, deaconResult.detail)
	if !deaconResult.ok {
		allOK = false
	}
	printStatus(mayorResult.name, mayorResult.ok, mayorResult.detail)
	if !mayorResult.ok {
		allOK = false
	}
	// 5 & 6. Witnesses and Refineries (using prefetched rigs)
	witnessResults, refineryResults := startRigAgentsWithPrefetch(rigs, prefetchedRigs, rigErrors)
	// Print results in order: all witnesses first, then all refineries
	for _, rigName := range rigs {
		if result, ok := witnessResults[rigName]; ok {
			printStatus(result.name, result.ok, result.detail)
			if !result.ok {
				allOK = false
			}
		}
	}
	for _, rigName := range rigs {
		if result, ok := refineryResults[rigName]; ok {
			printStatus(result.name, result.ok, result.detail)
			if !result.ok {
				allOK = false
			}
		}
	}
	// 7. Crew (if --restore)
	if upRestore {
		for _, rigName := range rigs {
			crewStarted, crewErrors := startCrewFromSettings(townRoot, rigName)
			for _, name := range crewStarted {
				printStatus(fmt.Sprintf("Crew (%s/%s)", rigName, name), true, fmt.Sprintf("gt-%s-crew-%s", rigName, name))
			}
			for name, err := range crewErrors {
				printStatus(fmt.Sprintf("Crew (%s/%s)", rigName, name), false, err.Error())
				allOK = false
			}
		}
		// 8. Polecats with pinned work (if --restore)
		for _, rigName := range rigs {
			polecatsStarted, polecatErrors := startPolecatsWithWork(townRoot, rigName)
			for _, name := range polecatsStarted {
				printStatus(fmt.Sprintf("Polecat (%s/%s)", rigName, name), true, fmt.Sprintf("gt-%s-polecat-%s", rigName, name))
			}
			for name, err := range polecatErrors {
				printStatus(fmt.Sprintf("Polecat (%s/%s)", rigName, name), false, err.Error())
				allOK = false
			}
		}
	}
	fmt.Println()
	if allOK {
		fmt.Printf("%s All services running\n", style.Bold.Render("✓"))
		// Log boot event with started services
		startedServices := []string{"daemon", "deacon", "mayor"}
		for _, rigName := range rigs {
			startedServices = append(startedServices, fmt.Sprintf("%s/witness", rigName))
			startedServices = append(startedServices, fmt.Sprintf("%s/refinery", rigName))
		}
		// Best-effort: a feed-logging failure must not fail `gt up`.
		_ = events.LogFeed(events.TypeBoot, "gt", events.BootPayload("town", startedServices))
	} else {
		fmt.Printf("%s Some services failed to start\n", style.Bold.Render("✗"))
		return fmt.Errorf("not all services started")
	}
	return nil
}
func printStatus(name string, ok bool, detail string) {
if upQuiet && ok {
return
}
if ok {
fmt.Printf("%s %s: %s\n", style.SuccessPrefix, name, style.Dim.Render(detail))
} else {
fmt.Printf("%s %s: %s\n", style.ErrorPrefix, name, detail)
}
}
// ensureDaemon starts the town daemon if it is not already running.
//
// The daemon is launched as a detached background process running
// `gt daemon run` via the current executable (so versions match). After
// launch we poll daemon.IsRunning instead of relying on a single fixed
// 300ms sleep, so slow machines don't report spurious startup failures
// and fast machines return sooner.
func ensureDaemon(townRoot string) error {
	running, _, err := daemon.IsRunning(townRoot)
	if err != nil {
		return err
	}
	if running {
		return nil
	}
	// Start daemon using our own binary.
	gtPath, err := os.Executable()
	if err != nil {
		return err
	}
	cmd := exec.Command(gtPath, "daemon", "run")
	cmd.Dir = townRoot
	// Detach from parent I/O for background daemon (uses its own logging)
	cmd.Stdin = nil
	cmd.Stdout = nil
	cmd.Stderr = nil
	if err := cmd.Start(); err != nil {
		return err
	}
	// Poll for the daemon to come up: 50ms steps, ~2s total budget.
	// Returns as soon as the daemon reports running.
	deadline := time.Now().Add(2 * time.Second)
	for {
		running, _, err = daemon.IsRunning(townRoot)
		if err != nil {
			return err
		}
		if running {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("daemon failed to start")
		}
		time.Sleep(50 * time.Millisecond)
	}
}
// rigPrefetchResult holds the result of loading a single rig config,
// tagged with the caller's index into the rig-name slice so results can
// be matched back to names without passing strings through the channel.
type rigPrefetchResult struct {
	index int      // Position in the input rigNames slice
	rig   *rig.Rig // Loaded rig config
	err   error    // Load error, if any
}
// prefetchRigs loads the config for every named rig concurrently so the
// later agent startup doesn't pay the load cost serially. Returns a map
// of successfully loaded rigs and a map of load errors, both keyed by
// rig name.
func prefetchRigs(rigNames []string) (map[string]*rig.Rig, map[string]error) {
	loaded := make(map[string]*rig.Rig, len(rigNames))
	failed := make(map[string]error)
	if len(rigNames) == 0 {
		return loaded, failed
	}

	// One goroutine per rig reports through a buffered channel; the
	// single collector loop below owns the maps, so no mutex is needed.
	ch := make(chan rigPrefetchResult, len(rigNames))
	for i, name := range rigNames {
		go func(idx int, rigName string) {
			_, r, err := getRig(rigName)
			ch <- rigPrefetchResult{index: idx, rig: r, err: err}
		}(i, name)
	}

	for range rigNames {
		res := <-ch
		name := rigNames[res.index]
		if res.err != nil {
			failed[name] = res.err
			continue
		}
		loaded[name] = res.rig
	}
	return loaded, failed
}
// agentTask represents one unit of work for the agent worker pool:
// start either the witness or the refinery for a single rig.
type agentTask struct {
	rigName   string
	rigObj    *rig.Rig
	isWitness bool // true for witness, false for refinery
}
// agentResultMsg carries a start result from a worker goroutine back to
// the single collector, tagged so it lands in the right result map.
type agentResultMsg struct {
	rigName   string
	isWitness bool // routes the result to witness vs refinery map
	result    agentStartResult
}
// startRigAgentsParallel starts all Witnesses and Refineries
// concurrently, loading the rig configs itself first. Use this when the
// caller has not already prefetched rig configs.
func startRigAgentsParallel(rigNames []string) (witnessResults, refineryResults map[string]agentStartResult) {
	loaded, loadErrs := prefetchRigs(rigNames)
	return startRigAgentsWithPrefetch(rigNames, loaded, loadErrs)
}
// startRigAgentsWithPrefetch starts all Witnesses and Refineries using pre-loaded
// rig configs. Uses a worker pool with fixed goroutine count (bounded by
// maxConcurrentAgentStarts) to limit concurrency and reduce overhead.
// Rigs whose config failed to load receive failure results for both
// agents and are never scheduled. Returns result maps keyed by rig name.
func startRigAgentsWithPrefetch(rigNames []string, prefetchedRigs map[string]*rig.Rig, rigErrors map[string]error) (witnessResults, refineryResults map[string]agentStartResult) {
	n := len(rigNames)
	witnessResults = make(map[string]agentStartResult, n)
	refineryResults = make(map[string]agentStartResult, n)
	if n == 0 {
		return
	}
	// Record errors for rigs that failed to load
	for rigName, err := range rigErrors {
		errDetail := err.Error()
		witnessResults[rigName] = agentStartResult{
			name:   "Witness (" + rigName + ")",
			ok:     false,
			detail: errDetail,
		}
		refineryResults[rigName] = agentStartResult{
			name:   "Refinery (" + rigName + ")",
			ok:     false,
			detail: errDetail,
		}
	}
	numTasks := len(prefetchedRigs) * 2 // witness + refinery per rig
	if numTasks == 0 {
		return
	}
	// Task channel and result channel, both buffered to numTasks so
	// neither the enqueue loop nor the workers can block indefinitely.
	tasks := make(chan agentTask, numTasks)
	results := make(chan agentResultMsg, numTasks)
	// Start fixed worker pool (bounded by maxConcurrentAgentStarts)
	numWorkers := maxConcurrentAgentStarts
	if numTasks < numWorkers {
		numWorkers = numTasks
	}
	var wg sync.WaitGroup
	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for task := range tasks {
				var result agentStartResult
				if task.isWitness {
					result = upStartWitness(task.rigName, task.rigObj)
				} else {
					result = upStartRefinery(task.rigName, task.rigObj)
				}
				results <- agentResultMsg{
					rigName:   task.rigName,
					isWitness: task.isWitness,
					result:    result,
				}
			}
		}()
	}
	// Enqueue all tasks (two per successfully loaded rig)
	for rigName, r := range prefetchedRigs {
		tasks <- agentTask{rigName: rigName, rigObj: r, isWitness: true}
		tasks <- agentTask{rigName: rigName, rigObj: r, isWitness: false}
	}
	close(tasks)
	// Close results channel when workers are done
	go func() {
		wg.Wait()
		close(results)
	}()
	// Collect results - no locking needed, single goroutine collects
	for msg := range results {
		if msg.isWitness {
			witnessResults[msg.rigName] = msg.result
		} else {
			refineryResults[msg.rigName] = msg.result
		}
	}
	return
}
// upStartWitness starts the witness agent for one rig, honoring the
// rig's operational status: parked/docked rigs are reported as skipped
// (still ok=true) rather than started. An already-running witness also
// counts as success.
func upStartWitness(rigName string, r *rig.Rig) agentStartResult {
	res := agentStartResult{name: "Witness (" + rigName + ")", ok: true}

	// Parked/docked rigs must not be brought back online by routine
	// commands; the operational status lives in the rig's wisp config.
	// NOTE(review): assumes the rig dir sits directly under the town root
	// (townRoot = parent of r.Path) — confirm for nested layouts.
	townRoot := filepath.Dir(r.Path)
	status := wisp.NewConfig(townRoot, rigName).GetString("status")
	switch status {
	case "parked", "docked":
		res.detail = fmt.Sprintf("skipped (rig %s)", status)
		return res
	}

	mgr := witness.NewManager(r)
	err := mgr.Start(false, "", nil)
	switch {
	case err == nil, err == witness.ErrAlreadyRunning:
		res.detail = mgr.SessionName()
	default:
		res.ok = false
		res.detail = err.Error()
	}
	return res
}
// upStartRefinery starts the refinery agent for one rig, honoring the
// rig's operational status: parked/docked rigs are reported as skipped
// (still ok=true) rather than started. An already-running refinery also
// counts as success.
func upStartRefinery(rigName string, r *rig.Rig) agentStartResult {
	res := agentStartResult{name: "Refinery (" + rigName + ")", ok: true}

	// Parked/docked rigs must not be brought back online by routine
	// commands; the operational status lives in the rig's wisp config.
	// NOTE(review): assumes the rig dir sits directly under the town root
	// (townRoot = parent of r.Path) — confirm for nested layouts.
	townRoot := filepath.Dir(r.Path)
	status := wisp.NewConfig(townRoot, rigName).GetString("status")
	switch status {
	case "parked", "docked":
		res.detail = fmt.Sprintf("skipped (rig %s)", status)
		return res
	}

	mgr := refinery.NewManager(r)
	err := mgr.Start(false, "")
	switch {
	case err == nil, err == refinery.ErrAlreadyRunning:
		res.detail = mgr.SessionName()
	default:
		res.ok = false
		res.detail = err.Error()
	}
	return res
}
// discoverRigs returns the names of all rigs in the town. It prefers
// the mayor's rigs.json registry; if that can't be loaded it falls back
// to scanning townRoot for directories that look like rigs (containing
// a .beads or polecats subdirectory).
func discoverRigs(townRoot string) []string {
	var rigs []string

	// Preferred source: the mayor's rig registry.
	if rigsConfig, err := config.LoadRigsConfig(filepath.Join(townRoot, "mayor", "rigs.json")); err == nil {
		for name := range rigsConfig.Rigs {
			rigs = append(rigs, name)
		}
		return rigs
	}

	// Fallback: heuristic directory scan.
	entries, err := os.ReadDir(townRoot)
	if err != nil {
		return rigs
	}
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		name := entry.Name()
		// Skip hidden directories and known infrastructure directories.
		if name == "mayor" || name == "daemon" || name == "deacon" ||
			name == ".git" || name == "docs" || name[0] == '.' {
			continue
		}
		rigDir := filepath.Join(townRoot, name)
		// A .beads or polecats subdirectory marks the directory as a rig.
		if _, err := os.Stat(filepath.Join(rigDir, ".beads")); err == nil {
			rigs = append(rigs, name)
			continue
		}
		if _, err := os.Stat(filepath.Join(rigDir, "polecats")); err == nil {
			rigs = append(rigs, name)
		}
	}
	return rigs
}
// startCrewFromSettings starts crew members for one rig based on the
// rig's settings file (settings/config.json, crew.startup preference).
// A missing settings file, absent crew section, empty preference, or a
// listing failure is treated as "nothing to start" and returns empty
// results. Returns the names started (an already-running session counts
// as started) and a map of per-crew start errors.
func startCrewFromSettings(townRoot, rigName string) ([]string, map[string]error) {
	started := []string{}
	errors := map[string]error{}
	rigPath := filepath.Join(townRoot, rigName)
	// Load rig settings
	settingsPath := filepath.Join(rigPath, "settings", "config.json")
	settings, err := config.LoadRigSettings(settingsPath)
	if err != nil {
		// No settings file or error - skip crew startup
		return started, errors
	}
	if settings.Crew == nil || settings.Crew.Startup == "" {
		// No crew startup preference
		return started, errors
	}
	// Get available crew members using helper
	crewMgr, _, err := getCrewManager(rigName)
	if err != nil {
		return started, errors
	}
	crewWorkers, err := crewMgr.List()
	if err != nil {
		return started, errors
	}
	if len(crewWorkers) == 0 {
		return started, errors
	}
	// Extract crew names for preference matching
	crewNames := make([]string, len(crewWorkers))
	for i, w := range crewWorkers {
		crewNames[i] = w.Name
	}
	// Parse the natural-language startup preference (e.g. "all", "joe and max")
	toStart := parseCrewStartupPreference(settings.Crew.Startup, crewNames)
	// Start each crew member using Manager; already-running is success
	for _, crewName := range toStart {
		if err := crewMgr.Start(crewName, crew.StartOptions{}); err != nil {
			if err == crew.ErrSessionRunning {
				started = append(started, crewName)
			} else {
				errors[crewName] = err
			}
		} else {
			started = append(started, crewName)
		}
	}
	return started, errors
}
// parseCrewStartupPreference parses the natural language crew startup
// preference into the list of crew names to start, filtered to those
// actually available.
//
// Examples: "max", "joe and max", "all", "none", "pick one",
// "joe, max, but not sam", "all but not max".
//
// Special keywords (whole preference): "none"/"" start nobody, "all"
// starts every available crew, "pick one"/"any"/"any one" starts the
// first available crew. Otherwise the preference is a comma/"and"
// separated list; "but not X" marks X as excluded. An "all" token inside
// a list expands to every available crew (so "all but not max" works),
// and exclusions are applied after expansion.
func parseCrewStartupPreference(pref string, available []string) []string {
	pref = strings.ToLower(strings.TrimSpace(pref))
	// Special keywords
	switch pref {
	case "none", "":
		return []string{}
	case "all":
		return available
	case "pick one", "any", "any one":
		if len(available) > 0 {
			return []string{available[0]}
		}
		return []string{}
	}
	// Normalize separators:
	// "joe and max"        -> "joe,max"
	// "joe, max"           -> "joe, max" (comma already present)
	// "... but not sam"    -> "...,-sam" (leading '-' marks exclusion)
	pref = strings.ReplaceAll(pref, " and ", ",")
	pref = strings.ReplaceAll(pref, ", but not ", ",-")
	pref = strings.ReplaceAll(pref, " but not ", ",-")
	parts := strings.Split(pref, ",")
	include := []string{}
	exclude := map[string]bool{}
	expandAll := false
	for _, part := range parts {
		part = strings.TrimSpace(part)
		if part == "" {
			continue
		}
		if strings.HasPrefix(part, "-") {
			// Exclusion
			exclude[strings.TrimPrefix(part, "-")] = true
		} else if part == "all" {
			// "all" inside a list: expand to every available crew below,
			// so "all but not max" yields everyone except max.
			expandAll = true
		} else {
			include = append(include, part)
		}
	}
	if expandAll {
		include = append([]string{}, available...)
	}
	// Filter to only available crew members, honoring exclusions
	result := []string{}
	for _, name := range include {
		if exclude[name] {
			continue
		}
		// Check if this crew exists
		for _, avail := range available {
			if avail == name {
				result = append(result, name)
				break
			}
		}
	}
	return result
}
// startPolecatsWithWork starts polecats that have pinned beads (work
// attached); polecats without pinned work are left alone. A missing
// polecats directory or a rig-load failure returns empty results rather
// than an error. Returns the polecat names started (an already-running
// session counts as started) and a map of per-polecat start errors.
func startPolecatsWithWork(townRoot, rigName string) ([]string, map[string]error) {
	started := []string{}
	errors := map[string]error{}
	rigPath := filepath.Join(townRoot, rigName)
	polecatsDir := filepath.Join(rigPath, "polecats")
	// List polecat directories
	entries, err := os.ReadDir(polecatsDir)
	if err != nil {
		// No polecats directory
		return started, errors
	}
	// Get polecat session manager for this rig
	_, r, err := getRig(rigName)
	if err != nil {
		return started, errors
	}
	t := tmux.NewTmux()
	polecatMgr := polecat.NewSessionManager(t, r)
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		// Skip hidden directories - they are not polecats
		if strings.HasPrefix(entry.Name(), ".") {
			continue
		}
		polecatName := entry.Name()
		polecatPath := filepath.Join(polecatsDir, polecatName)
		// Check if this polecat has a pinned bead (work attached)
		agentID := fmt.Sprintf("%s/polecats/%s", rigName, polecatName)
		b := beads.New(polecatPath)
		// NOTE(review): Priority -1 presumably means "any priority" —
		// confirm against beads.ListOptions.
		pinnedBeads, err := b.List(beads.ListOptions{
			Status:   beads.StatusPinned,
			Assignee: agentID,
			Priority: -1,
		})
		if err != nil || len(pinnedBeads) == 0 {
			// No pinned beads (or listing failed) - skip this polecat
			continue
		}
		// This polecat has work - start it using SessionManager
		if err := polecatMgr.Start(polecatName, polecat.SessionStartOptions{}); err != nil {
			if err == polecat.ErrSessionRunning {
				started = append(started, polecatName)
			} else {
				errors[polecatName] = err
			}
		} else {
			started = append(started, polecatName)
		}
	}
	return started, errors
}