feat: add gt up/down commands and daemon doctor check

New commands:
- `gt up` - Idempotent boot command that brings up all services:
  Daemon, Deacon, Mayor, and Witnesses for all rigs
- `gt down` - Graceful shutdown of all services

Doctor improvements:
- New daemon check verifies daemon is running
- Fixable with `gt doctor --fix` to auto-start daemon

The system can still run in a degraded state (with one or more services
down), but `gt up` restores a fully operational Gas Town with a single
idempotent command.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Steve Yegge
2025-12-20 02:26:09 -08:00
parent 1554380228
commit da12531d3d
4 changed files with 536 additions and 0 deletions

View File

@@ -53,6 +53,7 @@ func runDoctor(cmd *cobra.Command, args []string) error {
d := doctor.NewDoctor()
// Register built-in checks
d.Register(doctor.NewDaemonCheck())
d.Register(doctor.NewBeadsDatabaseCheck())
// Run checks

139
internal/cmd/down.go Normal file
View File

@@ -0,0 +1,139 @@
package cmd
import (
"fmt"
"time"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/daemon"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/workspace"
)
// downCmd is the `gt down` command. Its handler, runDown, stops the witness,
// Mayor and Deacon tmux sessions and then the daemon; it does not touch
// polecats.
var downCmd = &cobra.Command{
Use: "down",
Short: "Stop all Gas Town services",
Long: `Stop all Gas Town long-lived services.
This gracefully shuts down all infrastructure agents:
• Witnesses - Per-rig polecat managers
• Mayor - Global work coordinator
• Deacon - Health orchestrator
• Daemon - Go background process
Polecats are NOT stopped by this command - use 'gt swarm stop' or
kill individual polecats with 'gt polecat kill'.
This is useful for:
• Taking a break (stop token consumption)
• Clean shutdown before system maintenance
• Resetting the town to a clean state`,
RunE: runDown,
}
// Flag values for `gt down`; they are bound to the command in init.
var (
// downQuiet (--quiet/-q) suppresses status lines that report success.
downQuiet bool
// downForce (--force/-f) skips the Ctrl-C step and kills sessions directly.
downForce bool
)
// init binds the --quiet/-q and --force/-f flags to downCmd and registers the
// command on rootCmd.
func init() {
	flags := downCmd.Flags()
	flags.BoolVarP(&downQuiet, "quiet", "q", false, "Only show errors")
	flags.BoolVarP(&downForce, "force", "f", false, "Force kill without graceful shutdown")

	rootCmd.AddCommand(downCmd)
}
// runDown stops the long-lived Gas Town services in the reverse of their
// startup order: every rig's witness session, then the Mayor session, then
// the Deacon session, and finally the daemon.
//
// Every service is attempted even when an earlier one failed. One status
// line is printed per service (subject to --quiet), followed by a summary.
// It returns an error if the current directory is not inside a workspace or
// if any service could not be stopped.
func runDown(cmd *cobra.Command, args []string) error {
	townRoot, err := workspace.FindFromCwdOrError()
	if err != nil {
		return fmt.Errorf("not in a Gas Town workspace: %w", err)
	}

	t := tmux.NewTmux()
	allOK := true

	// stopAndReport stops one tmux session and prints the outcome,
	// recording any failure in allOK.
	stopAndReport := func(label, sessionName string) {
		if err := stopSession(t, sessionName); err != nil {
			printDownStatus(label, false, err.Error())
			allOK = false
			return
		}
		printDownStatus(label, true, "stopped")
	}

	// 1. Witnesses first.
	for _, rigName := range discoverRigs(townRoot) {
		stopAndReport(
			fmt.Sprintf("Witness (%s)", rigName),
			fmt.Sprintf("gt-%s-witness", rigName),
		)
	}

	// 2. Mayor.
	stopAndReport("Mayor", MayorSessionName)

	// 3. Deacon.
	stopAndReport("Deacon", DeaconSessionName)

	// 4. Daemon last. A failed status check is reported as a failure rather
	// than being mistaken for "not running": we cannot claim the daemon is
	// down if we were unable to find out.
	running, _, err := daemon.IsRunning(townRoot)
	switch {
	case err != nil:
		printDownStatus("Daemon", false, err.Error())
		allOK = false
	case !running:
		printDownStatus("Daemon", true, "not running")
	default:
		if err := daemon.StopDaemon(townRoot); err != nil {
			printDownStatus("Daemon", false, err.Error())
			allOK = false
		} else {
			printDownStatus("Daemon", true, "stopped")
		}
	}

	fmt.Println()
	if !allOK {
		fmt.Printf("%s Some services failed to stop\n", style.Bold.Render("✗"))
		return fmt.Errorf("not all services stopped")
	}
	fmt.Printf("%s All services stopped\n", style.Bold.Render("✓"))
	return nil
}
func printDownStatus(name string, ok bool, detail string) {
if downQuiet && ok {
return
}
if ok {
fmt.Printf("%s %s: %s\n", style.SuccessPrefix, name, style.Dim.Render(detail))
} else {
fmt.Printf("%s %s: %s\n", style.ErrorPrefix, name, detail)
}
}
// stopSession stops the named tmux session if it exists.
//
// A session that does not exist is treated as already stopped and yields nil.
// Unless --force is set, Ctrl-C is sent to the session first and the session
// is given 100ms before being killed; the result of sending the key is
// deliberately ignored because the kill follows regardless.
// The returned error comes from the existence check or from the kill.
func stopSession(t *tmux.Tmux, sessionName string) error {
	exists, err := t.HasSession(sessionName)
	switch {
	case err != nil:
		return err
	case !exists:
		return nil
	}

	if downForce {
		return t.KillSession(sessionName)
	}

	// Best-effort graceful interrupt before the kill.
	_ = t.SendKeysRaw(sessionName, "C-c")
	time.Sleep(100 * time.Millisecond)
	return t.KillSession(sessionName)
}

285
internal/cmd/up.go Normal file
View File

@@ -0,0 +1,285 @@
package cmd
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"time"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/config"
"github.com/steveyegge/gastown/internal/daemon"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/workspace"
)
// upCmd is the `gt up` command. Its handler, runUp, ensures the daemon, the
// Deacon and Mayor sessions, and one witness session per rig are running,
// starting only those that are not.
var upCmd = &cobra.Command{
Use: "up",
Short: "Bring up all Gas Town services",
Long: `Start all Gas Town long-lived services.
This is the idempotent "boot" command for Gas Town. It ensures all
infrastructure agents are running:
• Daemon - Go background process that pokes agents
• Deacon - Health orchestrator (monitors Mayor/Witnesses)
• Mayor - Global work coordinator
• Witnesses - Per-rig polecat managers
Polecats are NOT started by this command - they are ephemeral workers
spawned on demand by the Mayor or Witnesses.
Running 'gt up' multiple times is safe - it only starts services that
aren't already running.`,
RunE: runUp,
}
// Flag values for `gt up`; they are bound to the command in init.
var (
// upQuiet (--quiet/-q) suppresses status lines that report success.
upQuiet bool
)
// init binds the --quiet/-q flag to upCmd and registers the command on
// rootCmd.
func init() {
	flags := upCmd.Flags()
	flags.BoolVarP(&upQuiet, "quiet", "q", false, "Only show errors")

	rootCmd.AddCommand(upCmd)
}
// runUp brings up the long-lived Gas Town services in order: the daemon, the
// Deacon session, the Mayor session, and one witness session per discovered
// rig. Services that are already running are left alone.
//
// Every service is attempted even when an earlier one failed. One status
// line is printed per service (subject to --quiet), followed by a summary.
// It returns an error if the current directory is not inside a workspace or
// if any service could not be confirmed as running.
func runUp(cmd *cobra.Command, args []string) error {
	townRoot, err := workspace.FindFromCwdOrError()
	if err != nil {
		return fmt.Errorf("not in a Gas Town workspace: %w", err)
	}

	t := tmux.NewTmux()
	allOK := true

	// report prints the outcome of one start attempt and records any
	// failure in allOK. okDetail is shown only on success.
	report := func(label string, err error, okDetail string) {
		if err != nil {
			printStatus(label, false, err.Error())
			allOK = false
			return
		}
		printStatus(label, true, okDetail)
	}

	// 1. Daemon (Go process). The status is queried again after a successful
	// ensureDaemon to obtain the PID; if that query fails or shows the
	// daemon gone, it is reported as a failure instead of being silently
	// skipped while the summary claims everything is running.
	if err := ensureDaemon(townRoot); err != nil {
		report("Daemon", err, "")
	} else if running, pid, err := daemon.IsRunning(townRoot); err != nil {
		report("Daemon", err, "")
	} else if !running {
		report("Daemon", fmt.Errorf("daemon is not running"), "")
	} else {
		report("Daemon", nil, fmt.Sprintf("PID %d", pid))
	}

	// 2. Deacon (Claude agent).
	report("Deacon", ensureSession(t, DeaconSessionName, townRoot, "deacon"), "gt-deacon")

	// 3. Mayor (Claude agent).
	report("Mayor", ensureSession(t, MayorSessionName, townRoot, "mayor"), "gt-mayor")

	// 4. Witnesses (one per rig), each started in its rig directory.
	for _, rigName := range discoverRigs(townRoot) {
		sessionName := fmt.Sprintf("gt-%s-witness", rigName)
		rigPath := filepath.Join(townRoot, rigName)
		report(
			fmt.Sprintf("Witness (%s)", rigName),
			ensureWitness(t, sessionName, rigPath, rigName),
			sessionName,
		)
	}

	fmt.Println()
	if !allOK {
		fmt.Printf("%s Some services failed to start\n", style.Bold.Render("✗"))
		return fmt.Errorf("not all services started")
	}
	fmt.Printf("%s All services running\n", style.Bold.Render("✓"))
	return nil
}
func printStatus(name string, ok bool, detail string) {
if upQuiet && ok {
return
}
if ok {
fmt.Printf("%s %s: %s\n", style.SuccessPrefix, name, style.Dim.Render(detail))
} else {
fmt.Printf("%s %s: %s\n", style.ErrorPrefix, name, detail)
}
}
// ensureDaemon makes sure the daemon for townRoot is running.
//
// If the daemon is already running it returns nil without doing anything.
// Otherwise it launches "<current executable> daemon run" with townRoot as
// the working directory, waits 300ms, and checks the status again.
// It returns any error from the status checks, from locating the executable
// or from starting the process, and "daemon failed to start" if the daemon
// is still not running after the wait.
func ensureDaemon(townRoot string) error {
	if alive, _, err := daemon.IsRunning(townRoot); err != nil {
		return err
	} else if alive {
		return nil
	}

	// Re-invoke this same binary as the daemon.
	exePath, err := os.Executable()
	if err != nil {
		return err
	}
	proc := exec.Command(exePath, "daemon", "run")
	proc.Dir = townRoot
	// Explicitly leave all three standard streams unset.
	proc.Stdin, proc.Stdout, proc.Stderr = nil, nil, nil
	if err = proc.Start(); err != nil {
		return err
	}

	// Give the daemon a moment to initialize before verifying it.
	time.Sleep(300 * time.Millisecond)

	switch alive, _, err := daemon.IsRunning(townRoot); {
	case err != nil:
		return err
	case !alive:
		return fmt.Errorf("daemon failed to start")
	}
	return nil
}
// ensureSession makes sure a tmux session named sessionName exists, creating
// it in workDir and launching Claude in it when it does not.
//
// For a new session it sets GT_ROLE to role, applies the Mayor or Deacon
// theme when role is "mayor" or "deacon" (other roles get no theme), and
// then sends the launch command. The Deacon's command is a shell loop that
// restarts Claude two seconds after it exits; every other role runs Claude
// once. Failures to set the environment or apply the theme are ignored.
// It returns any error from the existence check, from creating the session,
// or from sending the launch command.
func ensureSession(t *tmux.Tmux, sessionName, workDir, role string) error {
	// err is nil whenever the session already exists, so one return serves
	// both "lookup failed" and "nothing to do".
	if exists, err := t.HasSession(sessionName); err != nil || exists {
		return err
	}

	if err := t.NewSession(sessionName, workDir); err != nil {
		return err
	}
	_ = t.SetEnvironment(sessionName, "GT_ROLE", role)

	// Pick the theme and the launch command together, per role.
	launch := `claude --dangerously-skip-permissions`
	switch role {
	case "mayor":
		_ = t.ConfigureGasTownSession(sessionName, tmux.MayorTheme(), "", "Mayor", "coordinator")
	case "deacon":
		_ = t.ConfigureGasTownSession(sessionName, tmux.DeaconTheme(), "", "Deacon", "health-check")
		// Deacon uses a respawn loop.
		launch = `while true; do echo "⛪ Starting Deacon session..."; claude --dangerously-skip-permissions; echo ""; echo "Deacon exited. Restarting in 2s... (Ctrl-C to stop)"; sleep 2; done`
	}

	return t.SendKeysDelayed(sessionName, launch, 200)
}
// ensureWitness makes sure the witness tmux session for a rig exists,
// creating it in rigPath and launching Claude in it when it does not.
//
// For a new session it sets GT_ROLE to "witness" and GT_RIG to rigName,
// applies the theme assigned to the rig, and sends the Claude launch
// command. Failures to set the environment or apply the theme are ignored.
// It returns any error from the existence check, from creating the session,
// or from sending the launch command.
func ensureWitness(t *tmux.Tmux, sessionName, rigPath, rigName string) error {
	// err is nil whenever the session already exists, so one return serves
	// both "lookup failed" and "nothing to do".
	if exists, err := t.HasSession(sessionName); err != nil || exists {
		return err
	}

	if err := t.NewSession(sessionName, rigPath); err != nil {
		return err
	}

	for _, kv := range [][2]string{
		{"GT_ROLE", "witness"},
		{"GT_RIG", rigName},
	} {
		_ = t.SetEnvironment(sessionName, kv[0], kv[1])
	}

	_ = t.ConfigureGasTownSession(sessionName, tmux.AssignTheme(rigName), "", "Witness", rigName)

	return t.SendKeysDelayed(sessionName, `claude --dangerously-skip-permissions`, 200)
}
// discoverRigs returns the names of the rigs in the town rooted at townRoot.
//
// If <townRoot>/mayor/rigs.json loads successfully, the rig names are exactly
// the keys of its Rigs collection (possibly none). Otherwise the immediate
// subdirectories of townRoot are scanned: "mayor", "daemon", "deacon", "docs"
// and anything starting with "." are skipped, and a directory counts as a rig
// if it contains a ".beads" or a "polecats" entry. If townRoot cannot be read
// the result is empty.
//
// NOTE(review): Rigs is presumably a map, in which case the order of names
// taken from rigs.json varies between calls — confirm if callers care.
func discoverRigs(townRoot string) []string {
	var found []string

	// Preferred source: the rigs registry.
	cfg, err := config.LoadRigsConfig(filepath.Join(townRoot, "mayor", "rigs.json"))
	if err == nil {
		for rig := range cfg.Rigs {
			found = append(found, rig)
		}
		return found
	}

	// Fallback: look for rig-like directories.
	dirEntries, err := os.ReadDir(townRoot)
	if err != nil {
		return found
	}

	for _, e := range dirEntries {
		if !e.IsDir() {
			continue
		}

		dir := e.Name()
		switch dir {
		case "mayor", "daemon", "deacon", ".git", "docs":
			continue // known non-rig directories
		}
		if dir[0] == '.' {
			continue // hidden directories
		}

		// The first marker that exists identifies the directory as a rig.
		for _, marker := range []string{".beads", "polecats"} {
			if _, err := os.Stat(filepath.Join(townRoot, dir, marker)); err == nil {
				found = append(found, dir)
				break
			}
		}
	}
	return found
}

View File

@@ -0,0 +1,111 @@
package doctor
import (
"os"
"os/exec"
"time"
"github.com/steveyegge/gastown/internal/daemon"
)
// DaemonCheck is a doctor check that verifies the Gas Town daemon is running.
// It embeds FixableCheck; its Fix method starts the daemon.
type DaemonCheck struct {
FixableCheck
}
// NewDaemonCheck returns a DaemonCheck whose check name is "daemon".
func NewDaemonCheck() *DaemonCheck {
	base := BaseCheck{
		CheckName:        "daemon",
		CheckDescription: "Check if Gas Town daemon is running",
	}
	return &DaemonCheck{FixableCheck: FixableCheck{BaseCheck: base}}
}
// Run reports whether the daemon for ctx.TownRoot is running.
//
// The result is:
//   - StatusError if the daemon status could not be determined;
//   - StatusWarning, with a fix hint, if the daemon is not running;
//   - StatusOK, with the PID in the message, if it is running. When the
//     daemon state loads and has a start time, the details also carry the
//     uptime and, if any heartbeats were recorded, the heartbeat count.
func (c *DaemonCheck) Run(ctx *CheckContext) *CheckResult {
	running, pid, err := daemon.IsRunning(ctx.TownRoot)
	if err != nil {
		return &CheckResult{
			Name:    c.Name(),
			Status:  StatusError,
			Message: "Failed to check daemon status",
			Details: []string{err.Error()},
		}
	}

	if !running {
		return &CheckResult{
			Name:    c.Name(),
			Status:  StatusWarning,
			Message: "Daemon is not running",
			FixHint: "Run 'gt daemon start' or 'gt doctor --fix'",
		}
	}

	// Extra state is optional: if it cannot be loaded the check still
	// passes, just without details.
	details := []string{}
	if state, err := daemon.LoadState(ctx.TownRoot); err == nil && !state.StartedAt.IsZero() {
		uptime := time.Since(state.StartedAt).Round(time.Second)
		details = append(details, "Uptime: "+uptime.String())
		if state.HeartbeatCount > 0 {
			// Format the count as decimal digits. Converting it through
			// string(rune(n)) would instead produce the single Unicode
			// character whose code point is n.
			details = append(details, "Heartbeats: "+itoa(int(state.HeartbeatCount)))
		}
	}

	return &CheckResult{
		Name:    c.Name(),
		Status:  StatusOK,
		Message: "Daemon is running (PID " + itoa(pid) + ")",
		Details: details,
	}
}
// Fix starts the daemon by launching "<current executable> daemon run" with
// ctx.TownRoot as the working directory, then waits 300ms to let it
// initialize.
//
// It returns an error if the executable path cannot be determined or the
// process cannot be started. It does not check afterwards that the daemon is
// actually running.
func (c *DaemonCheck) Fix(ctx *CheckContext) error {
	self, err := os.Executable()
	if err != nil {
		return err
	}

	daemonCmd := exec.Command(self, "daemon", "run")
	daemonCmd.Dir = ctx.TownRoot
	// Explicitly leave all three standard streams unset.
	daemonCmd.Stdin, daemonCmd.Stdout, daemonCmd.Stderr = nil, nil, nil

	err = daemonCmd.Start()
	if err == nil {
		// Wait a moment for the daemon to initialize.
		time.Sleep(300 * time.Millisecond)
	}
	return err
}
// itoa returns the base-10 representation of i, with a leading "-" for
// negative values. It is correct for every int, including the most negative
// one.
func itoa(i int) string {
	// Work on the unsigned magnitude: negating the most negative int
	// overflows back to itself, whereas unsigned negation yields the right
	// magnitude for every negative input.
	neg := i < 0
	mag := uint(i)
	if neg {
		mag = -mag
	}

	// 20 bytes hold the longest 64-bit value: 19 digits plus a sign.
	var buf [20]byte
	pos := len(buf)
	// Fill digits from the right; the loop runs at least once so that 0
	// renders as "0".
	for {
		pos--
		buf[pos] = byte('0' + mag%10)
		mag /= 10
		if mag == 0 {
			break
		}
	}
	if neg {
		pos--
		buf[pos] = '-'
	}
	return string(buf[pos:])
}