Files
gastown/internal/cmd/boot.go
toast 9dcbdf8106 fix(boot): Rename session gt-deacon-boot → gt-boot to fix prefix collision
The tmux session name "gt-deacon-boot" was causing HasSession("gt-deacon")
to return true due to tmux's prefix-matching behavior. This made the daemon
think the Deacon was running when only Boot was active, and caused commands
targeting "gt-deacon" to be sent to the Boot session instead.

The fix renames Boot session from "gt-deacon-boot" to "gt-boot", which
has no prefix overlap with "gt-deacon".

Fixes: gt-sgzsb

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-02 18:56:40 -08:00

344 lines
8.5 KiB
Go

package cmd
import (
"encoding/json"
"fmt"
"os"
"time"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/boot"
"github.com/steveyegge/gastown/internal/deacon"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/workspace"
)
// Flag values for the boot subcommands. They are bound to their command-line
// flags in init.
var (
// bootStatusJSON backs the --json flag of "boot status"; when set,
// runBootStatus emits JSON instead of the human-readable report.
bootStatusJSON bool
// bootDegraded backs the --degraded flag of "boot triage".
bootDegraded bool
)
// bootCmd is the parent "boot" command. It sets no Run function here; init
// attaches the status, spawn and triage subcommands to it and registers it
// on rootCmd.
var bootCmd = &cobra.Command{
Use: "boot",
GroupID: GroupAgents,
Short: "Manage Boot (Deacon watchdog)",
Long: `Manage Boot - the daemon's watchdog for Deacon triage.
Boot is a special dog that runs fresh on each daemon tick. It observes
the system state and decides whether to start/wake/nudge/interrupt the
Deacon, or do nothing. This centralizes the "when to wake" decision in
an agent that can reason about it.
Boot lifecycle:
1. Daemon tick spawns Boot (fresh each time)
2. Boot runs triage: observe, decide, act
3. Boot cleans inbox (discards stale handoffs)
4. Boot exits (or handoffs in non-degraded mode)
Location: ~/gt/deacon/dogs/boot/
Session: gt-boot`,
}
// bootStatusCmd is the "boot status" subcommand. It is handled by
// runBootStatus and accepts the --json flag registered in init.
var bootStatusCmd = &cobra.Command{
Use: "status",
Short: "Show Boot status",
Long: `Show Boot's current status and last execution.
Displays:
- Whether Boot is currently running
- Last action taken (start/wake/nudge/nothing)
- Timing information
- Degraded mode status`,
RunE: runBootStatus,
}
// bootSpawnCmd is the "boot spawn" subcommand. It is handled by runBootSpawn
// and takes no flags of its own.
var bootSpawnCmd = &cobra.Command{
Use: "spawn",
Short: "Spawn Boot for triage",
Long: `Spawn Boot to run the triage cycle.
This is normally called by the daemon. It spawns Boot in a fresh
tmux session (or subprocess in degraded mode) to observe and decide
what action to take on the Deacon.
Boot runs to completion and exits - it doesn't maintain state
between invocations.`,
RunE: runBootSpawn,
}
// bootTriageCmd is the "boot triage" subcommand. It is handled by
// runBootTriage and accepts the --degraded flag registered in init.
//
// The action list in the help text mirrors what runDegradedTriage actually
// does: a missing Deacon is only reported (the daemon is expected to restart
// it), a stale heartbeat triggers a nudge, and a heartbeat older than 30
// minutes gets the session killed.
var bootTriageCmd = &cobra.Command{
	Use:   "triage",
	Short: "Run triage directly (degraded mode)",
	Long: `Run Boot's triage logic directly without Claude.
This is for degraded mode operation when tmux is unavailable.
It performs basic observation and takes conservative action:
- If Deacon is not running: report it (the daemon handles the restart)
- If Deacon's heartbeat is stale: nudge the session
- If Deacon's heartbeat is more than 30 minutes old: kill the stuck session
- Otherwise: do nothing
Use --degraded flag when running in degraded mode.`,
	RunE: runBootTriage,
}
// init wires up the boot command tree: it binds the --json and --degraded
// flags to their package-level variables, attaches the three subcommands to
// bootCmd, and registers bootCmd on rootCmd.
func init() {
	statusFlags := bootStatusCmd.Flags()
	statusFlags.BoolVar(&bootStatusJSON, "json", false, "Output as JSON")

	triageFlags := bootTriageCmd.Flags()
	triageFlags.BoolVar(&bootDegraded, "degraded", false, "Run in degraded mode (no tmux)")

	// AddCommand is variadic; the subcommands are added in the same order
	// as three separate calls would add them.
	bootCmd.AddCommand(bootStatusCmd, bootSpawnCmd, bootTriageCmd)
	rootCmd.AddCommand(bootCmd)
}
// getBootManager locates the town root starting from the current working
// directory and returns a Boot manager rooted there. A failure to find the
// root is returned wrapped with "finding town root".
func getBootManager() (*boot.Boot, error) {
	root, findErr := workspace.FindFromCwd()
	if findErr != nil {
		return nil, fmt.Errorf("finding town root: %w", findErr)
	}

	mgr := boot.New(root)
	return mgr, nil
}
// runBootStatus implements "boot status".
//
// It loads the last recorded status from the Boot manager and queries the
// running/session/degraded state. With --json the result is written to stdout
// as an indented JSON object; otherwise a human-readable report is printed.
// It returns an error if the manager cannot be created, the status cannot be
// loaded, or JSON encoding fails.
func runBootStatus(cmd *cobra.Command, args []string) error {
	mgr, err := getBootManager()
	if err != nil {
		return err
	}

	last, err := mgr.LoadStatus()
	if err != nil {
		return fmt.Errorf("loading status: %w", err)
	}

	running := mgr.IsRunning()
	alive := mgr.IsSessionAlive()

	// Machine-readable output: emit everything in one object and stop.
	if bootStatusJSON {
		payload := map[string]interface{}{
			"running":       running,
			"session_alive": alive,
			"degraded":      mgr.IsDegraded(),
			"boot_dir":      mgr.Dir(),
			"last_status":   last,
		}
		encoder := json.NewEncoder(os.Stdout)
		encoder.SetIndent("", " ")
		return encoder.Encode(payload)
	}

	// Human-readable report: header, then current state.
	fmt.Println(style.Bold.Render("Boot Status"))
	fmt.Println()

	if !running {
		fmt.Printf(" State: %s\n", style.Dim.Render("idle"))
	} else {
		fmt.Printf(" State: %s\n", style.Bold.Render("running"))
	}

	if !alive {
		fmt.Printf(" Session: %s\n", style.Dim.Render("not running"))
	} else {
		fmt.Printf(" Session: %s (alive)\n", boot.SessionName)
	}

	if degraded := mgr.IsDegraded(); !degraded {
		fmt.Println(" Mode: normal")
	} else {
		fmt.Printf(" Mode: %s\n", style.Bold.Render("DEGRADED"))
	}

	// Details of the most recent run, if one was ever recorded.
	fmt.Println()
	fmt.Println(style.Dim.Render("Last Execution:"))

	switch {
	case status0(last.StartedAt.IsZero()):
		fmt.Printf(" %s\n", style.Dim.Render("(no executions recorded)"))

	default:
		if last.CompletedAt.IsZero() {
			// Run started but no completion was recorded.
			fmt.Printf(" Started: %s\n", last.StartedAt.Format("15:04:05"))
		} else {
			elapsed := last.CompletedAt.Sub(last.StartedAt)
			fmt.Printf(" Completed: %s (%s ago)\n",
				last.CompletedAt.Format("15:04:05"),
				formatDurationAgo(time.Since(last.CompletedAt)))
			fmt.Printf(" Duration: %s\n", elapsed.Round(time.Millisecond))
		}

		if last.LastAction != "" {
			actionLine := fmt.Sprintf(" Action: %s", last.LastAction)
			if last.Target != "" {
				actionLine += fmt.Sprintf(" → %s", last.Target)
			}
			fmt.Println(actionLine)
		}

		if last.Error != "" {
			fmt.Printf(" Error: %s\n", style.Bold.Render(last.Error))
		}
	}

	fmt.Println()
	fmt.Printf(" Dir: %s\n", mgr.Dir())
	return nil
}

// status0 is an identity helper that lets the "no executions" condition read
// as a named switch case.
func status0(noRuns bool) bool { return noRuns }
// runBootSpawn implements "boot spawn".
//
// If Boot is already running it prints a notice and returns nil. Otherwise it
// saves a "running" status stamped with the current time and then spawns
// Boot. When the spawn fails, the status is updated with the error and a
// completion time before the wrapped spawn error is returned.
func runBootSpawn(cmd *cobra.Command, args []string) error {
	mgr, err := getBootManager()
	if err != nil {
		return err
	}

	if mgr.IsRunning() {
		fmt.Println("Boot is already running - skipping spawn")
		return nil
	}

	// Persist the start marker before attempting the spawn.
	rec := &boot.Status{StartedAt: time.Now(), Running: true}
	if saveErr := mgr.SaveStatus(rec); saveErr != nil {
		return fmt.Errorf("saving status: %w", saveErr)
	}

	if spawnErr := mgr.Spawn(); spawnErr != nil {
		rec.Error = spawnErr.Error()
		rec.CompletedAt = time.Now()
		rec.Running = false
		// Best effort: the spawn error is what gets reported, so a failure
		// to record it is deliberately ignored.
		_ = mgr.SaveStatus(rec)
		return fmt.Errorf("spawning boot: %w", spawnErr)
	}

	switch {
	case mgr.IsDegraded():
		fmt.Println("Boot spawned in degraded mode (subprocess)")
	default:
		fmt.Printf("Boot spawned in session: %s\n", boot.SessionName)
	}
	return nil
}
// runBootTriage implements "boot triage".
//
// It takes the Boot lock, runs the mechanical triage in runDegradedTriage,
// records the outcome (action, target, timing and any error) via SaveStatus,
// and prints a one-line summary on success. The lock is released on return;
// a failure to release it is ignored.
//
// It returns an error if the manager cannot be created, the lock cannot be
// acquired, the status cannot be saved, or triage itself fails. When both
// triage and the status save fail, the returned error wraps the save error
// and includes the triage error text so neither failure is lost.
func runBootTriage(cmd *cobra.Command, args []string) error {
	b, err := getBootManager()
	if err != nil {
		return err
	}

	if err := b.AcquireLock(); err != nil {
		return fmt.Errorf("acquiring lock: %w", err)
	}
	// Releasing is best effort; there is nothing useful to do on failure.
	defer func() { _ = b.ReleaseLock() }()

	startTime := time.Now()

	// In degraded mode, we do basic mechanical triage
	// without full Claude reasoning capability.
	action, target, triageErr := runDegradedTriage(b)

	status := &boot.Status{
		Running:     false,
		StartedAt:   startTime,
		CompletedAt: time.Now(),
		LastAction:  action,
		Target:      target,
	}
	if triageErr != nil {
		status.Error = triageErr.Error()
	}

	if saveErr := b.SaveStatus(status); saveErr != nil {
		if triageErr != nil {
			// The triage failure was about to be recorded in the status
			// that could not be saved; surface it here instead of
			// dropping it.
			return fmt.Errorf("saving status: %w (triage also failed: %v)", saveErr, triageErr)
		}
		return fmt.Errorf("saving status: %w", saveErr)
	}

	if triageErr != nil {
		return triageErr
	}

	fmt.Printf("Triage complete: %s", action)
	if target != "" {
		fmt.Printf(" → %s", target)
	}
	fmt.Println()
	return nil
}
// runDegradedTriage performs a basic Deacon health check without AI reasoning.
// This is a mechanical fallback when full Claude sessions aren't available.
//
// It returns an (action, target) pair describing what was done, plus an error:
//
//   - ("error", "deacon", err): the session lookup failed.
//   - ("report", "deacon-missing", nil): no Deacon session exists; nothing is
//     started here.
//   - ("restart", "deacon-stuck", nil): the heartbeat was older than 30
//     minutes and the session was killed.
//   - ("error", "deacon-stuck", err): the session was that stale but killing
//     it failed.
//   - ("nudge", "deacon-stale", nil): the heartbeat was stale but not yet 30
//     minutes old; a health-check nudge was sent.
//   - ("nothing", "", nil): the heartbeat did not need poking, or the town
//     root could not be determined so the heartbeat was not checked.
func runDegradedTriage(b *boot.Boot) (action, target string, err error) {
	const (
		deaconSession = "gt-deacon"
		// restartAfter is the heartbeat age beyond which the session is
		// killed rather than nudged.
		restartAfter = 30 * time.Minute
	)

	tm := b.Tmux()

	hasDeacon, err := tm.HasSession(deaconSession)
	if err != nil {
		return "error", "deacon", fmt.Errorf("checking deacon session: %w", err)
	}
	if !hasDeacon {
		// Deacon not running - this is unusual, daemon should have restarted it.
		// In degraded mode, we just report - let daemon handle restart.
		return "report", "deacon-missing", nil
	}

	// The session exists, but it may be stuck (not making progress); the
	// heartbeat tells us. Without a town root there is no heartbeat to read,
	// so the lookup error is deliberately ignored and the check is skipped.
	townRoot, _ := workspace.FindFromCwd()
	if townRoot == "" {
		return "nothing", "", nil
	}

	hb := deacon.ReadHeartbeat(townRoot)
	if !hb.ShouldPoke() {
		return "nothing", "", nil
	}

	age := hb.Age()
	if age > restartAfter {
		// Very stuck - kill the session so it can be restarted.
		fmt.Printf("Deacon heartbeat is %s old - restarting session\n", age.Round(time.Minute))
		if err := tm.KillSession(deaconSession); err != nil {
			// Previously this fell through and reported "nothing", hiding
			// the fact that a stuck Deacon could not be killed.
			return "error", "deacon-stuck", fmt.Errorf("killing stuck deacon session: %w", err)
		}
		return "restart", "deacon-stuck", nil
	}

	// Stuck but not critically - try nudging first. The nudge is best
	// effort, so its error is intentionally ignored.
	fmt.Printf("Deacon heartbeat is %s old - nudging session\n", age.Round(time.Minute))
	_ = tm.NudgeSession(deaconSession, "HEALTH_CHECK: heartbeat is stale, respond to confirm responsiveness")
	return "nudge", "deacon-stale", nil
}
// formatDurationAgo renders an elapsed duration as a short, coarse label.
//
// Anything under a minute (including negative durations) is "just now".
// Below an hour the result is whole minutes ("N min"), below a day whole
// hours ("1 hour" / "N hours"), and otherwise whole days ("1 day" /
// "N days"). Fractions are truncated, not rounded.
func formatDurationAgo(d time.Duration) string {
	if d < time.Minute {
		return "just now"
	}
	if d < time.Hour {
		// "min" reads the same for singular and plural.
		return fmt.Sprintf("%d min", int(d.Minutes()))
	}

	var (
		count int
		unit  string
	)
	if d < 24*time.Hour {
		count, unit = int(d.Hours()), "hour"
	} else {
		count, unit = int(d.Hours()/24), "day"
	}

	if count == 1 {
		return "1 " + unit
	}
	return fmt.Sprintf("%d %ss", count, unit)
}