Files
gastown/internal/cmd/boot.go
aleiby 31bd120077 fix(startup): unify agent startup with beacon + instructions in CLI prompt (#977)
All agents now receive their startup beacon + role-specific instructions via
the CLI prompt, making sessions identifiable in /resume picker while removing
unreliable post-startup nudges.

Changes:
- Rename FormatStartupNudge → FormatStartupBeacon, StartupNudgeConfig → BeaconConfig
- Remove StartupNudge() function (no longer needed)
- Remove PropulsionNudge() and PropulsionNudgeForRole() functions
- Update deacon, witness, refinery, polecat managers to include beacon in CLI prompt
- Update boot to inline beacon (can't import session due to import cycle)
- Update daemon/lifecycle.go to include beacon via BuildCommandWithPrompt
- Update cmd/deacon.go to include beacon in startup command
- Remove redundant StartupNudge and PropulsionNudge calls from all startup paths

The beacon is now part of the CLI prompt which is queued before Claude starts,
making it more reliable than post-startup nudges which had timing issues.
SessionStart hook runs gt prime automatically, so PropulsionNudge was redundant.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 18:08:57 -08:00

348 lines
8.8 KiB
Go

package cmd
import (
"encoding/json"
"fmt"
"os"
"time"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/boot"
"github.com/steveyegge/gastown/internal/deacon"
"github.com/steveyegge/gastown/internal/session"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/workspace"
)
var (
bootStatusJSON bool
bootDegraded bool
bootAgentOverride string
)
var bootCmd = &cobra.Command{
Use: "boot",
GroupID: GroupAgents,
Short: "Manage Boot (Deacon watchdog)",
Long: `Manage Boot - the daemon's watchdog for Deacon triage.
Boot is a special dog that runs fresh on each daemon tick. It observes
the system state and decides whether to start/wake/nudge/interrupt the
Deacon, or do nothing. This centralizes the "when to wake" decision in
an agent that can reason about it.
Boot lifecycle:
1. Daemon tick spawns Boot (fresh each time)
2. Boot runs triage: observe, decide, act
3. Boot cleans inbox (discards stale handoffs)
4. Boot exits (or handoffs in non-degraded mode)
Location: ~/gt/deacon/dogs/boot/
Session: gt-boot`,
}
var bootStatusCmd = &cobra.Command{
Use: "status",
Short: "Show Boot status",
Long: `Show Boot's current status and last execution.
Displays:
- Whether Boot is currently running
- Last action taken (start/wake/nudge/nothing)
- Timing information
- Degraded mode status`,
RunE: runBootStatus,
}
var bootSpawnCmd = &cobra.Command{
Use: "spawn",
Short: "Spawn Boot for triage",
Long: `Spawn Boot to run the triage cycle.
This is normally called by the daemon. It spawns Boot in a fresh
tmux session (or subprocess in degraded mode) to observe and decide
what action to take on the Deacon.
Boot runs to completion and exits - it doesn't maintain state
between invocations.`,
RunE: runBootSpawn,
}
var bootTriageCmd = &cobra.Command{
Use: "triage",
Short: "Run triage directly (degraded mode)",
Long: `Run Boot's triage logic directly without Claude.
This is for degraded mode operation when tmux is unavailable.
It performs basic observation and takes conservative action:
- If Deacon is not running: start it
- If Deacon appears stuck: attempt restart
- Otherwise: do nothing
Use --degraded flag when running in degraded mode.`,
RunE: runBootTriage,
}
func init() {
bootStatusCmd.Flags().BoolVar(&bootStatusJSON, "json", false, "Output as JSON")
bootTriageCmd.Flags().BoolVar(&bootDegraded, "degraded", false, "Run in degraded mode (no tmux)")
bootSpawnCmd.Flags().StringVar(&bootAgentOverride, "agent", "", "Agent alias to run Boot with (overrides town default)")
bootCmd.AddCommand(bootStatusCmd)
bootCmd.AddCommand(bootSpawnCmd)
bootCmd.AddCommand(bootTriageCmd)
rootCmd.AddCommand(bootCmd)
}
func getBootManager() (*boot.Boot, error) {
townRoot, err := workspace.FindFromCwd()
if err != nil {
return nil, fmt.Errorf("finding town root: %w", err)
}
return boot.New(townRoot), nil
}
func runBootStatus(cmd *cobra.Command, args []string) error {
b, err := getBootManager()
if err != nil {
return err
}
status, err := b.LoadStatus()
if err != nil {
return fmt.Errorf("loading status: %w", err)
}
isRunning := b.IsRunning()
sessionAlive := b.IsSessionAlive()
if bootStatusJSON {
output := map[string]interface{}{
"running": isRunning,
"session_alive": sessionAlive,
"degraded": b.IsDegraded(),
"boot_dir": b.Dir(),
"last_status": status,
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
return enc.Encode(output)
}
// Pretty print
fmt.Println(style.Bold.Render("Boot Status"))
fmt.Println()
if isRunning {
fmt.Printf(" State: %s\n", style.Bold.Render("running"))
} else {
fmt.Printf(" State: %s\n", style.Dim.Render("idle"))
}
if sessionAlive {
fmt.Printf(" Session: %s (alive)\n", session.BootSessionName())
} else {
fmt.Printf(" Session: %s\n", style.Dim.Render("not running"))
}
if b.IsDegraded() {
fmt.Printf(" Mode: %s\n", style.Bold.Render("DEGRADED"))
} else {
fmt.Printf(" Mode: normal\n")
}
fmt.Println()
fmt.Println(style.Dim.Render("Last Execution:"))
if status.StartedAt.IsZero() {
fmt.Printf(" %s\n", style.Dim.Render("(no executions recorded)"))
} else {
if !status.CompletedAt.IsZero() {
duration := status.CompletedAt.Sub(status.StartedAt)
fmt.Printf(" Completed: %s (%s ago)\n",
status.CompletedAt.Format("15:04:05"),
formatDurationAgo(time.Since(status.CompletedAt)))
fmt.Printf(" Duration: %s\n", duration.Round(time.Millisecond))
} else {
fmt.Printf(" Started: %s\n", status.StartedAt.Format("15:04:05"))
}
if status.LastAction != "" {
fmt.Printf(" Action: %s", status.LastAction)
if status.Target != "" {
fmt.Printf(" → %s", status.Target)
}
fmt.Println()
}
if status.Error != "" {
fmt.Printf(" Error: %s\n", style.Bold.Render(status.Error))
}
}
fmt.Println()
fmt.Printf(" Dir: %s\n", b.Dir())
return nil
}
func runBootSpawn(cmd *cobra.Command, args []string) error {
b, err := getBootManager()
if err != nil {
return err
}
if b.IsRunning() {
fmt.Println("Boot is already running - skipping spawn")
return nil
}
// Save starting status
status := &boot.Status{
Running: true,
StartedAt: time.Now(),
}
if err := b.SaveStatus(status); err != nil {
return fmt.Errorf("saving status: %w", err)
}
// Spawn Boot
if err := b.Spawn(bootAgentOverride); err != nil {
status.Error = err.Error()
status.CompletedAt = time.Now()
status.Running = false
_ = b.SaveStatus(status)
return fmt.Errorf("spawning boot: %w", err)
}
if b.IsDegraded() {
fmt.Println("Boot spawned in degraded mode (subprocess)")
} else {
fmt.Printf("Boot spawned in session: %s\n", session.BootSessionName())
}
return nil
}
func runBootTriage(cmd *cobra.Command, args []string) error {
b, err := getBootManager()
if err != nil {
return err
}
// Acquire lock
if err := b.AcquireLock(); err != nil {
return fmt.Errorf("acquiring lock: %w", err)
}
defer func() { _ = b.ReleaseLock() }()
startTime := time.Now()
status := &boot.Status{
Running: true,
StartedAt: startTime,
}
// In degraded mode, we do basic mechanical triage
// without full Claude reasoning capability
action, target, triageErr := runDegradedTriage(b)
status.LastAction = action
status.Target = target
status.Running = false
status.CompletedAt = time.Now()
if triageErr != nil {
status.Error = triageErr.Error()
}
if err := b.SaveStatus(status); err != nil {
return fmt.Errorf("saving status: %w", err)
}
if triageErr != nil {
return triageErr
}
fmt.Printf("Triage complete: %s", action)
if target != "" {
fmt.Printf(" → %s", target)
}
fmt.Println()
return nil
}
// runDegradedTriage performs basic Deacon health check without AI reasoning.
// This is a mechanical fallback when full Claude sessions aren't available.
func runDegradedTriage(b *boot.Boot) (action, target string, err error) {
tm := b.Tmux()
// Check if Deacon session exists
deaconSession := getDeaconSessionName()
hasDeacon, err := tm.HasSession(deaconSession)
if err != nil {
return "error", "deacon", fmt.Errorf("checking deacon session: %w", err)
}
if !hasDeacon {
// Deacon not running - this is unusual, daemon should have restarted it
// In degraded mode, we just report - let daemon handle restart
return "report", "deacon-missing", nil
}
// Deacon exists - check heartbeat to detect stuck sessions
// A session can exist but be stuck (not making progress)
townRoot, _ := workspace.FindFromCwd()
if townRoot != "" {
hb := deacon.ReadHeartbeat(townRoot)
if hb.ShouldPoke() {
// Heartbeat is stale (>15 min) - Deacon is stuck
// Nudge the session to try to wake it up
age := hb.Age()
if age > 30*time.Minute {
// Very stuck - restart the session.
// Use KillSessionWithProcesses to ensure all descendant processes are killed.
fmt.Printf("Deacon heartbeat is %s old - restarting session\n", age.Round(time.Minute))
if err := tm.KillSessionWithProcesses(deaconSession); err == nil {
return "restart", "deacon-stuck", nil
}
} else {
// Stuck but not critically - try nudging first
fmt.Printf("Deacon heartbeat is %s old - nudging session\n", age.Round(time.Minute))
_ = tm.NudgeSession(deaconSession, "HEALTH_CHECK: heartbeat is stale, respond to confirm responsiveness")
return "nudge", "deacon-stale", nil
}
}
}
return "nothing", "", nil
}
// formatDurationAgo formats a duration for human display.
func formatDurationAgo(d time.Duration) string {
switch {
case d < time.Minute:
return "just now"
case d < time.Hour:
mins := int(d.Minutes())
if mins == 1 {
return "1 min"
}
return fmt.Sprintf("%d min", mins)
case d < 24*time.Hour:
hours := int(d.Hours())
if hours == 1 {
return "1 hour"
}
return fmt.Sprintf("%d hours", hours)
default:
days := int(d.Hours() / 24)
if days == 1 {
return "1 day"
}
return fmt.Sprintf("%d days", days)
}
}