The tmux session name "gt-deacon-boot" was causing HasSession("gt-deacon")
to return true due to tmux prefix matching behavior. This made the daemon
think the Deacon was running when only Boot was active, and caused commands
targeting "gt-deacon" to be sent to the Boot session instead.
The fix renames the Boot session from "gt-deacon-boot" to "gt-boot", which
has no prefix overlap with "gt-deacon".
Fixes: gt-sgzsb
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
344 lines
8.5 KiB
Go
344 lines
8.5 KiB
Go
package cmd
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"time"
|
|
|
|
"github.com/spf13/cobra"
|
|
"github.com/steveyegge/gastown/internal/boot"
|
|
"github.com/steveyegge/gastown/internal/deacon"
|
|
"github.com/steveyegge/gastown/internal/style"
|
|
"github.com/steveyegge/gastown/internal/workspace"
|
|
)
|
|
|
|
// Flag storage for the boot subcommands; the flags are bound in init.
var (
	// bootStatusJSON is set by "boot status --json" and switches the
	// status output from the human-readable report to JSON.
	bootStatusJSON bool

	// bootDegraded is set by "boot triage --degraded".
	bootDegraded bool
)
|
|
|
|
var bootCmd = &cobra.Command{
|
|
Use: "boot",
|
|
GroupID: GroupAgents,
|
|
Short: "Manage Boot (Deacon watchdog)",
|
|
Long: `Manage Boot - the daemon's watchdog for Deacon triage.
|
|
|
|
Boot is a special dog that runs fresh on each daemon tick. It observes
|
|
the system state and decides whether to start/wake/nudge/interrupt the
|
|
Deacon, or do nothing. This centralizes the "when to wake" decision in
|
|
an agent that can reason about it.
|
|
|
|
Boot lifecycle:
|
|
1. Daemon tick spawns Boot (fresh each time)
|
|
2. Boot runs triage: observe, decide, act
|
|
3. Boot cleans inbox (discards stale handoffs)
|
|
4. Boot exits (or handoffs in non-degraded mode)
|
|
|
|
Location: ~/gt/deacon/dogs/boot/
|
|
Session: gt-boot`,
|
|
}
|
|
|
|
var bootStatusCmd = &cobra.Command{
|
|
Use: "status",
|
|
Short: "Show Boot status",
|
|
Long: `Show Boot's current status and last execution.
|
|
|
|
Displays:
|
|
- Whether Boot is currently running
|
|
- Last action taken (start/wake/nudge/nothing)
|
|
- Timing information
|
|
- Degraded mode status`,
|
|
RunE: runBootStatus,
|
|
}
|
|
|
|
var bootSpawnCmd = &cobra.Command{
|
|
Use: "spawn",
|
|
Short: "Spawn Boot for triage",
|
|
Long: `Spawn Boot to run the triage cycle.
|
|
|
|
This is normally called by the daemon. It spawns Boot in a fresh
|
|
tmux session (or subprocess in degraded mode) to observe and decide
|
|
what action to take on the Deacon.
|
|
|
|
Boot runs to completion and exits - it doesn't maintain state
|
|
between invocations.`,
|
|
RunE: runBootSpawn,
|
|
}
|
|
|
|
var bootTriageCmd = &cobra.Command{
|
|
Use: "triage",
|
|
Short: "Run triage directly (degraded mode)",
|
|
Long: `Run Boot's triage logic directly without Claude.
|
|
|
|
This is for degraded mode operation when tmux is unavailable.
|
|
It performs basic observation and takes conservative action:
|
|
- If Deacon is not running: start it
|
|
- If Deacon appears stuck: attempt restart
|
|
- Otherwise: do nothing
|
|
|
|
Use --degraded flag when running in degraded mode.`,
|
|
RunE: runBootTriage,
|
|
}
|
|
|
|
func init() {
|
|
bootStatusCmd.Flags().BoolVar(&bootStatusJSON, "json", false, "Output as JSON")
|
|
bootTriageCmd.Flags().BoolVar(&bootDegraded, "degraded", false, "Run in degraded mode (no tmux)")
|
|
|
|
bootCmd.AddCommand(bootStatusCmd)
|
|
bootCmd.AddCommand(bootSpawnCmd)
|
|
bootCmd.AddCommand(bootTriageCmd)
|
|
|
|
rootCmd.AddCommand(bootCmd)
|
|
}
|
|
|
|
func getBootManager() (*boot.Boot, error) {
|
|
townRoot, err := workspace.FindFromCwd()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("finding town root: %w", err)
|
|
}
|
|
|
|
return boot.New(townRoot), nil
|
|
}
|
|
|
|
func runBootStatus(cmd *cobra.Command, args []string) error {
|
|
b, err := getBootManager()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
status, err := b.LoadStatus()
|
|
if err != nil {
|
|
return fmt.Errorf("loading status: %w", err)
|
|
}
|
|
|
|
isRunning := b.IsRunning()
|
|
sessionAlive := b.IsSessionAlive()
|
|
|
|
if bootStatusJSON {
|
|
output := map[string]interface{}{
|
|
"running": isRunning,
|
|
"session_alive": sessionAlive,
|
|
"degraded": b.IsDegraded(),
|
|
"boot_dir": b.Dir(),
|
|
"last_status": status,
|
|
}
|
|
enc := json.NewEncoder(os.Stdout)
|
|
enc.SetIndent("", " ")
|
|
return enc.Encode(output)
|
|
}
|
|
|
|
// Pretty print
|
|
fmt.Println(style.Bold.Render("Boot Status"))
|
|
fmt.Println()
|
|
|
|
if isRunning {
|
|
fmt.Printf(" State: %s\n", style.Bold.Render("running"))
|
|
} else {
|
|
fmt.Printf(" State: %s\n", style.Dim.Render("idle"))
|
|
}
|
|
|
|
if sessionAlive {
|
|
fmt.Printf(" Session: %s (alive)\n", boot.SessionName)
|
|
} else {
|
|
fmt.Printf(" Session: %s\n", style.Dim.Render("not running"))
|
|
}
|
|
|
|
if b.IsDegraded() {
|
|
fmt.Printf(" Mode: %s\n", style.Bold.Render("DEGRADED"))
|
|
} else {
|
|
fmt.Printf(" Mode: normal\n")
|
|
}
|
|
|
|
fmt.Println()
|
|
fmt.Println(style.Dim.Render("Last Execution:"))
|
|
|
|
if status.StartedAt.IsZero() {
|
|
fmt.Printf(" %s\n", style.Dim.Render("(no executions recorded)"))
|
|
} else {
|
|
if !status.CompletedAt.IsZero() {
|
|
duration := status.CompletedAt.Sub(status.StartedAt)
|
|
fmt.Printf(" Completed: %s (%s ago)\n",
|
|
status.CompletedAt.Format("15:04:05"),
|
|
formatDurationAgo(time.Since(status.CompletedAt)))
|
|
fmt.Printf(" Duration: %s\n", duration.Round(time.Millisecond))
|
|
} else {
|
|
fmt.Printf(" Started: %s\n", status.StartedAt.Format("15:04:05"))
|
|
}
|
|
|
|
if status.LastAction != "" {
|
|
fmt.Printf(" Action: %s", status.LastAction)
|
|
if status.Target != "" {
|
|
fmt.Printf(" → %s", status.Target)
|
|
}
|
|
fmt.Println()
|
|
}
|
|
|
|
if status.Error != "" {
|
|
fmt.Printf(" Error: %s\n", style.Bold.Render(status.Error))
|
|
}
|
|
}
|
|
|
|
fmt.Println()
|
|
fmt.Printf(" Dir: %s\n", b.Dir())
|
|
|
|
return nil
|
|
}
|
|
|
|
func runBootSpawn(cmd *cobra.Command, args []string) error {
|
|
b, err := getBootManager()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if b.IsRunning() {
|
|
fmt.Println("Boot is already running - skipping spawn")
|
|
return nil
|
|
}
|
|
|
|
// Save starting status
|
|
status := &boot.Status{
|
|
Running: true,
|
|
StartedAt: time.Now(),
|
|
}
|
|
if err := b.SaveStatus(status); err != nil {
|
|
return fmt.Errorf("saving status: %w", err)
|
|
}
|
|
|
|
// Spawn Boot
|
|
if err := b.Spawn(); err != nil {
|
|
status.Error = err.Error()
|
|
status.CompletedAt = time.Now()
|
|
status.Running = false
|
|
_ = b.SaveStatus(status)
|
|
return fmt.Errorf("spawning boot: %w", err)
|
|
}
|
|
|
|
if b.IsDegraded() {
|
|
fmt.Println("Boot spawned in degraded mode (subprocess)")
|
|
} else {
|
|
fmt.Printf("Boot spawned in session: %s\n", boot.SessionName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func runBootTriage(cmd *cobra.Command, args []string) error {
|
|
b, err := getBootManager()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Acquire lock
|
|
if err := b.AcquireLock(); err != nil {
|
|
return fmt.Errorf("acquiring lock: %w", err)
|
|
}
|
|
defer func() { _ = b.ReleaseLock() }()
|
|
|
|
startTime := time.Now()
|
|
status := &boot.Status{
|
|
Running: true,
|
|
StartedAt: startTime,
|
|
}
|
|
|
|
// In degraded mode, we do basic mechanical triage
|
|
// without full Claude reasoning capability
|
|
action, target, triageErr := runDegradedTriage(b)
|
|
|
|
status.LastAction = action
|
|
status.Target = target
|
|
status.Running = false
|
|
status.CompletedAt = time.Now()
|
|
|
|
if triageErr != nil {
|
|
status.Error = triageErr.Error()
|
|
}
|
|
|
|
if err := b.SaveStatus(status); err != nil {
|
|
return fmt.Errorf("saving status: %w", err)
|
|
}
|
|
|
|
if triageErr != nil {
|
|
return triageErr
|
|
}
|
|
|
|
fmt.Printf("Triage complete: %s", action)
|
|
if target != "" {
|
|
fmt.Printf(" → %s", target)
|
|
}
|
|
fmt.Println()
|
|
|
|
return nil
|
|
}
|
|
|
|
// runDegradedTriage performs basic Deacon health check without AI reasoning.
|
|
// This is a mechanical fallback when full Claude sessions aren't available.
|
|
func runDegradedTriage(b *boot.Boot) (action, target string, err error) {
|
|
tm := b.Tmux()
|
|
|
|
// Check if Deacon session exists
|
|
deaconSession := "gt-deacon"
|
|
hasDeacon, err := tm.HasSession(deaconSession)
|
|
if err != nil {
|
|
return "error", "deacon", fmt.Errorf("checking deacon session: %w", err)
|
|
}
|
|
|
|
if !hasDeacon {
|
|
// Deacon not running - this is unusual, daemon should have restarted it
|
|
// In degraded mode, we just report - let daemon handle restart
|
|
return "report", "deacon-missing", nil
|
|
}
|
|
|
|
// Deacon exists - check heartbeat to detect stuck sessions
|
|
// A session can exist but be stuck (not making progress)
|
|
townRoot, _ := workspace.FindFromCwd()
|
|
if townRoot != "" {
|
|
hb := deacon.ReadHeartbeat(townRoot)
|
|
if hb.ShouldPoke() {
|
|
// Heartbeat is stale (>15 min) - Deacon is stuck
|
|
// Nudge the session to try to wake it up
|
|
age := hb.Age()
|
|
if age > 30*time.Minute {
|
|
// Very stuck - restart the session
|
|
fmt.Printf("Deacon heartbeat is %s old - restarting session\n", age.Round(time.Minute))
|
|
if err := tm.KillSession(deaconSession); err == nil {
|
|
return "restart", "deacon-stuck", nil
|
|
}
|
|
} else {
|
|
// Stuck but not critically - try nudging first
|
|
fmt.Printf("Deacon heartbeat is %s old - nudging session\n", age.Round(time.Minute))
|
|
_ = tm.NudgeSession(deaconSession, "HEALTH_CHECK: heartbeat is stale, respond to confirm responsiveness")
|
|
return "nudge", "deacon-stale", nil
|
|
}
|
|
}
|
|
}
|
|
|
|
return "nothing", "", nil
|
|
}
|
|
|
|
// formatDurationAgo renders d as a coarse, human-readable age.
//
// Anything under a minute (including negative durations) is "just now".
// Otherwise the largest fitting unit is used - minutes below an hour, hours
// below a day, days beyond that - with the count truncated rather than
// rounded. Hours and days are pluralized; "min" is not.
func formatDurationAgo(d time.Duration) string {
	if d < time.Minute {
		return "just now"
	}

	var (
		count          int
		unit, pluralIn string // pluralIn is appended to unit when count != 1
	)
	switch {
	case d < time.Hour:
		count, unit, pluralIn = int(d.Minutes()), "min", ""
	case d < 24*time.Hour:
		count, unit, pluralIn = int(d.Hours()), "hour", "s"
	default:
		count, unit, pluralIn = int(d.Hours()/24), "day", "s"
	}

	if count == 1 {
		return "1 " + unit
	}
	return fmt.Sprintf("%d %s", count, unit+pluralIn)
}
|