Implement Boot: daemon entry point dog for Deacon triage (gt-rwd5j)
Boot is a watchdog that the daemon pokes instead of Deacon directly, centralizing the 'when to wake Deacon' decision in an agent that can reason about context. Key changes: - Add internal/boot package with marker file and status tracking - Add gt boot commands: status, spawn, triage - Add mol-boot-triage formula for Boot's triage cycle - Modify daemon to call ensureBootRunning instead of ensureDeaconRunning - Add tmux.IsAvailable() for degraded mode detection - Add boot.md.tmpl role template Boot lifecycle: 1. Daemon tick spawns Boot (fresh each time) 2. Boot runs triage: observe, decide, act 3. Boot cleans stale handoffs from Deacon inbox 4. Boot exits (or hands off in non-degraded mode) In degraded mode (no tmux), Boot runs mechanical triage directly. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
231
internal/boot/boot.go
Normal file
231
internal/boot/boot.go
Normal file
@@ -0,0 +1,231 @@
|
||||
// Package boot manages the Boot watchdog - the daemon's entry point for Deacon triage.
|
||||
// Boot is a dog that runs fresh on each daemon tick, deciding whether to wake/nudge/interrupt
|
||||
// the Deacon or let it continue. This centralizes the "when to wake" decision in an agent.
|
||||
package boot
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/steveyegge/gastown/internal/tmux"
|
||||
)
|
||||
|
||||
// Names and timing shared by everything that manages the Boot watchdog.
const (
	// SessionName is the tmux session name for Boot.
	SessionName = "gt-deacon-boot"

	// MarkerFileName is the file whose presence indicates Boot is currently running.
	MarkerFileName = ".boot-running"

	// StatusFileName is the file that stores Boot's last execution status.
	StatusFileName = ".boot-status.json"

	// DefaultMarkerTTL is how old a marker may get before it is treated as stale.
	DefaultMarkerTTL = 5 * time.Minute
)
|
||||
|
||||
// Status is the JSON-serializable record of a Boot execution.
//
// NOTE: encoding/json's omitempty never omits struct values, so the two
// time.Time fields are still emitted when zero; readers should rely on
// IsZero rather than on the key being absent.
type Status struct {
	// Running is true while an execution is in progress.
	Running bool `json:"running"`

	// StartedAt is when the execution began.
	StartedAt time.Time `json:"started_at,omitempty"`

	// CompletedAt is when the execution finished; zero while still running.
	CompletedAt time.Time `json:"completed_at,omitempty"`

	// LastAction is the action taken, e.g. start/wake/nudge/nothing.
	LastAction string `json:"last_action,omitempty"`

	// Target is what the action was aimed at, e.g. deacon, witness.
	Target string `json:"target,omitempty"`

	// Error holds the failure message, if the execution failed.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// Boot manages the Boot watchdog lifecycle.
|
||||
type Boot struct {
|
||||
townRoot string
|
||||
bootDir string // ~/gt/deacon/dogs/boot/
|
||||
deaconDir string // ~/gt/deacon/
|
||||
tmux *tmux.Tmux
|
||||
degraded bool
|
||||
}
|
||||
|
||||
// New creates a new Boot manager.
|
||||
func New(townRoot string) *Boot {
|
||||
return &Boot{
|
||||
townRoot: townRoot,
|
||||
bootDir: filepath.Join(townRoot, "deacon", "dogs", "boot"),
|
||||
deaconDir: filepath.Join(townRoot, "deacon"),
|
||||
tmux: tmux.NewTmux(),
|
||||
degraded: os.Getenv("GT_DEGRADED") == "true",
|
||||
}
|
||||
}
|
||||
|
||||
// EnsureDir ensures the Boot directory exists.
|
||||
func (b *Boot) EnsureDir() error {
|
||||
return os.MkdirAll(b.bootDir, 0755)
|
||||
}
|
||||
|
||||
// markerPath returns the path to the marker file.
|
||||
func (b *Boot) markerPath() string {
|
||||
return filepath.Join(b.bootDir, MarkerFileName)
|
||||
}
|
||||
|
||||
// statusPath returns the path to the status file.
|
||||
func (b *Boot) statusPath() string {
|
||||
return filepath.Join(b.bootDir, StatusFileName)
|
||||
}
|
||||
|
||||
// IsRunning checks if Boot is currently running.
|
||||
// Returns true if marker exists and isn't stale, false otherwise.
|
||||
func (b *Boot) IsRunning() bool {
|
||||
info, err := os.Stat(b.markerPath())
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if marker is stale (older than TTL)
|
||||
age := time.Since(info.ModTime())
|
||||
if age > DefaultMarkerTTL {
|
||||
// Stale marker - clean it up
|
||||
_ = os.Remove(b.markerPath())
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// IsSessionAlive checks if the Boot tmux session exists.
|
||||
func (b *Boot) IsSessionAlive() bool {
|
||||
has, err := b.tmux.HasSession(SessionName)
|
||||
return err == nil && has
|
||||
}
|
||||
|
||||
// AcquireLock creates the marker file to indicate Boot is starting.
|
||||
// Returns error if Boot is already running.
|
||||
func (b *Boot) AcquireLock() error {
|
||||
if b.IsRunning() {
|
||||
return fmt.Errorf("boot is already running (marker exists)")
|
||||
}
|
||||
|
||||
if err := b.EnsureDir(); err != nil {
|
||||
return fmt.Errorf("ensuring boot dir: %w", err)
|
||||
}
|
||||
|
||||
// Create marker file
|
||||
f, err := os.Create(b.markerPath())
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating marker: %w", err)
|
||||
}
|
||||
return f.Close()
|
||||
}
|
||||
|
||||
// ReleaseLock removes the marker file.
|
||||
func (b *Boot) ReleaseLock() error {
|
||||
return os.Remove(b.markerPath())
|
||||
}
|
||||
|
||||
// SaveStatus saves Boot's execution status.
|
||||
func (b *Boot) SaveStatus(status *Status) error {
|
||||
if err := b.EnsureDir(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data, err := json.MarshalIndent(status, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(b.statusPath(), data, 0644)
|
||||
}
|
||||
|
||||
// LoadStatus loads Boot's last execution status.
|
||||
func (b *Boot) LoadStatus() (*Status, error) {
|
||||
data, err := os.ReadFile(b.statusPath())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return &Status{}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var status Status
|
||||
if err := json.Unmarshal(data, &status); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &status, nil
|
||||
}
|
||||
|
||||
// Spawn starts Boot in a fresh tmux session.
|
||||
// Boot runs the mol-boot-triage molecule and exits when done.
|
||||
// In degraded mode (no tmux), it runs in a subprocess.
|
||||
func (b *Boot) Spawn() error {
|
||||
if b.IsRunning() {
|
||||
return fmt.Errorf("boot is already running")
|
||||
}
|
||||
|
||||
// Check for degraded mode
|
||||
if b.degraded {
|
||||
return b.spawnDegraded()
|
||||
}
|
||||
|
||||
return b.spawnTmux()
|
||||
}
|
||||
|
||||
// spawnTmux spawns Boot in a tmux session.
|
||||
func (b *Boot) spawnTmux() error {
|
||||
// Kill any stale session first
|
||||
if b.IsSessionAlive() {
|
||||
_ = b.tmux.KillSession(SessionName)
|
||||
}
|
||||
|
||||
// Create new session in deacon directory
|
||||
if err := b.tmux.NewSession(SessionName, b.deaconDir); err != nil {
|
||||
return fmt.Errorf("creating boot session: %w", err)
|
||||
}
|
||||
|
||||
// Set environment
|
||||
_ = b.tmux.SetEnvironment(SessionName, "GT_ROLE", "boot")
|
||||
_ = b.tmux.SetEnvironment(SessionName, "BD_ACTOR", "deacon-boot")
|
||||
|
||||
// Launch Claude with environment exported inline
|
||||
startCmd := "export GT_ROLE=boot BD_ACTOR=deacon-boot && claude --dangerously-skip-permissions"
|
||||
if err := b.tmux.SendKeys(SessionName, startCmd); err != nil {
|
||||
return fmt.Errorf("sending startup command: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// spawnDegraded spawns Boot in degraded mode (no tmux).
|
||||
// Boot runs to completion and exits without handoff.
|
||||
func (b *Boot) spawnDegraded() error {
|
||||
// In degraded mode, we run gt boot triage directly
|
||||
// This performs the triage logic without a full Claude session
|
||||
cmd := exec.Command("gt", "boot", "triage", "--degraded")
|
||||
cmd.Dir = b.deaconDir
|
||||
cmd.Env = append(os.Environ(),
|
||||
"GT_ROLE=boot",
|
||||
"BD_ACTOR=deacon-boot",
|
||||
"GT_DEGRADED=true",
|
||||
)
|
||||
|
||||
// Run async - don't wait for completion
|
||||
return cmd.Start()
|
||||
}
|
||||
|
||||
// IsDegraded returns whether Boot is in degraded mode.
|
||||
func (b *Boot) IsDegraded() bool {
|
||||
return b.degraded
|
||||
}
|
||||
|
||||
// Dir returns Boot's working directory.
|
||||
func (b *Boot) Dir() string {
|
||||
return b.bootDir
|
||||
}
|
||||
|
||||
// DeaconDir returns the Deacon's directory.
|
||||
func (b *Boot) DeaconDir() string {
|
||||
return b.deaconDir
|
||||
}
|
||||
|
||||
// Tmux returns the tmux manager.
|
||||
func (b *Boot) Tmux() *tmux.Tmux {
|
||||
return b.tmux
|
||||
}
|
||||
321
internal/cmd/boot.go
Normal file
321
internal/cmd/boot.go
Normal file
@@ -0,0 +1,321 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/steveyegge/gastown/internal/boot"
|
||||
"github.com/steveyegge/gastown/internal/style"
|
||||
"github.com/steveyegge/gastown/internal/workspace"
|
||||
)
|
||||
|
||||
// Flag storage for the `gt boot` subcommands.
var (
	// bootStatusJSON is set by `gt boot status --json`.
	bootStatusJSON bool

	// bootDegraded is set by `gt boot triage --degraded`.
	// NOTE(review): no visible code reads this value after parsing - confirm
	// whether the flag is meant to change triage behavior.
	bootDegraded bool
)
|
||||
|
||||
var bootCmd = &cobra.Command{
|
||||
Use: "boot",
|
||||
GroupID: GroupAgents,
|
||||
Short: "Manage Boot (Deacon watchdog)",
|
||||
Long: `Manage Boot - the daemon's watchdog for Deacon triage.
|
||||
|
||||
Boot is a special dog that runs fresh on each daemon tick. It observes
|
||||
the system state and decides whether to start/wake/nudge/interrupt the
|
||||
Deacon, or do nothing. This centralizes the "when to wake" decision in
|
||||
an agent that can reason about it.
|
||||
|
||||
Boot lifecycle:
|
||||
1. Daemon tick spawns Boot (fresh each time)
|
||||
2. Boot runs triage: observe, decide, act
|
||||
3. Boot cleans inbox (discards stale handoffs)
|
||||
4. Boot exits (or handoffs in non-degraded mode)
|
||||
|
||||
Location: ~/gt/deacon/dogs/boot/
|
||||
Session: gt-deacon-boot`,
|
||||
}
|
||||
|
||||
var bootStatusCmd = &cobra.Command{
|
||||
Use: "status",
|
||||
Short: "Show Boot status",
|
||||
Long: `Show Boot's current status and last execution.
|
||||
|
||||
Displays:
|
||||
- Whether Boot is currently running
|
||||
- Last action taken (start/wake/nudge/nothing)
|
||||
- Timing information
|
||||
- Degraded mode status`,
|
||||
RunE: runBootStatus,
|
||||
}
|
||||
|
||||
var bootSpawnCmd = &cobra.Command{
|
||||
Use: "spawn",
|
||||
Short: "Spawn Boot for triage",
|
||||
Long: `Spawn Boot to run the triage cycle.
|
||||
|
||||
This is normally called by the daemon. It spawns Boot in a fresh
|
||||
tmux session (or subprocess in degraded mode) to observe and decide
|
||||
what action to take on the Deacon.
|
||||
|
||||
Boot runs to completion and exits - it doesn't maintain state
|
||||
between invocations.`,
|
||||
RunE: runBootSpawn,
|
||||
}
|
||||
|
||||
var bootTriageCmd = &cobra.Command{
|
||||
Use: "triage",
|
||||
Short: "Run triage directly (degraded mode)",
|
||||
Long: `Run Boot's triage logic directly without Claude.
|
||||
|
||||
This is for degraded mode operation when tmux is unavailable.
|
||||
It performs basic observation and takes conservative action:
|
||||
- If Deacon is not running: start it
|
||||
- If Deacon appears stuck: attempt restart
|
||||
- Otherwise: do nothing
|
||||
|
||||
Use --degraded flag when running in degraded mode.`,
|
||||
RunE: runBootTriage,
|
||||
}
|
||||
|
||||
func init() {
|
||||
bootStatusCmd.Flags().BoolVar(&bootStatusJSON, "json", false, "Output as JSON")
|
||||
bootTriageCmd.Flags().BoolVar(&bootDegraded, "degraded", false, "Run in degraded mode (no tmux)")
|
||||
|
||||
bootCmd.AddCommand(bootStatusCmd)
|
||||
bootCmd.AddCommand(bootSpawnCmd)
|
||||
bootCmd.AddCommand(bootTriageCmd)
|
||||
|
||||
rootCmd.AddCommand(bootCmd)
|
||||
}
|
||||
|
||||
func getBootManager() (*boot.Boot, error) {
|
||||
townRoot, err := workspace.FindFromCwd()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("finding town root: %w", err)
|
||||
}
|
||||
|
||||
return boot.New(townRoot), nil
|
||||
}
|
||||
|
||||
func runBootStatus(cmd *cobra.Command, args []string) error {
|
||||
b, err := getBootManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
status, err := b.LoadStatus()
|
||||
if err != nil {
|
||||
return fmt.Errorf("loading status: %w", err)
|
||||
}
|
||||
|
||||
isRunning := b.IsRunning()
|
||||
sessionAlive := b.IsSessionAlive()
|
||||
|
||||
if bootStatusJSON {
|
||||
output := map[string]interface{}{
|
||||
"running": isRunning,
|
||||
"session_alive": sessionAlive,
|
||||
"degraded": b.IsDegraded(),
|
||||
"boot_dir": b.Dir(),
|
||||
"last_status": status,
|
||||
}
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(output)
|
||||
}
|
||||
|
||||
// Pretty print
|
||||
fmt.Println(style.Bold.Render("Boot Status"))
|
||||
fmt.Println()
|
||||
|
||||
if isRunning {
|
||||
fmt.Printf(" State: %s\n", style.Bold.Render("running"))
|
||||
} else {
|
||||
fmt.Printf(" State: %s\n", style.Dim.Render("idle"))
|
||||
}
|
||||
|
||||
if sessionAlive {
|
||||
fmt.Printf(" Session: %s (alive)\n", boot.SessionName)
|
||||
} else {
|
||||
fmt.Printf(" Session: %s\n", style.Dim.Render("not running"))
|
||||
}
|
||||
|
||||
if b.IsDegraded() {
|
||||
fmt.Printf(" Mode: %s\n", style.Bold.Render("DEGRADED"))
|
||||
} else {
|
||||
fmt.Printf(" Mode: normal\n")
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Println(style.Dim.Render("Last Execution:"))
|
||||
|
||||
if status.StartedAt.IsZero() {
|
||||
fmt.Printf(" %s\n", style.Dim.Render("(no executions recorded)"))
|
||||
} else {
|
||||
if !status.CompletedAt.IsZero() {
|
||||
duration := status.CompletedAt.Sub(status.StartedAt)
|
||||
fmt.Printf(" Completed: %s (%s ago)\n",
|
||||
status.CompletedAt.Format("15:04:05"),
|
||||
formatDurationAgo(time.Since(status.CompletedAt)))
|
||||
fmt.Printf(" Duration: %s\n", duration.Round(time.Millisecond))
|
||||
} else {
|
||||
fmt.Printf(" Started: %s\n", status.StartedAt.Format("15:04:05"))
|
||||
}
|
||||
|
||||
if status.LastAction != "" {
|
||||
fmt.Printf(" Action: %s", status.LastAction)
|
||||
if status.Target != "" {
|
||||
fmt.Printf(" → %s", status.Target)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
if status.Error != "" {
|
||||
fmt.Printf(" Error: %s\n", style.Bold.Render(status.Error))
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
fmt.Printf(" Dir: %s\n", b.Dir())
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runBootSpawn(cmd *cobra.Command, args []string) error {
|
||||
b, err := getBootManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if b.IsRunning() {
|
||||
fmt.Println("Boot is already running - skipping spawn")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Save starting status
|
||||
status := &boot.Status{
|
||||
Running: true,
|
||||
StartedAt: time.Now(),
|
||||
}
|
||||
if err := b.SaveStatus(status); err != nil {
|
||||
return fmt.Errorf("saving status: %w", err)
|
||||
}
|
||||
|
||||
// Spawn Boot
|
||||
if err := b.Spawn(); err != nil {
|
||||
status.Error = err.Error()
|
||||
status.CompletedAt = time.Now()
|
||||
status.Running = false
|
||||
_ = b.SaveStatus(status)
|
||||
return fmt.Errorf("spawning boot: %w", err)
|
||||
}
|
||||
|
||||
if b.IsDegraded() {
|
||||
fmt.Println("Boot spawned in degraded mode (subprocess)")
|
||||
} else {
|
||||
fmt.Printf("Boot spawned in session: %s\n", boot.SessionName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func runBootTriage(cmd *cobra.Command, args []string) error {
|
||||
b, err := getBootManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Acquire lock
|
||||
if err := b.AcquireLock(); err != nil {
|
||||
return fmt.Errorf("acquiring lock: %w", err)
|
||||
}
|
||||
defer func() { _ = b.ReleaseLock() }()
|
||||
|
||||
startTime := time.Now()
|
||||
status := &boot.Status{
|
||||
Running: true,
|
||||
StartedAt: startTime,
|
||||
}
|
||||
|
||||
// In degraded mode, we do basic mechanical triage
|
||||
// without full Claude reasoning capability
|
||||
action, target, triageErr := runDegradedTriage(b)
|
||||
|
||||
status.LastAction = action
|
||||
status.Target = target
|
||||
status.Running = false
|
||||
status.CompletedAt = time.Now()
|
||||
|
||||
if triageErr != nil {
|
||||
status.Error = triageErr.Error()
|
||||
}
|
||||
|
||||
if err := b.SaveStatus(status); err != nil {
|
||||
return fmt.Errorf("saving status: %w", err)
|
||||
}
|
||||
|
||||
if triageErr != nil {
|
||||
return triageErr
|
||||
}
|
||||
|
||||
fmt.Printf("Triage complete: %s", action)
|
||||
if target != "" {
|
||||
fmt.Printf(" → %s", target)
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// runDegradedTriage performs basic Deacon health check without AI reasoning.
|
||||
// This is a mechanical fallback when full Claude sessions aren't available.
|
||||
func runDegradedTriage(b *boot.Boot) (action, target string, err error) {
|
||||
tm := b.Tmux()
|
||||
|
||||
// Check if Deacon session exists
|
||||
deaconSession := "gt-deacon"
|
||||
hasDeacon, err := tm.HasSession(deaconSession)
|
||||
if err != nil {
|
||||
return "error", "deacon", fmt.Errorf("checking deacon session: %w", err)
|
||||
}
|
||||
|
||||
if !hasDeacon {
|
||||
// Deacon not running - this is unusual, daemon should have restarted it
|
||||
// In degraded mode, we just report - let daemon handle restart
|
||||
return "report", "deacon-missing", nil
|
||||
}
|
||||
|
||||
// Deacon exists - check if it's responsive (basic pane output check)
|
||||
// In degraded mode, we can't do sophisticated analysis
|
||||
// Just verify the session is alive
|
||||
return "nothing", "", nil
|
||||
}
|
||||
|
||||
// formatDurationAgo formats an elapsed duration for human display as a
// coarse quantity ("less than a minute", "5 min", "1 hour", "3 days").
//
// The result deliberately carries no "ago" suffix - callers append it
// (e.g. "(%s ago)") - so every return value must read correctly in front of
// the word "ago". Values are truncated, not rounded: 119 minutes is "1 hour".
// Durations under one minute, including zero and negative ones, all map to
// "less than a minute".
func formatDurationAgo(d time.Duration) string {
	switch {
	case d < time.Minute:
		return "less than a minute"
	case d < time.Hour:
		mins := int(d.Minutes())
		if mins == 1 {
			return "1 min"
		}
		return fmt.Sprintf("%d min", mins)
	case d < 24*time.Hour:
		hours := int(d.Hours())
		if hours == 1 {
			return "1 hour"
		}
		return fmt.Sprintf("%d hours", hours)
	default:
		days := int(d.Hours() / 24)
		if days == 1 {
			return "1 day"
		}
		return fmt.Sprintf("%d days", days)
	}
}
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/steveyegge/gastown/internal/beads"
|
||||
"github.com/steveyegge/gastown/internal/boot"
|
||||
"github.com/steveyegge/gastown/internal/constants"
|
||||
"github.com/steveyegge/gastown/internal/feed"
|
||||
"github.com/steveyegge/gastown/internal/keepalive"
|
||||
@@ -189,8 +190,9 @@ func (d *Daemon) calculateHeartbeatInterval() time.Duration {
|
||||
func (d *Daemon) heartbeat(state *State) {
|
||||
d.logger.Println("Heartbeat starting (recovery-focused)")
|
||||
|
||||
// 1. Ensure Deacon is running (restart if dead)
|
||||
d.ensureDeaconRunning()
|
||||
// 1. Poke Boot (the Deacon's watchdog) instead of Deacon directly
|
||||
// Boot handles the "when to wake Deacon" decision via triage logic
|
||||
d.ensureBootRunning()
|
||||
|
||||
// 2. Ensure Witnesses are running for all rigs (restart if dead)
|
||||
d.ensureWitnessesRunning()
|
||||
@@ -233,6 +235,71 @@ const DeaconSessionName = "gt-deacon"
|
||||
// DeaconRole is the role name for the Deacon's handoff bead.
|
||||
const DeaconRole = "deacon"
|
||||
|
||||
// ensureBootRunning spawns Boot to triage the Deacon.
|
||||
// Boot is a fresh-each-tick watchdog that decides whether to start/wake/nudge
|
||||
// the Deacon, centralizing the "when to wake" decision in an agent.
|
||||
// In degraded mode (no tmux), falls back to mechanical checks.
|
||||
func (d *Daemon) ensureBootRunning() {
|
||||
b := boot.New(d.config.TownRoot)
|
||||
|
||||
// Check if Boot is already running (recent marker)
|
||||
if b.IsRunning() {
|
||||
d.logger.Println("Boot already running, skipping spawn")
|
||||
return
|
||||
}
|
||||
|
||||
// Check for degraded mode
|
||||
degraded := os.Getenv("GT_DEGRADED") == "true"
|
||||
if degraded || !d.tmux.IsAvailable() {
|
||||
// In degraded mode, run mechanical triage directly
|
||||
d.logger.Println("Degraded mode: running mechanical Boot triage")
|
||||
d.runDegradedBootTriage(b)
|
||||
return
|
||||
}
|
||||
|
||||
// Spawn Boot in a fresh tmux session
|
||||
d.logger.Println("Spawning Boot for triage...")
|
||||
if err := b.Spawn(); err != nil {
|
||||
d.logger.Printf("Error spawning Boot: %v, falling back to direct Deacon check", err)
|
||||
// Fallback: ensure Deacon is running directly
|
||||
d.ensureDeaconRunning()
|
||||
return
|
||||
}
|
||||
|
||||
d.logger.Println("Boot spawned successfully")
|
||||
}
|
||||
|
||||
// runDegradedBootTriage performs mechanical Boot logic without AI reasoning.
|
||||
// This is for degraded mode when tmux is unavailable.
|
||||
func (d *Daemon) runDegradedBootTriage(b *boot.Boot) {
|
||||
startTime := time.Now()
|
||||
status := &boot.Status{
|
||||
Running: true,
|
||||
StartedAt: startTime,
|
||||
}
|
||||
|
||||
// Simple check: is Deacon session alive?
|
||||
hasDeacon, err := d.tmux.HasSession(DeaconSessionName)
|
||||
if err != nil {
|
||||
d.logger.Printf("Error checking Deacon session: %v", err)
|
||||
status.LastAction = "error"
|
||||
status.Error = err.Error()
|
||||
} else if !hasDeacon {
|
||||
d.logger.Println("Deacon not running, starting...")
|
||||
d.ensureDeaconRunning()
|
||||
status.LastAction = "start"
|
||||
status.Target = "deacon"
|
||||
} else {
|
||||
status.LastAction = "nothing"
|
||||
}
|
||||
|
||||
status.Running = false
|
||||
status.CompletedAt = time.Now()
|
||||
|
||||
if err := b.SaveStatus(status); err != nil {
|
||||
d.logger.Printf("Warning: failed to save Boot status: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ensureDeaconRunning ensures the Deacon is running.
|
||||
// ZFC-compliant: trusts agent bead state, no tmux inference.
|
||||
|
||||
136
internal/templates/roles/boot.md.tmpl
Normal file
136
internal/templates/roles/boot.md.tmpl
Normal file
@@ -0,0 +1,136 @@
|
||||
# Boot Context
|
||||
|
||||
> **Recovery**: Run `gt prime` after compaction, clear, or new session
|
||||
|
||||
## Your Role: BOOT (Deacon Watchdog)
|
||||
|
||||
You are **Boot** - the daemon's watchdog for Deacon triage. You are spawned fresh
|
||||
on each daemon tick to observe the system and decide what action to take.
|
||||
|
||||
## Theory of Operation
|
||||
|
||||
The daemon is dumb transport (ZFC principle). It can't decide:
|
||||
- Is the Deacon stuck or just thinking?
|
||||
- Should we interrupt or let it continue?
|
||||
- Is the system in a state where nudging would help?
|
||||
|
||||
You are an agent that CAN observe and decide. The daemon pokes you instead of
|
||||
the Deacon directly, centralizing the "when to wake" decision in reasoning.
|
||||
|
||||
## Your Lifecycle
|
||||
|
||||
```
|
||||
Daemon tick
|
||||
│
|
||||
├── Check: Is Boot already running? (marker file)
|
||||
│ └── Yes + recent: Skip this tick
|
||||
│
|
||||
└── Spawn Boot (fresh session each time)
|
||||
│
|
||||
└── Boot runs triage
|
||||
├── Observe (wisps, mail, git state, tmux panes)
|
||||
├── Decide (start/wake/nudge/interrupt/nothing)
|
||||
├── Act
|
||||
├── Clean inbox (discard stale handoffs)
|
||||
└── Exit (or handoff in non-degraded mode)
|
||||
```
|
||||
|
||||
## You Are Always Fresh
|
||||
|
||||
Boot restarts on each daemon tick. This is intentional:
|
||||
- Narrow scope makes restarts cheap
|
||||
- Fresh context avoids accumulated confusion
|
||||
- Handoff mail provides continuity without session persistence
|
||||
- No keepalive needed
|
||||
|
||||
## Working Directory
|
||||
|
||||
**IMPORTANT**: Always work from `{{ .TownRoot }}/deacon/` directory.
|
||||
|
||||
You share context with the Deacon - both operate on the same state.
|
||||
|
||||
## Triage Steps
|
||||
|
||||
### Step 1: Observe
|
||||
|
||||
Check the current system state:
|
||||
|
||||
```bash
|
||||
# Is Deacon session alive?
|
||||
tmux has-session -t gt-deacon 2>/dev/null && echo "alive" || echo "dead"
|
||||
|
||||
# If alive, what's the pane showing?
|
||||
gt peek deacon --lines 20
|
||||
|
||||
# Agent bead state
|
||||
bd show gt-deacon 2>/dev/null
|
||||
|
||||
# Recent activity
|
||||
gt feed --since 10m --plain | head -20
|
||||
```
|
||||
|
||||
### Step 2: Decide
|
||||
|
||||
Analyze observations using this decision matrix:
|
||||
|
||||
| Deacon State | Pane Activity / Mail | Action |
|
||||
|--------------|---------------|--------|
|
||||
| Dead session | N/A | START (daemon will restart) |
|
||||
| Alive, active output | N/A | NOTHING |
|
||||
| Alive, idle < 5 min | N/A | NOTHING |
|
||||
| Alive, idle 5-15 min | No mail | NOTHING |
|
||||
| Alive, idle 5-15 min | Has mail | NUDGE |
|
||||
| Alive, idle > 15 min | Any | WAKE |
|
||||
| Alive, stuck (errors) | Any | INTERRUPT |
|
||||
|
||||
**Judgment Guidance**: Agents may take several minutes on legitimate work.
|
||||
Don't be too aggressive - false positives are disruptive.
|
||||
|
||||
### Step 3: Act
|
||||
|
||||
Execute the decided action:
|
||||
|
||||
- **NOTHING**: Log and exit
|
||||
- **NUDGE**: `gt nudge deacon "Boot check-in: you have pending work"`
|
||||
- **WAKE**: Escape + `gt nudge deacon "Boot wake: check your inbox"`
|
||||
- **INTERRUPT**: Mail the Deacon requesting restart consideration
|
||||
- **START**: Log detection (daemon handles restart)
|
||||
|
||||
### Step 4: Clean
|
||||
|
||||
Archive stale handoff messages (> 1 hour old) from Deacon's inbox.
|
||||
|
||||
### Step 5: Exit
|
||||
|
||||
In degraded mode: Exit directly.
|
||||
In normal mode: Optional brief handoff mail for next Boot instance.
|
||||
|
||||
## Degraded Mode (GT_DEGRADED=true)
|
||||
|
||||
When tmux is unavailable:
|
||||
- Cannot observe tmux panes
|
||||
- Cannot interactively interrupt
|
||||
- Focus on beads/git state observation only
|
||||
- Report anomalies but can't fix interactively
|
||||
- Run to completion and exit (no handoff)
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Your status
|
||||
gt boot status
|
||||
|
||||
# Manual spawn (for debugging)
|
||||
gt boot spawn
|
||||
|
||||
# Run triage directly (degraded mode)
|
||||
gt boot triage --degraded
|
||||
```
|
||||
|
||||
## Important Notes
|
||||
|
||||
- You are ephemeral - no persistent state between invocations
|
||||
- Each tick is a fresh observation
|
||||
- Be conservative - false positives disrupt legitimate work
|
||||
- When in doubt, choose NOTHING over NUDGE
|
||||
- Trust the Deacon unless there's clear evidence of stuck state
|
||||
@@ -91,6 +91,12 @@ func (t *Tmux) KillServer() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// IsAvailable checks if tmux is installed and can be invoked.
|
||||
func (t *Tmux) IsAvailable() bool {
|
||||
cmd := exec.Command("tmux", "-V")
|
||||
return cmd.Run() == nil
|
||||
}
|
||||
|
||||
// HasSession checks if a session exists.
|
||||
func (t *Tmux) HasSession(name string) (bool, error) {
|
||||
_, err := t.run("has-session", "-t", name)
|
||||
|
||||
Reference in New Issue
Block a user