Add Boot health check to gt doctor (gt-k1sl4)

Implements "vet mode" - the doctor checks on the Boot watchdog:
- Boot directory presence
- Session status (alive/not running)
- Last execution status and errors
- Marker file freshness (stale marker indicates crash)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gastown/polecats/dementus
2025-12-30 21:59:18 -08:00
committed by Steve Yegge
parent 06f5541502
commit 112420dad0
2 changed files with 125 additions and 0 deletions

View File

@@ -24,6 +24,10 @@ var doctorCmd = &cobra.Command{
Doctor checks for common configuration issues, missing files,
and other problems that could affect workspace operation.
Infrastructure checks:
- daemon Check if daemon is running (fixable)
- boot-health Check Boot watchdog health (vet mode)
Cleanup checks (fixable):
- orphan-sessions Detect orphaned tmux sessions
- orphan-processes Detect orphaned Claude processes
@@ -75,6 +79,7 @@ func runDoctor(cmd *cobra.Command, args []string) error {
// Register built-in checks
d.Register(doctor.NewTownGitCheck())
d.Register(doctor.NewDaemonCheck())
d.Register(doctor.NewBootHealthCheck())
d.Register(doctor.NewBeadsDatabaseCheck())
d.Register(doctor.NewPrefixConflictCheck())
d.Register(doctor.NewRoutesCheck())

View File

@@ -0,0 +1,120 @@
package doctor
import (
"fmt"
"os"
"path/filepath"
"time"
"github.com/steveyegge/gastown/internal/boot"
)
// BootHealthCheck is the doctor check that verifies Boot watchdog health
// ("the vet checks on the dog").
//
// It embeds BaseCheck, which holds the check's name and description, and
// declares no fields of its own.
type BootHealthCheck struct {
BaseCheck
}
// NewBootHealthCheck builds the Boot watchdog health check, identified by
// the name "boot-health".
func NewBootHealthCheck() *BootHealthCheck {
	base := BaseCheck{
		CheckName:        "boot-health",
		CheckDescription: "Check Boot watchdog health (the vet checks on the dog)",
	}
	return &BootHealthCheck{BaseCheck: base}
}
// Run inspects the Boot watchdog rooted at ctx.TownRoot and reports its health.
//
// The checks run in order and the first one that fails decides the result:
//
//  1. Boot directory: StatusWarning if it does not exist, StatusError if it
//     cannot be inspected for any other reason.
//  2. Session: whether the Boot session is alive is recorded as a detail only;
//     it never fails the check on its own.
//  3. Last execution: StatusError if the status cannot be loaded,
//     StatusWarning if the last completed run recorded an error.
//  4. Marker file: StatusWarning if the marker exists and is older than
//     boot.DefaultMarkerTTL.
//
// If nothing fails, the result is StatusOK; the message and details note
// degraded mode when b.IsDegraded() reports it. Details gathered by earlier
// checks are carried into later warnings so the report stays complete.
func (c *BootHealthCheck) Run(ctx *CheckContext) *CheckResult {
	b := boot.New(ctx.TownRoot)
	var details []string

	// Check 1: Boot directory exists.
	bootDir := b.Dir()
	if _, err := os.Stat(bootDir); err != nil {
		if os.IsNotExist(err) {
			return &CheckResult{
				Name:    c.Name(),
				Status:  StatusWarning,
				Message: "Boot directory not present",
				Details: []string{fmt.Sprintf("Expected: %s", bootDir)},
				FixHint: "Boot directory is created on first daemon run",
			}
		}
		// Any other stat failure means the directory cannot be inspected.
		// Report it here instead of silently continuing with the later checks.
		return &CheckResult{
			Name:    c.Name(),
			Status:  StatusError,
			Message: "Failed to access Boot directory",
			Details: []string{
				fmt.Sprintf("Path: %s", bootDir),
				err.Error(),
			},
		}
	}

	// Check 2: Session alive. Informational only.
	if b.IsSessionAlive() {
		details = append(details, fmt.Sprintf("Session: %s (alive)", boot.SessionName))
	} else {
		details = append(details, fmt.Sprintf("Session: %s (not running)", boot.SessionName))
	}

	// Check 3: Last execution status.
	status, err := b.LoadStatus()
	if err != nil {
		return &CheckResult{
			Name:    c.Name(),
			Status:  StatusError,
			Message: "Failed to load Boot status",
			Details: []string{err.Error()},
		}
	}

	if !status.CompletedAt.IsZero() {
		age := time.Since(status.CompletedAt).Round(time.Second)
		details = append(details, fmt.Sprintf("Last run: %s ago", age))
		if status.LastAction != "" {
			details = append(details, fmt.Sprintf("Last action: %s", status.LastAction))
		}
		if status.Target != "" {
			details = append(details, fmt.Sprintf("Target: %s", status.Target))
		}
		if status.Error != "" {
			details = append(details, fmt.Sprintf("Last error: %s", status.Error))
			return &CheckResult{
				Name:    c.Name(),
				Status:  StatusWarning,
				Message: "Boot last run had an error",
				Details: details,
				FixHint: "Check daemon logs for details",
			}
		}
	} else if status.StartedAt.IsZero() {
		// Neither a start nor a completion time has been recorded.
		details = append(details, "No previous run recorded")
	}

	// Check 4: Marker file freshness. A marker older than the TTL is reported
	// as a possible crash. Any stat error (including a missing marker) skips
	// this check.
	markerPath := filepath.Join(bootDir, boot.MarkerFileName)
	if info, err := os.Stat(markerPath); err == nil {
		age := time.Since(info.ModTime())
		if age > boot.DefaultMarkerTTL {
			// Keep the session and last-run details collected above so the
			// warning shows the full picture, not just the marker data.
			details = append(details,
				fmt.Sprintf("Marker age: %s", age.Round(time.Second)),
				fmt.Sprintf("TTL: %s", boot.DefaultMarkerTTL),
			)
			return &CheckResult{
				Name:    c.Name(),
				Status:  StatusWarning,
				Message: "Boot marker is stale (possible crash)",
				Details: details,
				FixHint: "Stale marker will be cleaned on next daemon tick",
			}
		}
		// Marker exists and is fresh - Boot is currently running.
		details = append(details, fmt.Sprintf("Currently running (marker age: %s)", age.Round(time.Second)))
	}

	// All checks passed.
	message := "Boot watchdog healthy"
	if b.IsDegraded() {
		message = "Boot watchdog healthy (degraded mode)"
		details = append(details, "Running in degraded mode (no tmux)")
	}

	return &CheckResult{
		Name:    c.Name(),
		Status:  StatusOK,
		Message: message,
		Details: details,
	}
}