Add daemon health check endpoint (bd-146)

- Add OpHealth RPC operation to protocol
- Implement handleHealth() with DB ping and 1s timeout
- Returns status (healthy/degraded/unhealthy), uptime, cache metrics
- Update TryConnect() to use health check instead of ping
- Add 'bd daemon --health' CLI command with JSON output
- Track cache hits/misses for metrics
- Unhealthy daemon triggers automatic fallback to direct mode
- Health check completes in <2 seconds

Amp-Thread-ID: https://ampcode.com/threads/T-1a4889f3-77cf-433a-a704-e1c383929f48
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-18 13:41:06 -07:00
parent f987722f96
commit 9e2ee1889f
6 changed files with 199 additions and 5 deletions

View File

@@ -38,10 +38,12 @@ The daemon will:
- Auto-import when remote changes detected
Use --stop to stop a running daemon.
Use --status to check if daemon is running.`,
Use --status to check if daemon is running.
Use --health to check daemon health and metrics.`,
Run: func(cmd *cobra.Command, args []string) {
stop, _ := cmd.Flags().GetBool("stop")
status, _ := cmd.Flags().GetBool("status")
health, _ := cmd.Flags().GetBool("health")
interval, _ := cmd.Flags().GetDuration("interval")
autoCommit, _ := cmd.Flags().GetBool("auto-commit")
autoPush, _ := cmd.Flags().GetBool("auto-push")
@@ -64,6 +66,11 @@ Use --status to check if daemon is running.`,
return
}
if health {
showDaemonHealth(global)
return
}
if stop {
stopDaemon(pidFile)
return
@@ -119,6 +126,7 @@ func init() {
daemonCmd.Flags().Bool("auto-push", false, "Automatically push commits")
daemonCmd.Flags().Bool("stop", false, "Stop running daemon")
daemonCmd.Flags().Bool("status", false, "Show daemon status")
daemonCmd.Flags().Bool("health", false, "Check daemon health and metrics")
daemonCmd.Flags().String("log", "", "Log file path (default: .beads/daemon.log)")
daemonCmd.Flags().Bool("global", false, "Run as global daemon (socket at ~/.beads/bd.sock)")
rootCmd.AddCommand(daemonCmd)
@@ -250,6 +258,78 @@ func showDaemonStatus(pidFile string, global bool) {
}
}
func showDaemonHealth(global bool) {
var socketPath string
if global {
home, err := os.UserHomeDir()
if err != nil {
fmt.Fprintf(os.Stderr, "Error: cannot get home directory: %v\n", err)
os.Exit(1)
}
socketPath = filepath.Join(home, ".beads", "bd.sock")
} else {
beadsDir, err := ensureBeadsDir()
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1)
}
socketPath = filepath.Join(beadsDir, "bd.sock")
}
client, err := rpc.TryConnect(socketPath)
if err != nil {
fmt.Fprintf(os.Stderr, "Error connecting to daemon: %v\n", err)
os.Exit(1)
}
if client == nil {
fmt.Println("✗ Daemon is not running")
os.Exit(1)
}
defer client.Close()
health, err := client.Health()
if err != nil {
fmt.Fprintf(os.Stderr, "Error checking health: %v\n", err)
os.Exit(1)
}
if jsonOutput {
data, _ := json.MarshalIndent(health, "", " ")
fmt.Println(string(data))
return
}
statusIcon := "✓"
if health.Status == "unhealthy" {
statusIcon = "✗"
} else if health.Status == "degraded" {
statusIcon = "⚠"
}
fmt.Printf("%s Daemon Health: %s\n", statusIcon, health.Status)
fmt.Printf(" Version: %s\n", health.Version)
fmt.Printf(" Uptime: %.1f seconds\n", health.Uptime)
fmt.Printf(" Cache Size: %d databases\n", health.CacheSize)
fmt.Printf(" Cache Hits: %d\n", health.CacheHits)
fmt.Printf(" Cache Misses: %d\n", health.CacheMisses)
if health.CacheHits+health.CacheMisses > 0 {
hitRate := float64(health.CacheHits) / float64(health.CacheHits+health.CacheMisses) * 100
fmt.Printf(" Cache Hit Rate: %.1f%%\n", hitRate)
}
fmt.Printf(" DB Response Time: %.2f ms\n", health.DBResponseTime)
if health.Error != "" {
fmt.Printf(" Error: %s\n", health.Error)
}
if health.Status == "unhealthy" {
os.Exit(1)
}
}
func stopDaemon(pidFile string) {
if isRunning, pid := isDaemonRunning(pidFile); !isRunning {
fmt.Println("Daemon is not running")