Add daemon fallback visibility and version compatibility checks

Implemented bd-150: Improve daemon fallback visibility and user feedback
- Added DaemonStatus struct to track connection state
- Enhanced BD_DEBUG logging with detailed diagnostics and timing
- Added BD_VERBOSE mode with actionable warnings when falling back
- Implemented health checks before using daemon
- Clear fallback reasons: connect_failed, health_failed, auto_start_disabled, auto_start_failed, flag_no_daemon
- Updated documentation

Implemented bd-151: Add version compatibility checks for daemon RPC protocol
- Added ClientVersion field to RPC Request struct
- Client sends version (0.9.10) in all requests
- Server validates version compatibility using semver:
  - Major version must match
  - Daemon >= client for backward compatibility
  - Clear error messages with directional hints (upgrade daemon vs upgrade client)
- Added ClientVersion and Compatible fields to HealthResponse
- Implemented 'bd version --daemon' command to check compatibility
- Fixed batch operations to propagate ClientVersion for proper checks
- Updated documentation with version compatibility section

Code review improvements:
- Propagate ClientVersion in batch sub-requests
- Directional error messages based on which side is older
- Made ServerVersion a var for future unification

Amp-Thread-ID: https://ampcode.com/threads/T-b5fe36b8-c065-44a9-a55b-582573671609
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-19 08:04:48 -07:00
parent 5fefce4e85
commit 22daa12665
7 changed files with 515 additions and 200 deletions

File diff suppressed because one or more lines are too long

View File

@@ -523,7 +523,23 @@ bd --db ~/otherproject/.beads/other.db list
- `BEADS_DB` - Override database path - `BEADS_DB` - Override database path
- `BEADS_AUTO_START_DAEMON` - Enable/disable automatic daemon start (default: `true`). Set to `false` or `0` to disable. - `BEADS_AUTO_START_DAEMON` - Enable/disable automatic daemon start (default: `true`). Set to `false` or `0` to disable.
- `BD_ACTOR` - Set actor name for change tracking (defaults to `$USER`) - `BD_ACTOR` - Set actor name for change tracking (defaults to `$USER`)
- `BD_DEBUG` - Enable debug logging for troubleshooting - `BD_DEBUG` - Enable debug logging (connection attempts, auto-start timing, health checks)
- `BD_VERBOSE` - Show warnings when falling back from daemon to direct mode
### Version Compatibility
The daemon and CLI check version compatibility automatically:
- **Major version** must match (e.g., 1.x.x client requires 1.x.x daemon)
- **Minor version** is backward compatible when the daemon is newer (e.g., a 1.2.x daemon supports a 1.1.x client)
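- **Patch version** is compatible as long as the daemon is the same version as or newer than the client; a daemon older than the client is rejected with a hint to upgrade and restart it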
When versions mismatch, you'll see a clear error message with instructions to restart the daemon.
Check daemon version and compatibility:
```bash
bd version --daemon # Show daemon and client versions
bd version --daemon --json # JSON output with compatibility info
```
## Dependency Model ## Dependency Model
@@ -927,6 +943,7 @@ bd daemon --status # Show daemon status
bd daemon --stop # Stop running daemon bd daemon --stop # Stop running daemon
bd daemon --global # Run as global daemon (see below) bd daemon --global # Run as global daemon (see below)
bd daemon --migrate-to-global # Migrate from local to global daemon bd daemon --migrate-to-global # Migrate from local to global daemon
bd version --daemon # Check daemon version and compatibility
``` ```
Log rotation is automatic and configurable via environment variables: Log rotation is automatic and configurable via environment variables:

View File

@@ -29,11 +29,36 @@ import (
"golang.org/x/mod/semver" "golang.org/x/mod/semver"
) )
// DaemonStatus captures daemon connection state for the current command.
// It is initialized in direct mode before any connection attempt and is
// updated as the command tries to connect to, health-check, and (optionally)
// auto-start the daemon.
type DaemonStatus struct {
// Mode is the execution mode chosen for this command.
Mode string `json:"mode"` // "daemon" or "direct"
// Connected is set to true once a daemon connection passed its health check.
Connected bool `json:"connected"`
// Degraded starts out true (direct mode) and is cleared when a healthy daemon is used.
Degraded bool `json:"degraded"`
// SocketPath is the daemon socket path used for connection attempts.
SocketPath string `json:"socket_path,omitempty"`
// AutoStartEnabled records whether daemon auto-start is enabled for this command.
AutoStartEnabled bool `json:"auto_start_enabled"`
// AutoStartAttempted is set to true before an auto-start is tried.
AutoStartAttempted bool `json:"auto_start_attempted"`
// AutoStartSucceeded is set to true when the auto-started daemon connected and reported healthy.
AutoStartSucceeded bool `json:"auto_start_succeeded"`
// FallbackReason holds one of the Fallback* constants explaining why direct mode is used.
FallbackReason string `json:"fallback_reason,omitempty"` // "none","flag_no_daemon","connect_failed","health_failed","auto_start_disabled","auto_start_failed"
// Detail carries the connect/health error text or the daemon-reported health error, when available.
Detail string `json:"detail,omitempty"` // short diagnostic
// Health is the status string reported by the daemon's health check.
Health string `json:"health,omitempty"` // "healthy","degraded","unhealthy"
}
// Fallback reason constants.
// These are the values stored in DaemonStatus.FallbackReason.
const (
// FallbackNone means no fallback reason has been recorded.
FallbackNone = "none"
// FallbackFlagNoDaemon means the --no-daemon flag was set.
FallbackFlagNoDaemon = "flag_no_daemon"
// FallbackConnectFailed means connecting to the daemon socket failed.
FallbackConnectFailed = "connect_failed"
// FallbackHealthFailed means the daemon was reachable but its health check failed or reported a non-healthy status.
FallbackHealthFailed = "health_failed"
// FallbackAutoStartDisabled means the connection failed and auto-start was disabled.
FallbackAutoStartDisabled = "auto_start_disabled"
// FallbackAutoStartFailed means auto-start failed or did not yield a connectable daemon.
FallbackAutoStartFailed = "auto_start_failed"
)
var ( var (
dbPath string dbPath string
actor string actor string
store storage.Storage store storage.Storage
jsonOutput bool jsonOutput bool
daemonStatus DaemonStatus // Tracks daemon connection state for current command
// Daemon mode // Daemon mode
daemonClient *rpc.Client // RPC client when daemon is running daemonClient *rpc.Client // RPC client when daemon is running
@@ -96,38 +121,145 @@ var rootCmd = &cobra.Command{
} }
} }
// Initialize daemon status
socketPath := getSocketPath()
daemonStatus = DaemonStatus{
Mode: "direct",
Connected: false,
Degraded: true,
SocketPath: socketPath,
AutoStartEnabled: shouldAutoStartDaemon(),
FallbackReason: FallbackNone,
}
// Try to connect to daemon first (unless --no-daemon flag is set) // Try to connect to daemon first (unless --no-daemon flag is set)
if !noDaemon { if noDaemon {
socketPath := getSocketPath() daemonStatus.FallbackReason = FallbackFlagNoDaemon
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: --no-daemon flag set, using direct mode\n")
}
} else {
// Attempt daemon connection
client, err := rpc.TryConnect(socketPath) client, err := rpc.TryConnect(socketPath)
if err == nil && client != nil { if err == nil && client != nil {
daemonClient = client // Perform health check
if os.Getenv("BD_DEBUG") != "" { health, healthErr := client.Health()
fmt.Fprintf(os.Stderr, "Debug: connected to daemon at %s\n", socketPath) if healthErr == nil && health.Status == "healthy" {
} // Daemon is healthy - use it
return // Skip direct storage initialization daemonClient = client
} daemonStatus.Mode = "daemon"
daemonStatus.Connected = true
// Daemon not running - try auto-start if enabled daemonStatus.Degraded = false
if shouldAutoStartDaemon() { daemonStatus.Health = health.Status
if os.Getenv("BD_DEBUG") != "" { if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: attempting to auto-start daemon\n") fmt.Fprintf(os.Stderr, "Debug: connected to daemon at %s (health: %s)\n", socketPath, health.Status)
} }
if tryAutoStartDaemon(socketPath) { return // Skip direct storage initialization
// Retry connection after auto-start } else {
client, err := rpc.TryConnect(socketPath) // Health check failed or daemon unhealthy
if err == nil && client != nil { client.Close()
daemonClient = client daemonStatus.FallbackReason = FallbackHealthFailed
if healthErr != nil {
daemonStatus.Detail = healthErr.Error()
if os.Getenv("BD_DEBUG") != "" { if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: connected to auto-started daemon at %s\n", socketPath) fmt.Fprintf(os.Stderr, "Debug: daemon health check failed: %v\n", healthErr)
} }
return // Skip direct storage initialization } else {
daemonStatus.Health = health.Status
daemonStatus.Detail = health.Error
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: daemon unhealthy (status=%s): %s\n", health.Status, health.Error)
}
}
}
} else {
// Connection failed
daemonStatus.FallbackReason = FallbackConnectFailed
if err != nil {
daemonStatus.Detail = err.Error()
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: daemon connect failed at %s: %v\n", socketPath, err)
} }
} }
} }
// Daemon not running or unhealthy - try auto-start if enabled
if daemonStatus.AutoStartEnabled {
daemonStatus.AutoStartAttempted = true
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: attempting to auto-start daemon\n")
}
startTime := time.Now()
if tryAutoStartDaemon(socketPath) {
// Retry connection after auto-start
client, err := rpc.TryConnect(socketPath)
if err == nil && client != nil {
// Check health of auto-started daemon
health, healthErr := client.Health()
if healthErr == nil && health.Status == "healthy" {
daemonClient = client
daemonStatus.Mode = "daemon"
daemonStatus.Connected = true
daemonStatus.Degraded = false
daemonStatus.AutoStartSucceeded = true
daemonStatus.Health = health.Status
daemonStatus.FallbackReason = FallbackNone
if os.Getenv("BD_DEBUG") != "" {
elapsed := time.Since(startTime).Milliseconds()
fmt.Fprintf(os.Stderr, "Debug: auto-start succeeded; connected at %s in %dms\n", socketPath, elapsed)
}
return // Skip direct storage initialization
} else {
// Auto-started daemon is unhealthy
client.Close()
daemonStatus.FallbackReason = FallbackHealthFailed
if healthErr != nil {
daemonStatus.Detail = healthErr.Error()
} else {
daemonStatus.Health = health.Status
daemonStatus.Detail = health.Error
}
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: auto-started daemon is unhealthy; falling back to direct mode\n")
}
}
} else {
// Auto-start completed but connection still failed
daemonStatus.FallbackReason = FallbackAutoStartFailed
if err != nil {
daemonStatus.Detail = err.Error()
}
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: auto-start did not yield a running daemon; falling back to direct mode\n")
}
}
} else {
// Auto-start itself failed
daemonStatus.FallbackReason = FallbackAutoStartFailed
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: auto-start failed; falling back to direct mode\n")
}
}
} else {
// Auto-start disabled - only override if we don't already have a health failure
if daemonStatus.FallbackReason != FallbackHealthFailed {
// For connect failures, mention that auto-start was disabled
if daemonStatus.FallbackReason == FallbackConnectFailed {
daemonStatus.FallbackReason = FallbackAutoStartDisabled
}
}
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: auto-start disabled by BEADS_AUTO_START_DAEMON\n")
}
}
// Emit BD_VERBOSE warning if falling back to direct mode
if os.Getenv("BD_VERBOSE") != "" {
emitVerboseWarning()
}
if os.Getenv("BD_DEBUG") != "" { if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: daemon not available, using direct mode\n") fmt.Fprintf(os.Stderr, "Debug: using direct mode (reason: %s)\n", daemonStatus.FallbackReason)
} }
} }
@@ -209,6 +341,23 @@ func getDebounceDuration() time.Duration {
} }
// shouldAutoStartDaemon checks if daemon auto-start is enabled // shouldAutoStartDaemon checks if daemon auto-start is enabled
// emitVerboseWarning writes a single-line warning to stderr describing why
// the command is running in direct mode, based on daemonStatus.FallbackReason.
// Nothing is printed for --no-daemon (an explicit user choice) or for any
// reason without a dedicated message.
func emitVerboseWarning() {
	var warning string

	switch daemonStatus.FallbackReason {
	case FallbackFlagNoDaemon:
		// The user asked for direct mode explicitly; stay silent.
		return
	case FallbackConnectFailed:
		warning = fmt.Sprintf("Warning: Daemon unreachable at %s. Running in direct mode. Hint: bd daemon --status\n", daemonStatus.SocketPath)
	case FallbackHealthFailed:
		warning = "Warning: Daemon unhealthy. Falling back to direct mode. Hint: bd daemon --health\n"
	case FallbackAutoStartDisabled:
		warning = "Warning: Auto-start disabled (BEADS_AUTO_START_DAEMON=false). Running in direct mode. Hint: bd daemon\n"
	case FallbackAutoStartFailed:
		warning = "Warning: Failed to auto-start daemon. Running in direct mode. Hint: bd daemon --status\n"
	default:
		// No message is defined for any other reason.
		return
	}

	fmt.Fprint(os.Stderr, warning)
}
func shouldAutoStartDaemon() bool { func shouldAutoStartDaemon() bool {
// Check environment variable (default: true) // Check environment variable (default: true)
autoStart := strings.ToLower(strings.TrimSpace(os.Getenv("BEADS_AUTO_START_DAEMON"))) autoStart := strings.ToLower(strings.TrimSpace(os.Getenv("BEADS_AUTO_START_DAEMON")))

View File

@@ -2,8 +2,11 @@ package main
import ( import (
"fmt" "fmt"
"os"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/steveyegge/beads"
"github.com/steveyegge/beads/internal/rpc"
) )
const ( const (
@@ -17,6 +20,13 @@ var versionCmd = &cobra.Command{
Use: "version", Use: "version",
Short: "Print version information", Short: "Print version information",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
checkDaemon, _ := cmd.Flags().GetBool("daemon")
if checkDaemon {
showDaemonVersion()
return
}
if jsonOutput { if jsonOutput {
outputJSON(map[string]string{ outputJSON(map[string]string{
"version": Version, "version": Version,
@@ -28,6 +38,55 @@ var versionCmd = &cobra.Command{
}, },
} }
// showDaemonVersion connects to the daemon, runs a health check, and prints
// the daemon version, client version, compatibility verdict, and daemon
// uptime (as JSON when jsonOutput is set).
//
// The process exits with status 1 when the daemon cannot be reached, when the
// health check fails, or when the daemon reports the versions as incompatible.
func showDaemonVersion() {
	// Connect to daemon (PersistentPreRun skips version command).
	// The database path is resolved first because the socket path is derived
	// from it.
	if dbPath == "" {
		// Use public API to find database (same logic as PersistentPreRun).
		if foundDB := beads.FindDatabasePath(); foundDB != "" {
			dbPath = foundDB
		}
	}

	socketPath := getSocketPath()
	client, err := rpc.TryConnect(socketPath)
	if err != nil || client == nil {
		fmt.Fprintf(os.Stderr, "Error: daemon is not running\n")
		fmt.Fprintf(os.Stderr, "Hint: start daemon with 'bd daemon'\n")
		os.Exit(1)
	}

	health, err := client.Health()
	// Close the connection explicitly instead of deferring: the paths below
	// may call os.Exit, which terminates the process without running
	// deferred functions.
	client.Close()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error checking daemon health: %v\n", err)
		os.Exit(1)
	}

	if jsonOutput {
		outputJSON(map[string]interface{}{
			"daemon_version": health.Version,
			"client_version": Version,
			"compatible":     health.Compatible,
			"daemon_uptime":  health.Uptime,
		})
	} else {
		fmt.Printf("Daemon version: %s\n", health.Version)
		fmt.Printf("Client version: %s\n", Version)
		if health.Compatible {
			fmt.Printf("Compatibility: ✓ compatible\n")
		} else {
			fmt.Printf("Compatibility: ✗ incompatible (restart daemon recommended)\n")
		}
		fmt.Printf("Daemon uptime: %.1f seconds\n", health.Uptime)
	}

	// Signal incompatibility to scripts through the exit status.
	if !health.Compatible {
		os.Exit(1)
	}
}
func init() { func init() {
versionCmd.Flags().Bool("daemon", false, "Check daemon version and compatibility")
rootCmd.AddCommand(versionCmd) rootCmd.AddCommand(versionCmd)
} }

View File

@@ -9,6 +9,10 @@ import (
"time" "time"
) )
// ClientVersion is the version of this RPC client.
// It is sent in the ClientVersion field of every request built by Execute.
// This should match the bd CLI version for proper compatibility checks.
var ClientVersion = "0.9.10"
// Client represents an RPC client that connects to the daemon // Client represents an RPC client that connects to the daemon
type Client struct { type Client struct {
conn net.Conn conn net.Conn
@@ -86,8 +90,9 @@ func (c *Client) Execute(operation string, args interface{}) (*Response, error)
} }
req := Request{ req := Request{
Operation: operation, Operation: operation,
Args: argsJSON, Args: argsJSON,
ClientVersion: ClientVersion,
} }
reqJSON, err := json.Marshal(req) reqJSON, err := json.Marshal(req)

View File

@@ -31,11 +31,12 @@ const (
// Request represents an RPC request from client to daemon // Request represents an RPC request from client to daemon
type Request struct { type Request struct {
Operation string `json:"operation"` Operation string `json:"operation"`
Args json.RawMessage `json:"args"` Args json.RawMessage `json:"args"`
Actor string `json:"actor,omitempty"` Actor string `json:"actor,omitempty"`
RequestID string `json:"request_id,omitempty"` RequestID string `json:"request_id,omitempty"`
Cwd string `json:"cwd,omitempty"` // Working directory for database discovery Cwd string `json:"cwd,omitempty"` // Working directory for database discovery
ClientVersion string `json:"client_version,omitempty"` // Client version for compatibility checks
} }
// Response represents an RPC response from daemon to client // Response represents an RPC response from daemon to client
@@ -141,7 +142,9 @@ type PingResponse struct {
// HealthResponse is the response for a health check operation // HealthResponse is the response for a health check operation
type HealthResponse struct { type HealthResponse struct {
Status string `json:"status"` // "healthy", "degraded", "unhealthy" Status string `json:"status"` // "healthy", "degraded", "unhealthy"
Version string `json:"version"` Version string `json:"version"` // Server/daemon version
ClientVersion string `json:"client_version,omitempty"` // Client version from request
Compatible bool `json:"compatible"` // Whether versions are compatible
Uptime float64 `json:"uptime_seconds"` Uptime float64 `json:"uptime_seconds"`
CacheSize int `json:"cache_size"` CacheSize int `json:"cache_size"`
CacheHits int64 `json:"cache_hits"` CacheHits int64 `json:"cache_hits"`

View File

@@ -10,6 +10,7 @@ import (
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"sort" "sort"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"syscall" "syscall"
@@ -18,8 +19,14 @@ import (
"github.com/steveyegge/beads/internal/storage" "github.com/steveyegge/beads/internal/storage"
"github.com/steveyegge/beads/internal/storage/sqlite" "github.com/steveyegge/beads/internal/storage/sqlite"
"github.com/steveyegge/beads/internal/types" "github.com/steveyegge/beads/internal/types"
"golang.org/x/mod/semver"
) )
// ServerVersion is the version of this RPC server.
// It is reported in ping and health responses and is the daemon side of the
// client/daemon version compatibility check.
// This should match the bd CLI version for proper compatibility checks.
// It's set as a var so it can be initialized from main.
var ServerVersion = "0.9.10"
// StorageCacheEntry holds a cached storage with metadata for eviction // StorageCacheEntry holds a cached storage with metadata for eviction
type StorageCacheEntry struct { type StorageCacheEntry struct {
store storage.Storage store storage.Storage
@@ -288,7 +295,70 @@ func (s *Server) handleConnection(conn net.Conn) {
} }
} }
// checkVersionCompatibility decides whether a client reporting clientVersion
// may talk to this daemon (ServerVersion).
//
// It returns nil when the client sent no version (clients that predate this
// check), when either version is not valid semver (e.g. dev builds), or when
// the major versions match and the daemon is at least as new as the client.
// Otherwise it returns an error that says which side is older and how to fix
// it.
func (s *Server) checkVersionCompatibility(clientVersion string) error {
	if clientVersion == "" {
		return nil
	}

	// The semver package requires a leading "v"; add it when absent.
	withV := func(v string) string {
		if strings.HasPrefix(v, "v") {
			return v
		}
		return "v" + v
	}
	daemonSemver := withV(ServerVersion)
	clientSemver := withV(clientVersion)

	// Versions that cannot be parsed are let through rather than rejected.
	if !(semver.IsValid(daemonSemver) && semver.IsValid(clientSemver)) {
		return nil
	}

	sameMajor := semver.Major(daemonSemver) == semver.Major(clientSemver)
	daemonOlder := semver.Compare(daemonSemver, clientSemver) < 0

	switch {
	case !sameMajor && daemonOlder:
		// Major mismatch with the daemon behind: the daemon must be upgraded.
		return fmt.Errorf("incompatible major versions: client %s, daemon %s. Daemon is older; upgrade and restart daemon: 'bd daemon --stop && bd daemon'",
			clientVersion, ServerVersion)
	case !sameMajor:
		// Major mismatch with the daemon ahead: the client must be upgraded.
		return fmt.Errorf("incompatible major versions: client %s, daemon %s. Client is older; upgrade the bd CLI to match the daemon's major version",
			clientVersion, ServerVersion)
	case daemonOlder:
		// Same major, but the daemon is older than the client and may lack features.
		return fmt.Errorf("version mismatch: daemon %s is older than client %s. Upgrade and restart daemon: 'bd daemon --stop && bd daemon'",
			ServerVersion, clientVersion)
	default:
		// Same version or older client: accepted within the same major version.
		return nil
	}
}
func (s *Server) handleRequest(req *Request) Response { func (s *Server) handleRequest(req *Request) Response {
// Check version compatibility (skip for ping/health to allow version checks)
if req.Operation != OpPing && req.Operation != OpHealth {
if err := s.checkVersionCompatibility(req.ClientVersion); err != nil {
return Response{
Success: false,
Error: err.Error(),
}
}
}
switch req.Operation { switch req.Operation {
case OpPing: case OpPing:
return s.handlePing(req) return s.handlePing(req)
@@ -391,7 +461,7 @@ func updatesFromArgs(a UpdateArgs) map[string]interface{} {
func (s *Server) handlePing(_ *Request) Response { func (s *Server) handlePing(_ *Request) Response {
data, _ := json.Marshal(PingResponse{ data, _ := json.Marshal(PingResponse{
Message: "pong", Message: "pong",
Version: "0.9.8", Version: ServerVersion,
}) })
return Response{ return Response{
Success: true, Success: true,
@@ -406,7 +476,7 @@ func (s *Server) handleHealth(req *Request) Response {
if err != nil { if err != nil {
data, _ := json.Marshal(HealthResponse{ data, _ := json.Marshal(HealthResponse{
Status: "unhealthy", Status: "unhealthy",
Version: "0.9.8", Version: ServerVersion,
Uptime: time.Since(s.startTime).Seconds(), Uptime: time.Since(s.startTime).Seconds(),
Error: fmt.Sprintf("storage error: %v", err), Error: fmt.Sprintf("storage error: %v", err),
}) })
@@ -437,9 +507,19 @@ func (s *Server) handleHealth(req *Request) Response {
cacheSize := len(s.storageCache) cacheSize := len(s.storageCache)
s.cacheMu.RUnlock() s.cacheMu.RUnlock()
// Check version compatibility
compatible := true
if req.ClientVersion != "" {
if err := s.checkVersionCompatibility(req.ClientVersion); err != nil {
compatible = false
}
}
health := HealthResponse{ health := HealthResponse{
Status: status, Status: status,
Version: "0.9.8", Version: ServerVersion,
ClientVersion: req.ClientVersion,
Compatible: compatible,
Uptime: time.Since(s.startTime).Seconds(), Uptime: time.Since(s.startTime).Seconds(),
CacheSize: cacheSize, CacheSize: cacheSize,
CacheHits: atomic.LoadInt64(&s.cacheHits), CacheHits: atomic.LoadInt64(&s.cacheHits),
@@ -874,11 +954,12 @@ func (s *Server) handleBatch(req *Request) Response {
for _, op := range batchArgs.Operations { for _, op := range batchArgs.Operations {
subReq := &Request{ subReq := &Request{
Operation: op.Operation, Operation: op.Operation,
Args: op.Args, Args: op.Args,
Actor: req.Actor, Actor: req.Actor,
RequestID: req.RequestID, RequestID: req.RequestID,
Cwd: req.Cwd, // Pass through context Cwd: req.Cwd, // Pass through context
ClientVersion: req.ClientVersion, // Pass through version for compatibility checks
} }
resp := s.handleRequest(subReq) resp := s.handleRequest(subReq)