Implement daemon auto-start with comprehensive improvements (bd-124)
- Auto-starts daemon on first bd command (unless --no-daemon or BEADS_AUTO_START_DAEMON=false)
- Exponential backoff on failures: 5s, 10s, 20s, 40s, 80s, 120s (max)
- Lockfile prevents race conditions when multiple commands start daemon simultaneously
- Stdio redirected to /dev/null to prevent daemon output in foreground
- Uses os.Executable() for security (prevents PATH hijacking)
- Socket readiness verified with actual connection test
- Accepts multiple falsy values: false, 0, no, off (case-insensitive)
- Working directory set to database directory for local daemon context
- Comprehensive test coverage including backoff math and concurrent starts

Fixes:
- Closes bd-1 (won't fix - compaction keeps DBs small)
- Closes bd-124 (daemon auto-start implemented)

Documentation updated in README.md and AGENTS.md

Amp-Thread-ID: https://ampcode.com/threads/T-b10fe866-ab85-417f-9c4c-5d1f044c5796
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
234
cmd/bd/autostart_test.go
Normal file
234
cmd/bd/autostart_test.go
Normal file
@@ -0,0 +1,234 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestDaemonAutoStart(t *testing.T) {
|
||||
// Save original env
|
||||
origAutoStart := os.Getenv("BEADS_AUTO_START_DAEMON")
|
||||
defer func() {
|
||||
if origAutoStart != "" {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", origAutoStart)
|
||||
} else {
|
||||
os.Unsetenv("BEADS_AUTO_START_DAEMON")
|
||||
}
|
||||
}()
|
||||
|
||||
t.Run("shouldAutoStartDaemon defaults to true", func(t *testing.T) {
|
||||
os.Unsetenv("BEADS_AUTO_START_DAEMON")
|
||||
if !shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be enabled by default")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("shouldAutoStartDaemon respects false", func(t *testing.T) {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", "false")
|
||||
if shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be disabled when set to 'false'")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("shouldAutoStartDaemon respects 0", func(t *testing.T) {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", "0")
|
||||
if shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be disabled when set to '0'")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("shouldAutoStartDaemon respects no", func(t *testing.T) {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", "no")
|
||||
if shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be disabled when set to 'no'")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("shouldAutoStartDaemon respects off", func(t *testing.T) {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", "off")
|
||||
if shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be disabled when set to 'off'")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("shouldAutoStartDaemon handles case and whitespace", func(t *testing.T) {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", " FALSE ")
|
||||
if shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be disabled when set to ' FALSE '")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("shouldAutoStartDaemon respects true", func(t *testing.T) {
|
||||
os.Setenv("BEADS_AUTO_START_DAEMON", "true")
|
||||
if !shouldAutoStartDaemon() {
|
||||
t.Error("Expected auto-start to be enabled when set to 'true'")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestDaemonStartFailureTracking(t *testing.T) {
|
||||
// Reset failure state
|
||||
daemonStartFailures = 0
|
||||
lastDaemonStartAttempt = time.Time{}
|
||||
|
||||
t.Run("canRetryDaemonStart allows first attempt", func(t *testing.T) {
|
||||
if !canRetryDaemonStart() {
|
||||
t.Error("Expected first attempt to be allowed")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("exponential backoff after failures", func(t *testing.T) {
|
||||
// Simulate first failure
|
||||
recordDaemonStartFailure()
|
||||
if daemonStartFailures != 1 {
|
||||
t.Errorf("Expected failure count 1, got %d", daemonStartFailures)
|
||||
}
|
||||
|
||||
// Should not allow immediate retry
|
||||
if canRetryDaemonStart() {
|
||||
t.Error("Expected retry to be blocked immediately after failure")
|
||||
}
|
||||
|
||||
// Wait for backoff period (5 seconds for first failure)
|
||||
lastDaemonStartAttempt = time.Now().Add(-6 * time.Second)
|
||||
if !canRetryDaemonStart() {
|
||||
t.Error("Expected retry to be allowed after backoff period")
|
||||
}
|
||||
|
||||
// Simulate second failure
|
||||
recordDaemonStartFailure()
|
||||
if daemonStartFailures != 2 {
|
||||
t.Errorf("Expected failure count 2, got %d", daemonStartFailures)
|
||||
}
|
||||
|
||||
// Should not allow immediate retry (10 second backoff)
|
||||
if canRetryDaemonStart() {
|
||||
t.Error("Expected retry to be blocked immediately after second failure")
|
||||
}
|
||||
|
||||
// Wait for longer backoff
|
||||
lastDaemonStartAttempt = time.Now().Add(-11 * time.Second)
|
||||
if !canRetryDaemonStart() {
|
||||
t.Error("Expected retry to be allowed after longer backoff period")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("exponential backoff durations are correct", func(t *testing.T) {
|
||||
testCases := []struct {
|
||||
failures int
|
||||
expected time.Duration
|
||||
}{
|
||||
{1, 5 * time.Second},
|
||||
{2, 10 * time.Second},
|
||||
{3, 20 * time.Second},
|
||||
{4, 40 * time.Second},
|
||||
{5, 80 * time.Second},
|
||||
{6, 120 * time.Second}, // Capped
|
||||
{10, 120 * time.Second}, // Still capped
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
daemonStartFailures = tc.failures
|
||||
lastDaemonStartAttempt = time.Now()
|
||||
|
||||
// Should not allow retry immediately
|
||||
if canRetryDaemonStart() {
|
||||
t.Errorf("Failures=%d: Expected immediate retry to be blocked", tc.failures)
|
||||
}
|
||||
|
||||
// Should allow retry after expected duration
|
||||
lastDaemonStartAttempt = time.Now().Add(-(tc.expected + time.Second))
|
||||
if !canRetryDaemonStart() {
|
||||
t.Errorf("Failures=%d: Expected retry after %v", tc.failures, tc.expected)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("recordDaemonStartSuccess resets failures", func(t *testing.T) {
|
||||
daemonStartFailures = 10
|
||||
recordDaemonStartSuccess()
|
||||
if daemonStartFailures != 0 {
|
||||
t.Errorf("Expected failure count to reset to 0, got %d", daemonStartFailures)
|
||||
}
|
||||
})
|
||||
|
||||
// Reset state
|
||||
daemonStartFailures = 0
|
||||
lastDaemonStartAttempt = time.Time{}
|
||||
}
|
||||
|
||||
func TestGetSocketPath(t *testing.T) {
|
||||
// Create temp directory structure
|
||||
tmpDir := t.TempDir()
|
||||
beadsDir := filepath.Join(tmpDir, ".beads")
|
||||
if err := os.MkdirAll(beadsDir, 0755); err != nil {
|
||||
t.Fatalf("Failed to create temp directory: %v", err)
|
||||
}
|
||||
|
||||
// Set dbPath to temp location
|
||||
originalDbPath := dbPath
|
||||
dbPath = filepath.Join(beadsDir, "test.db")
|
||||
defer func() { dbPath = originalDbPath }()
|
||||
|
||||
t.Run("prefers local socket when it exists", func(t *testing.T) {
|
||||
localSocket := filepath.Join(beadsDir, "bd.sock")
|
||||
|
||||
// Create local socket file
|
||||
if err := os.WriteFile(localSocket, []byte{}, 0644); err != nil {
|
||||
t.Fatalf("Failed to create socket file: %v", err)
|
||||
}
|
||||
defer os.Remove(localSocket)
|
||||
|
||||
socketPath := getSocketPath()
|
||||
if socketPath != localSocket {
|
||||
t.Errorf("Expected local socket %s, got %s", localSocket, socketPath)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("falls back to global socket", func(t *testing.T) {
|
||||
// Ensure no local socket exists
|
||||
localSocket := filepath.Join(beadsDir, "bd.sock")
|
||||
os.Remove(localSocket)
|
||||
|
||||
// Create global socket
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
t.Skip("Cannot get home directory")
|
||||
}
|
||||
globalBeadsDir := filepath.Join(home, ".beads")
|
||||
if err := os.MkdirAll(globalBeadsDir, 0755); err != nil {
|
||||
t.Fatalf("Failed to create global beads directory: %v", err)
|
||||
}
|
||||
globalSocket := filepath.Join(globalBeadsDir, "bd.sock")
|
||||
|
||||
if err := os.WriteFile(globalSocket, []byte{}, 0644); err != nil {
|
||||
t.Fatalf("Failed to create global socket file: %v", err)
|
||||
}
|
||||
defer os.Remove(globalSocket)
|
||||
|
||||
socketPath := getSocketPath()
|
||||
if socketPath != globalSocket {
|
||||
t.Errorf("Expected global socket %s, got %s", globalSocket, socketPath)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("defaults to local socket when none exist", func(t *testing.T) {
|
||||
// Ensure no sockets exist
|
||||
localSocket := filepath.Join(beadsDir, "bd.sock")
|
||||
os.Remove(localSocket)
|
||||
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
t.Skip("Cannot get home directory")
|
||||
}
|
||||
globalSocket := filepath.Join(home, ".beads", "bd.sock")
|
||||
os.Remove(globalSocket)
|
||||
|
||||
socketPath := getSocketPath()
|
||||
if socketPath != localSocket {
|
||||
t.Errorf("Expected default to local socket %s, got %s", localSocket, socketPath)
|
||||
}
|
||||
})
|
||||
}
|
||||
170
cmd/bd/main.go
170
cmd/bd/main.go
@@ -9,10 +9,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/fatih/color"
|
||||
@@ -104,6 +106,25 @@ var rootCmd = &cobra.Command{
|
||||
}
|
||||
return // Skip direct storage initialization
|
||||
}
|
||||
|
||||
// Daemon not running - try auto-start if enabled
|
||||
if shouldAutoStartDaemon() {
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: attempting to auto-start daemon\n")
|
||||
}
|
||||
if tryAutoStartDaemon(socketPath) {
|
||||
// Retry connection after auto-start
|
||||
client, err := rpc.TryConnect(socketPath)
|
||||
if err == nil && client != nil {
|
||||
daemonClient = client
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: connected to auto-started daemon at %s\n", socketPath)
|
||||
}
|
||||
return // Skip direct storage initialization
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: daemon not available, using direct mode\n")
|
||||
}
|
||||
@@ -168,6 +189,155 @@ var rootCmd = &cobra.Command{
|
||||
},
|
||||
}
|
||||
|
||||
// shouldAutoStartDaemon reports whether the daemon may be started
// automatically. Auto-start is on by default; it is turned off only when
// BEADS_AUTO_START_DAEMON, after trimming whitespace and lower-casing, is one
// of "false", "0", "no" or "off". Any other value, including an empty one,
// leaves it enabled.
func shouldAutoStartDaemon() bool {
	setting := os.Getenv("BEADS_AUTO_START_DAEMON")
	switch strings.ToLower(strings.TrimSpace(setting)) {
	case "false", "0", "no", "off":
		return false
	default:
		return true
	}
}
|
||||
|
||||
// tryAutoStartDaemon attempts to start the daemon in the background
|
||||
// Returns true if daemon was started successfully and socket is ready
|
||||
func tryAutoStartDaemon(socketPath string) bool {
|
||||
// Check if we've failed recently (exponential backoff)
|
||||
if !canRetryDaemonStart() {
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: skipping auto-start due to recent failures\n")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Use lockfile to prevent multiple processes from starting daemon simultaneously
|
||||
lockPath := socketPath + ".startlock"
|
||||
lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600)
|
||||
if err != nil {
|
||||
// Someone else is already starting daemon, wait for socket readiness
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: another process is starting daemon, waiting for readiness\n")
|
||||
}
|
||||
return waitForSocketReadiness(socketPath, 5*time.Second)
|
||||
}
|
||||
|
||||
// Write our PID to lockfile
|
||||
fmt.Fprintf(lockFile, "%d\n", os.Getpid())
|
||||
lockFile.Close()
|
||||
defer os.Remove(lockPath)
|
||||
|
||||
// Determine if we should start global or local daemon
|
||||
isGlobal := false
|
||||
if home, err := os.UserHomeDir(); err == nil {
|
||||
globalSocket := filepath.Join(home, ".beads", "bd.sock")
|
||||
if socketPath == globalSocket {
|
||||
isGlobal = true
|
||||
}
|
||||
}
|
||||
|
||||
// Build daemon command using absolute path for security
|
||||
binPath, err := os.Executable()
|
||||
if err != nil {
|
||||
binPath = os.Args[0] // Fallback
|
||||
}
|
||||
|
||||
args := []string{"daemon"}
|
||||
if isGlobal {
|
||||
args = append(args, "--global")
|
||||
}
|
||||
|
||||
// Start daemon in background with proper I/O redirection
|
||||
cmd := exec.Command(binPath, args...)
|
||||
|
||||
// Redirect stdio to /dev/null to prevent daemon output in foreground
|
||||
devNull, err := os.OpenFile(os.DevNull, os.O_RDWR, 0)
|
||||
if err == nil {
|
||||
cmd.Stdout = devNull
|
||||
cmd.Stderr = devNull
|
||||
cmd.Stdin = devNull
|
||||
defer devNull.Close()
|
||||
}
|
||||
|
||||
// Set working directory to database directory for local daemon
|
||||
if !isGlobal && dbPath != "" {
|
||||
cmd.Dir = filepath.Dir(dbPath)
|
||||
}
|
||||
|
||||
// Detach from parent process
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setpgid: true,
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
recordDaemonStartFailure()
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: failed to start daemon: %v\n", err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Reap the process to avoid zombies
|
||||
go cmd.Wait()
|
||||
|
||||
// Wait for socket to be ready with actual connection test
|
||||
if waitForSocketReadiness(socketPath, 5*time.Second) {
|
||||
recordDaemonStartSuccess()
|
||||
return true
|
||||
}
|
||||
|
||||
recordDaemonStartFailure()
|
||||
if os.Getenv("BD_DEBUG") != "" {
|
||||
fmt.Fprintf(os.Stderr, "Debug: daemon socket not ready after 5 seconds\n")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// waitForSocketReadiness waits for daemon socket to be ready by testing actual connections
|
||||
func waitForSocketReadiness(socketPath string, timeout time.Duration) bool {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for time.Now().Before(deadline) {
|
||||
// Try actual connection, not just file existence
|
||||
client, err := rpc.TryConnect(socketPath)
|
||||
if err == nil && client != nil {
|
||||
client.Close()
|
||||
return true
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Daemon start failure tracking for exponential backoff.
//
// Both variables are plain package-level state: they live only for the
// lifetime of the current process and are read and written without locking
// by the functions below.
var (
	// lastDaemonStartAttempt is the time the most recent start failure was
	// recorded (set by recordDaemonStartFailure).
	lastDaemonStartAttempt time.Time
	// daemonStartFailures counts failures recorded since the last success.
	daemonStartFailures int
)

// canRetryDaemonStart reports whether enough time has passed since the last
// recorded failure to attempt another daemon start. With no recorded
// failures it always returns true. Otherwise the required wait doubles with
// each failure — 5s, 10s, 20s, 40s, 80s — and is capped at 120s.
func canRetryDaemonStart() bool {
	const (
		baseBackoff = 5 * time.Second
		maxBackoff  = 120 * time.Second
	)

	if daemonStartFailures <= 0 {
		return true
	}

	// Double step by step and stop at the cap instead of shifting by the raw
	// failure count: 5*(1<<(n-1)) seconds overflows int64 nanoseconds from
	// n=32 onward, which would yield a negative or zero backoff and allow
	// immediate retries.
	backoff := baseBackoff
	for i := 1; i < daemonStartFailures && backoff < maxBackoff; i++ {
		backoff *= 2
	}
	if backoff > maxBackoff {
		backoff = maxBackoff
	}

	return time.Since(lastDaemonStartAttempt) > backoff
}

// recordDaemonStartSuccess clears the failure count so the next start
// attempt is not delayed.
func recordDaemonStartSuccess() {
	daemonStartFailures = 0
}

// recordDaemonStartFailure notes a failed start: it stamps the current time
// and increments the failure count that drives the backoff.
func recordDaemonStartFailure() {
	lastDaemonStartAttempt = time.Now()
	// The counter is left uncapped; canRetryDaemonStart bounds the delay it
	// derives from it, so arbitrarily large counts are safe.
	daemonStartFailures++
}
|
||||
|
||||
// getSocketPath returns the daemon socket path based on the database location
|
||||
// If no local socket exists, check for global socket at ~/.beads/bd.sock
|
||||
func getSocketPath() string {
|
||||
|
||||
Reference in New Issue
Block a user