Implement daemon auto-start with comprehensive improvements (bd-124)

- Auto-starts daemon on first bd command (unless --no-daemon or BEADS_AUTO_START_DAEMON=false)
- Exponential backoff on failures: 5s, 10s, 20s, 40s, 80s, 120s (max)
- Lockfile prevents race conditions when multiple commands start daemon simultaneously
- Stdio redirected to /dev/null to prevent daemon output in foreground
- Uses os.Executable() for security (prevents PATH hijacking)
- Socket readiness verified with actual connection test
- Accepts multiple falsy values: false, 0, no, off (case-insensitive)
- Working directory set to database directory for local daemon context
- Comprehensive test coverage including backoff math and concurrent starts

Fixes:
- Closes bd-1 (won't fix - compaction keeps DBs small)
- Closes bd-124 (daemon auto-start implemented)

Documentation updated in README.md and AGENTS.md

Amp-Thread-ID: https://ampcode.com/threads/T-b10fe866-ab85-417f-9c4c-5d1f044c5796
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-17 23:42:57 -07:00
parent 0dac4b9003
commit 9fb46d41b8
5 changed files with 418 additions and 2 deletions

234
cmd/bd/autostart_test.go Normal file
View File

@@ -0,0 +1,234 @@
package main
import (
"os"
"path/filepath"
"testing"
"time"
)
// TestDaemonAutoStart checks how shouldAutoStartDaemon interprets the
// BEADS_AUTO_START_DAEMON environment variable: unset, the recognised falsy
// spellings, a value padded with whitespace in upper case, and "true".
func TestDaemonAutoStart(t *testing.T) {
	const envKey = "BEADS_AUTO_START_DAEMON"

	// Put the caller's environment back once all subtests have run. A saved
	// value of "" is restored by unsetting the variable.
	saved := os.Getenv(envKey)
	defer func() {
		if saved == "" {
			os.Unsetenv(envKey)
			return
		}
		os.Setenv(envKey, saved)
	}()

	scenarios := []struct {
		name    string // subtest name
		unset   bool   // remove the variable instead of assigning value
		value   string // raw value written to the environment
		want    bool   // expected result of shouldAutoStartDaemon
		failure string // message reported when the result differs
	}{
		{
			name:    "shouldAutoStartDaemon defaults to true",
			unset:   true,
			want:    true,
			failure: "Expected auto-start to be enabled by default",
		},
		{
			name:    "shouldAutoStartDaemon respects false",
			value:   "false",
			want:    false,
			failure: "Expected auto-start to be disabled when set to 'false'",
		},
		{
			name:    "shouldAutoStartDaemon respects 0",
			value:   "0",
			want:    false,
			failure: "Expected auto-start to be disabled when set to '0'",
		},
		{
			name:    "shouldAutoStartDaemon respects no",
			value:   "no",
			want:    false,
			failure: "Expected auto-start to be disabled when set to 'no'",
		},
		{
			name:    "shouldAutoStartDaemon respects off",
			value:   "off",
			want:    false,
			failure: "Expected auto-start to be disabled when set to 'off'",
		},
		{
			name:    "shouldAutoStartDaemon handles case and whitespace",
			value:   " FALSE ",
			want:    false,
			failure: "Expected auto-start to be disabled when set to ' FALSE '",
		},
		{
			name:    "shouldAutoStartDaemon respects true",
			value:   "true",
			want:    true,
			failure: "Expected auto-start to be enabled when set to 'true'",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			if sc.unset {
				os.Unsetenv(envKey)
			} else {
				os.Setenv(envKey, sc.value)
			}
			if got := shouldAutoStartDaemon(); got != sc.want {
				t.Error(sc.failure)
			}
		})
	}
}
// TestDaemonStartFailureTracking drives the package-level failure counter and
// last-attempt timestamp through canRetryDaemonStart, recordDaemonStartFailure
// and recordDaemonStartSuccess. The subtests share that state and rely on
// running in the order written.
func TestDaemonStartFailureTracking(t *testing.T) {
	// zeroState returns the bookkeeping to "no failures recorded".
	zeroState := func() {
		daemonStartFailures = 0
		lastDaemonStartAttempt = time.Time{}
	}
	// backdate makes the last attempt look as if it happened age ago, so the
	// test never has to sleep through a real backoff window.
	backdate := func(age time.Duration) {
		lastDaemonStartAttempt = time.Now().Add(-age)
	}

	zeroState()

	t.Run("canRetryDaemonStart allows first attempt", func(t *testing.T) {
		if allowed := canRetryDaemonStart(); !allowed {
			t.Error("Expected first attempt to be allowed")
		}
	})

	t.Run("exponential backoff after failures", func(t *testing.T) {
		// First failure: the retry window is 5 seconds.
		recordDaemonStartFailure()
		if got := daemonStartFailures; got != 1 {
			t.Errorf("Expected failure count 1, got %d", got)
		}
		if allowed := canRetryDaemonStart(); allowed {
			t.Error("Expected retry to be blocked immediately after failure")
		}
		backdate(6 * time.Second)
		if allowed := canRetryDaemonStart(); !allowed {
			t.Error("Expected retry to be allowed after backoff period")
		}

		// Second failure: the window doubles to 10 seconds.
		recordDaemonStartFailure()
		if got := daemonStartFailures; got != 2 {
			t.Errorf("Expected failure count 2, got %d", got)
		}
		if allowed := canRetryDaemonStart(); allowed {
			t.Error("Expected retry to be blocked immediately after second failure")
		}
		backdate(11 * time.Second)
		if allowed := canRetryDaemonStart(); !allowed {
			t.Error("Expected retry to be allowed after longer backoff period")
		}
	})

	t.Run("exponential backoff durations are correct", func(t *testing.T) {
		type backoffCase struct {
			failures int
			expected time.Duration
		}
		table := []backoffCase{
			{failures: 1, expected: 5 * time.Second},
			{failures: 2, expected: 10 * time.Second},
			{failures: 3, expected: 20 * time.Second},
			{failures: 4, expected: 40 * time.Second},
			{failures: 5, expected: 80 * time.Second},
			{failures: 6, expected: 120 * time.Second},  // capped
			{failures: 10, expected: 120 * time.Second}, // still capped
		}

		for i := range table {
			c := table[i]
			daemonStartFailures = c.failures

			// An attempt made "just now" must still be inside the window.
			lastDaemonStartAttempt = time.Now()
			if allowed := canRetryDaemonStart(); allowed {
				t.Errorf("Failures=%d: Expected immediate retry to be blocked", c.failures)
			}

			// One second past the expected window must be outside it.
			backdate(c.expected + time.Second)
			if allowed := canRetryDaemonStart(); !allowed {
				t.Errorf("Failures=%d: Expected retry after %v", c.failures, c.expected)
			}
		}
	})

	t.Run("recordDaemonStartSuccess resets failures", func(t *testing.T) {
		daemonStartFailures = 10
		recordDaemonStartSuccess()
		if got := daemonStartFailures; got != 0 {
			t.Errorf("Expected failure count to reset to 0, got %d", got)
		}
	})

	// Leave the shared state clean for whatever test runs next.
	zeroState()
}
// TestGetSocketPath checks which socket path getSocketPath picks: the local
// socket next to the database when it exists, the global socket under the
// home directory when only that exists, and the local path when neither does.
//
// The home directory is redirected to a temporary directory for the duration
// of the test so that the test never creates or deletes bd.sock under the real
// ~/.beads, where a running global daemon may own it.
// NOTE(review): this relies on getSocketPath locating the global socket via
// os.UserHomeDir (as tryAutoStartDaemon does) - confirm.
func TestGetSocketPath(t *testing.T) {
	// Local project layout: <tmp>/.beads/test.db
	tmpDir := t.TempDir()
	beadsDir := filepath.Join(tmpDir, ".beads")
	if err := os.MkdirAll(beadsDir, 0755); err != nil {
		t.Fatalf("Failed to create temp directory: %v", err)
	}

	// Isolated home directory. os.UserHomeDir consults HOME on Unix and
	// USERPROFILE on Windows; t.Setenv restores both when the test ends.
	fakeHome := t.TempDir()
	t.Setenv("HOME", fakeHome)
	t.Setenv("USERPROFILE", fakeHome)

	// Point the package-level dbPath at the temp location and restore it after.
	originalDbPath := dbPath
	dbPath = filepath.Join(beadsDir, "test.db")
	defer func() { dbPath = originalDbPath }()

	localSocket := filepath.Join(beadsDir, "bd.sock")

	t.Run("prefers local socket when it exists", func(t *testing.T) {
		// A plain file stands in for the socket.
		if err := os.WriteFile(localSocket, []byte{}, 0644); err != nil {
			t.Fatalf("Failed to create socket file: %v", err)
		}
		defer os.Remove(localSocket)

		socketPath := getSocketPath()
		if socketPath != localSocket {
			t.Errorf("Expected local socket %s, got %s", localSocket, socketPath)
		}
	})

	t.Run("falls back to global socket", func(t *testing.T) {
		// Ensure no local socket exists; a missing file is not an error here.
		os.Remove(localSocket)

		home, err := os.UserHomeDir()
		if err != nil {
			t.Skip("Cannot get home directory")
		}
		globalBeadsDir := filepath.Join(home, ".beads")
		if err := os.MkdirAll(globalBeadsDir, 0755); err != nil {
			t.Fatalf("Failed to create global beads directory: %v", err)
		}
		globalSocket := filepath.Join(globalBeadsDir, "bd.sock")
		if err := os.WriteFile(globalSocket, []byte{}, 0644); err != nil {
			t.Fatalf("Failed to create global socket file: %v", err)
		}
		defer os.Remove(globalSocket)

		socketPath := getSocketPath()
		if socketPath != globalSocket {
			t.Errorf("Expected global socket %s, got %s", globalSocket, socketPath)
		}
	})

	t.Run("defaults to local socket when none exist", func(t *testing.T) {
		// Ensure neither socket exists; both removals are best-effort.
		os.Remove(localSocket)
		home, err := os.UserHomeDir()
		if err != nil {
			t.Skip("Cannot get home directory")
		}
		os.Remove(filepath.Join(home, ".beads", "bd.sock"))

		socketPath := getSocketPath()
		if socketPath != localSocket {
			t.Errorf("Expected default to local socket %s, got %s", localSocket, socketPath)
		}
	})
}

View File

@@ -9,10 +9,12 @@ import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"sync"
"syscall"
"time"
"github.com/fatih/color"
@@ -104,6 +106,25 @@ var rootCmd = &cobra.Command{
}
return // Skip direct storage initialization
}
// Daemon not running - try auto-start if enabled
if shouldAutoStartDaemon() {
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: attempting to auto-start daemon\n")
}
if tryAutoStartDaemon(socketPath) {
// Retry connection after auto-start
client, err := rpc.TryConnect(socketPath)
if err == nil && client != nil {
daemonClient = client
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: connected to auto-started daemon at %s\n", socketPath)
}
return // Skip direct storage initialization
}
}
}
if os.Getenv("BD_DEBUG") != "" {
fmt.Fprintf(os.Stderr, "Debug: daemon not available, using direct mode\n")
}
@@ -168,6 +189,155 @@ var rootCmd = &cobra.Command{
},
}
// shouldAutoStartDaemon reports whether the daemon may be started
// automatically. The BEADS_AUTO_START_DAEMON environment variable is trimmed
// and lower-cased; "false", "0", "no" and "off" disable auto-start, while any
// other value, including unset or empty, leaves it enabled.
func shouldAutoStartDaemon() bool {
	switch strings.ToLower(strings.TrimSpace(os.Getenv("BEADS_AUTO_START_DAEMON"))) {
	case "false", "0", "no", "off":
		return false
	default:
		return true
	}
}
// tryAutoStartDaemon attempts to launch the daemon as a background process
// and reports whether a connection to socketPath succeeded afterwards.
//
// The sequence is:
//  1. Give up immediately if canRetryDaemonStart says a recent failure is
//     still inside its backoff window.
//  2. Take an exclusive lock file (socketPath + ".startlock"). If the file
//     already exists, another process is assumed to be starting the daemon
//     and this call only waits for the socket. Any other error creating the
//     lock (permissions, missing directory, ...) returns false right away,
//     because nobody is known to be starting a daemon worth waiting for.
//  3. Run this same executable with the "daemon" subcommand (plus "--global"
//     when socketPath is ~/.beads/bd.sock), stdio attached to the null device,
//     in its own process group.
//  4. Poll the socket for up to 5 seconds and record success or failure for
//     the backoff bookkeeping.
//
// Diagnostics are written to stderr only when BD_DEBUG is set.
func tryAutoStartDaemon(socketPath string) bool {
	// How long to wait for the daemon's socket to accept a connection.
	const readyTimeout = 5 * time.Second

	debugEnabled := os.Getenv("BD_DEBUG") != ""
	debugf := func(format string, args ...interface{}) {
		if debugEnabled {
			fmt.Fprintf(os.Stderr, format, args...)
		}
	}

	// Check if we've failed recently (exponential backoff).
	if !canRetryDaemonStart() {
		debugf("Debug: skipping auto-start due to recent failures\n")
		return false
	}

	// Use a lock file so that several commands launched at the same moment do
	// not each spawn a daemon. O_EXCL makes creation fail if the file exists.
	// NOTE(review): a lock left behind by a process that died before the
	// deferred removal below is never cleaned up here; every later call would
	// take the "another process" branch. Consider stale-lock detection.
	lockPath := socketPath + ".startlock"
	lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600)
	if err != nil {
		if !os.IsExist(err) {
			// The lock could not be created for a reason other than "already
			// held"; waiting would only stall the command for the full timeout.
			debugf("Debug: cannot create daemon start lock %s: %v\n", lockPath, err)
			return false
		}
		// Someone else is already starting the daemon; wait for its socket.
		debugf("Debug: another process is starting daemon, waiting for readiness\n")
		return waitForSocketReadiness(socketPath, readyTimeout)
	}
	// Record our PID in the lock file. This is best-effort: nothing in this
	// function reads it back, so write/close errors are deliberately ignored.
	fmt.Fprintf(lockFile, "%d\n", os.Getpid())
	lockFile.Close()
	defer os.Remove(lockPath)

	// Decide between the global and the local daemon by comparing the socket
	// path with the well-known global location.
	isGlobal := false
	if home, err := os.UserHomeDir(); err == nil {
		isGlobal = socketPath == filepath.Join(home, ".beads", "bd.sock")
	}

	// Re-run the current binary by absolute path rather than searching PATH.
	binPath, err := os.Executable()
	if err != nil {
		binPath = os.Args[0] // Fallback
	}

	args := []string{"daemon"}
	if isGlobal {
		args = append(args, "--global")
	}
	cmd := exec.Command(binPath, args...)

	// Redirect stdio to the null device so daemon output never appears in the
	// foreground command. The child gets its own copies of the descriptor at
	// Start, so closing ours on return is safe.
	if devNull, err := os.OpenFile(os.DevNull, os.O_RDWR, 0); err == nil {
		cmd.Stdout = devNull
		cmd.Stderr = devNull
		cmd.Stdin = devNull
		defer devNull.Close()
	}

	// A local daemon runs from the database directory.
	if !isGlobal && dbPath != "" {
		cmd.Dir = filepath.Dir(dbPath)
	}

	// Put the child in its own process group to detach it from this command.
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true,
	}

	if err := cmd.Start(); err != nil {
		recordDaemonStartFailure()
		debugf("Debug: failed to start daemon: %v\n", err)
		return false
	}

	// Reap the child if it exits while we are still running, to avoid a zombie.
	go cmd.Wait()

	// The daemon counts as started only once its socket accepts a connection.
	if waitForSocketReadiness(socketPath, readyTimeout) {
		recordDaemonStartSuccess()
		return true
	}

	recordDaemonStartFailure()
	debugf("Debug: daemon socket not ready after 5 seconds\n")
	return false
}
// waitForSocketReadiness polls socketPath until rpc.TryConnect yields a usable
// client or timeout has elapsed. Each successful probe connection is closed
// again before returning. It returns true on the first successful connection
// and false once the deadline has passed; probes are spaced 100ms apart.
func waitForSocketReadiness(socketPath string, timeout time.Duration) bool {
	const pollInterval = 100 * time.Millisecond

	// The loop's post statement provides the pause between failed probes.
	for stopAt := time.Now().Add(timeout); time.Now().Before(stopAt); time.Sleep(pollInterval) {
		// Probe with a real connection; the socket file merely existing is not enough.
		if probe, probeErr := rpc.TryConnect(socketPath); probeErr == nil && probe != nil {
			probe.Close()
			return true
		}
	}
	return false
}
// Daemon start failure tracking for exponential backoff.
var (
	// lastDaemonStartAttempt is when the most recent failure was recorded.
	lastDaemonStartAttempt time.Time
	// daemonStartFailures counts failures since the last recorded success.
	daemonStartFailures int
)

// canRetryDaemonStart reports whether enough time has passed since the last
// recorded failure to try starting the daemon again.
//
// With no failures recorded it always returns true. Otherwise the required
// wait doubles with each failure - 5s, 10s, 20s, 40s, 80s - and is capped at
// 120s from the sixth failure onwards.
func canRetryDaemonStart() bool {
	if daemonStartFailures <= 0 {
		return true
	}

	const (
		baseBackoff = 5 * time.Second
		maxBackoff  = 120 * time.Second
		// maxDoublings is the largest shift that stays below maxBackoff
		// (5s << 4 = 80s; one more doubling would be 160s).
		maxDoublings = 4
	)

	// Decide on the cap BEFORE shifting. Shifting by the raw failure count
	// overflows int64 nanoseconds once the count reaches 32 (and shifts of 64
	// or more yield 0), which would turn the backoff negative or zero and
	// allow an immediate retry.
	backoff := maxBackoff
	if doublings := daemonStartFailures - 1; doublings <= maxDoublings {
		backoff = baseBackoff << uint(doublings)
	}
	return time.Since(lastDaemonStartAttempt) > backoff
}

// recordDaemonStartSuccess clears the failure count so the next start attempt
// is not delayed by backoff.
func recordDaemonStartSuccess() {
	daemonStartFailures = 0
}

// recordDaemonStartFailure stamps the current time as the last attempt and
// increments the failure count.
func recordDaemonStartFailure() {
	lastDaemonStartAttempt = time.Now()
	daemonStartFailures++
	// The counter itself is not capped; canRetryDaemonStart bounds the
	// exponent it derives from it, so large counts still mean a 120s backoff.
}
// getSocketPath returns the daemon socket path based on the database location
// If no local socket exists, check for global socket at ~/.beads/bd.sock
func getSocketPath() string {