Fix daemon auto-start reliability

- Run initial sync in background so daemon becomes responsive immediately
- Skip daemon-running check for forked child process (BD_DAEMON_FOREGROUND=1)
- Fix PID file conflict between acquireDaemonLock and runDaemonLoop
- Daemon now starts reliably even with slow/failing git pulls

Fixes an issue where the daemon would time out during auto-start because it
was blocked on git pull in the initial sync cycle. Now the RPC server starts
immediately and the initial sync runs asynchronously.

Amp-Thread-ID: https://ampcode.com/threads/T-57f3c00a-02b4-4878-adba-c7d1649759b4
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-24 21:51:30 -07:00
parent b0259fe36f
commit b405eefbe0

View File

@@ -3,9 +3,7 @@ package main
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io/fs"
"os" "os"
"os/exec" "os/exec"
"os/signal" "os/signal"
@@ -88,8 +86,11 @@ Use --health to check daemon health and metrics.`,
return return
} }
// Check if daemon is already running // Skip daemon-running check if we're the forked child (BD_DAEMON_FOREGROUND=1)
if isRunning, pid := isDaemonRunning(pidFile); isRunning { // because the check happens in the parent process before forking
if os.Getenv("BD_DAEMON_FOREGROUND") != "1" {
// Check if daemon is already running
if isRunning, pid := isDaemonRunning(pidFile); isRunning {
// Check if running daemon has compatible version // Check if running daemon has compatible version
socketPath := getSocketPathForPID(pidFile, global) socketPath := getSocketPathForPID(pidFile, global)
if client, err := rpc.TryConnectWithTimeout(socketPath, 1*time.Second); err == nil && client != nil { if client, err := rpc.TryConnectWithTimeout(socketPath, 1*time.Second); err == nil && client != nil {
@@ -117,6 +118,7 @@ Use --health to check daemon health and metrics.`,
os.Exit(1) os.Exit(1)
} }
} }
}
// Global daemon doesn't support auto-commit/auto-push (no sync loop) // Global daemon doesn't support auto-commit/auto-push (no sync loop)
if global && (autoCommit || autoPush) { if global && (autoCommit || autoPush) {
@@ -814,35 +816,19 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p
} }
defer func() { _ = lock.Close() }() defer func() { _ = lock.Close() }()
// PID file was already written by acquireDaemonLock, but verify it has our PID
myPID := os.Getpid() myPID := os.Getpid()
pidFileCreated := false if data, err := os.ReadFile(pidFile); err == nil {
if pid, err := strconv.Atoi(strings.TrimSpace(string(data))); err == nil && pid == myPID {
for attempt := 0; attempt < 2; attempt++ { // PID file is correct, continue
f, err := os.OpenFile(pidFile, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600) } else {
if err == nil { log("PID file has wrong PID (expected %d, got %d), overwriting", myPID, pid)
_, _ = fmt.Fprintf(f, "%d", myPID) _ = os.WriteFile(pidFile, []byte(fmt.Sprintf("%d\n", myPID)), 0600)
_ = f.Close()
pidFileCreated = true
break
} }
} else {
if errors.Is(err, fs.ErrExist) { // PID file missing (shouldn't happen since acquireDaemonLock writes it), create it
if isRunning, pid := isDaemonRunning(pidFile); isRunning { log("PID file missing after lock acquisition, creating")
log("Daemon already running (PID %d), exiting", pid) _ = os.WriteFile(pidFile, []byte(fmt.Sprintf("%d\n", myPID)), 0600)
os.Exit(1)
}
log("Stale PID file detected, removing and retrying")
_ = os.Remove(pidFile)
continue
}
log("Error creating PID file: %v", err)
os.Exit(1)
}
if !pidFileCreated {
log("Failed to create PID file after retries")
os.Exit(1)
} }
defer func() { _ = os.Remove(pidFile) }() defer func() { _ = os.Remove(pidFile) }()
@@ -1017,7 +1003,8 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p
log("Sync cycle complete") log("Sync cycle complete")
} }
doSync() // Run initial sync in background so daemon becomes responsive immediately
go doSync()
for { for {
select { select {