Fix daemon auto-start reliability

- Run initial sync in background so daemon becomes responsive immediately
- Skip daemon-running check for forked child process (BD_DAEMON_FOREGROUND=1)
- Fix PID file conflict between acquireDaemonLock and runDaemonLoop
- Daemon now starts reliably even with slow/failing git pulls

Fixes an issue where the daemon would time out during auto-start because it
was blocked on git pull in the initial sync cycle. Now the RPC server starts
immediately and the initial sync runs asynchronously.

Amp-Thread-ID: https://ampcode.com/threads/T-57f3c00a-02b4-4878-adba-c7d1649759b4
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-24 21:51:30 -07:00
parent b0259fe36f
commit b405eefbe0

View File

@@ -3,9 +3,7 @@ package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"os/signal"
@@ -88,8 +86,11 @@ Use --health to check daemon health and metrics.`,
return
}
// Check if daemon is already running
if isRunning, pid := isDaemonRunning(pidFile); isRunning {
// Skip daemon-running check if we're the forked child (BD_DAEMON_FOREGROUND=1)
// because the check happens in the parent process before forking
if os.Getenv("BD_DAEMON_FOREGROUND") != "1" {
// Check if daemon is already running
if isRunning, pid := isDaemonRunning(pidFile); isRunning {
// Check if running daemon has compatible version
socketPath := getSocketPathForPID(pidFile, global)
if client, err := rpc.TryConnectWithTimeout(socketPath, 1*time.Second); err == nil && client != nil {
@@ -117,6 +118,7 @@ Use --health to check daemon health and metrics.`,
os.Exit(1)
}
}
}
// Global daemon doesn't support auto-commit/auto-push (no sync loop)
if global && (autoCommit || autoPush) {
@@ -814,35 +816,19 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p
}
defer func() { _ = lock.Close() }()
// PID file was already written by acquireDaemonLock, but verify it has our PID
myPID := os.Getpid()
pidFileCreated := false
for attempt := 0; attempt < 2; attempt++ {
f, err := os.OpenFile(pidFile, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600)
if err == nil {
_, _ = fmt.Fprintf(f, "%d", myPID)
_ = f.Close()
pidFileCreated = true
break
if data, err := os.ReadFile(pidFile); err == nil {
if pid, err := strconv.Atoi(strings.TrimSpace(string(data))); err == nil && pid == myPID {
// PID file is correct, continue
} else {
log("PID file has wrong PID (expected %d, got %d), overwriting", myPID, pid)
_ = os.WriteFile(pidFile, []byte(fmt.Sprintf("%d\n", myPID)), 0600)
}
if errors.Is(err, fs.ErrExist) {
if isRunning, pid := isDaemonRunning(pidFile); isRunning {
log("Daemon already running (PID %d), exiting", pid)
os.Exit(1)
}
log("Stale PID file detected, removing and retrying")
_ = os.Remove(pidFile)
continue
}
log("Error creating PID file: %v", err)
os.Exit(1)
}
if !pidFileCreated {
log("Failed to create PID file after retries")
os.Exit(1)
} else {
// PID file missing (shouldn't happen since acquireDaemonLock writes it), create it
log("PID file missing after lock acquisition, creating")
_ = os.WriteFile(pidFile, []byte(fmt.Sprintf("%d\n", myPID)), 0600)
}
defer func() { _ = os.Remove(pidFile) }()
@@ -1017,7 +1003,8 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p
log("Sync cycle complete")
}
doSync()
// Run initial sync in background so daemon becomes responsive immediately
go doSync()
for {
select {