diff --git a/cmd/bd/daemon_autostart.go b/cmd/bd/daemon_autostart.go index 4fdd0cc0..97419154 100644 --- a/cmd/bd/daemon_autostart.go +++ b/cmd/bd/daemon_autostart.go @@ -11,6 +11,7 @@ import ( "github.com/steveyegge/beads/internal/config" "github.com/steveyegge/beads/internal/debug" + "github.com/steveyegge/beads/internal/lockfile" "github.com/steveyegge/beads/internal/rpc" "github.com/steveyegge/beads/internal/ui" ) @@ -217,7 +218,7 @@ func acquireStartLock(lockPath, socketPath string) bool { // nolint:gosec // G304: lockPath is derived from secure beads directory lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600) if err != nil { - // Lock file exists - check if it's from a dead process (stale) or alive daemon + // Lock file exists - check if daemon is actually starting lockPID, pidErr := readPIDFromFile(lockPath) if pidErr != nil || !isPIDAlive(lockPID) { // Stale lock from crashed process - clean up immediately (avoids 5s wait) @@ -227,7 +228,17 @@ func acquireStartLock(lockPath, socketPath string) bool { return acquireStartLock(lockPath, socketPath) } - // PID is alive - daemon is legitimately starting, wait for socket to be ready + // PID is alive - but is daemon actually running/starting? + // Use flock-based check as authoritative source (immune to PID reuse) + beadsDir := filepath.Dir(socketPath) + if running, _ := lockfile.TryDaemonLock(beadsDir); !running { + // Daemon lock not held - the start attempt failed or process was reused + debugLog("startlock PID %d alive but daemon lock not held, cleaning up", lockPID) + _ = os.Remove(lockPath) + return acquireStartLock(lockPath, socketPath) + } + + // Daemon lock is held - daemon is legitimately starting, wait for socket debugLog("another process (PID %d) is starting daemon, waiting for readiness", lockPID) if waitForSocketReadiness(socketPath, 5*time.Second) { return true @@ -242,11 +253,24 @@ func acquireStartLock(lockPath, socketPath string) bool { func handleStaleLock(lockPath, socketPath string) bool { lockPID, err := readPIDFromFile(lockPath) - if err == nil && !isPIDAlive(lockPID) { - debugLog("lock is stale (PID %d dead), removing and retrying", lockPID) + + // Check if PID is dead + if err != nil || !isPIDAlive(lockPID) { + debugLog("lock is stale (PID %d dead or unreadable), removing and retrying", lockPID) _ = os.Remove(lockPath) return tryAutoStartDaemon(socketPath) } + + // PID is alive - but check daemon lock as authoritative source (immune to PID reuse) + beadsDir := filepath.Dir(socketPath) + if running, _ := lockfile.TryDaemonLock(beadsDir); !running { + debugLog("lock PID %d alive but daemon lock not held, removing and retrying", lockPID) + _ = os.Remove(lockPath) + return tryAutoStartDaemon(socketPath) + } + + // Daemon lock is held - daemon is genuinely running but socket isn't ready + // This shouldn't happen normally, but don't clean up a legitimate lock return false } @@ -260,19 +284,24 @@ func handleExistingSocket(socketPath string) bool { return true } - pidFile := getPIDFileForSocket(socketPath) - if pidFile != "" { - if pid, err := readPIDFromFile(pidFile); err == nil && isPIDAlive(pid) { - debugLog("daemon PID %d alive, waiting for socket", pid) - return waitForSocketReadiness(socketPath, 5*time.Second) - } + // Use flock-based check as authoritative source (immune to PID reuse) + // If daemon lock is not held, daemon is definitely dead regardless of PID file + beadsDir := filepath.Dir(socketPath) + if running, pid := lockfile.TryDaemonLock(beadsDir); running { + debugLog("daemon lock held (PID %d), waiting for socket", pid) + return waitForSocketReadiness(socketPath, 5*time.Second) } - debugLog("socket is stale, cleaning up") + // Lock not held - daemon is dead, clean up stale artifacts + debugLog("socket is stale (daemon lock not held), cleaning up") _ = os.Remove(socketPath) // Best-effort cleanup, file may not exist + pidFile := getPIDFileForSocket(socketPath) if pidFile != "" { _ = os.Remove(pidFile) // Best-effort cleanup, file may not exist } + // Also clean up daemon.lock file (contains stale metadata) + lockFile := filepath.Join(beadsDir, "daemon.lock") + _ = os.Remove(lockFile) // Best-effort cleanup return false } diff --git a/internal/lockfile/process_unix.go b/internal/lockfile/process_unix.go index 2c9c5e45..f69c70fe 100644 --- a/internal/lockfile/process_unix.go +++ b/internal/lockfile/process_unix.go @@ -8,5 +8,8 @@ import ( // isProcessRunning checks if a process with the given PID is running func isProcessRunning(pid int) bool { + if pid <= 0 { + return false // Invalid PID (0 would signal our process group, not a specific process) + } return syscall.Kill(pid, 0) == nil } diff --git a/internal/lockfile/process_windows.go b/internal/lockfile/process_windows.go index 3912dade..4421f7b6 100644 --- a/internal/lockfile/process_windows.go +++ b/internal/lockfile/process_windows.go @@ -10,6 +10,9 @@ const stillActive = 259 // isProcessRunning checks if a process with the given PID is running func isProcessRunning(pid int) bool { + if pid <= 0 { + return false // Invalid PID + } handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid)) if err != nil { return false