Fix bd-54: Prevent multiple daemon instances via file locking
- Implemented daemon.lock using flock (Unix) and LockFileEx (Windows) - Lock acquired before PID file, held for daemon lifetime - Eliminates race conditions in concurrent daemon starts - Backward compatible: falls back to PID check for old daemons - Updated isDaemonRunning() to check lock availability - All tests pass including new lock and backward compatibility tests Amp-Thread-ID: https://ampcode.com/threads/T-0e2627f4-03f9-4024-bb4b-21d23d296300 Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
@@ -244,21 +244,8 @@ func getLogFilePath(userPath string, global bool) (string, error) {
|
||||
}
|
||||
|
||||
func isDaemonRunning(pidFile string) (bool, int) {
|
||||
data, err := os.ReadFile(pidFile)
|
||||
if err != nil {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
|
||||
if err != nil {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
if !isProcessRunning(pid) {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
return true, pid
|
||||
beadsDir := filepath.Dir(pidFile)
|
||||
return tryDaemonLock(beadsDir)
|
||||
}
|
||||
|
||||
func formatUptime(seconds float64) string {
|
||||
@@ -768,6 +755,19 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p
|
||||
fmt.Fprintf(logF, "[%s] %s\n", timestamp, msg)
|
||||
}
|
||||
|
||||
// Acquire daemon lock FIRST - this is the single source of truth for exclusivity
|
||||
beadsDir := filepath.Dir(pidFile)
|
||||
lock, err := acquireDaemonLock(beadsDir, global)
|
||||
if err != nil {
|
||||
if err == ErrDaemonLocked {
|
||||
log("Daemon already running (lock held), exiting")
|
||||
os.Exit(1)
|
||||
}
|
||||
log("Error acquiring daemon lock: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer lock.Close()
|
||||
|
||||
myPID := os.Getpid()
|
||||
pidFileCreated := false
|
||||
|
||||
|
||||
116
cmd/bd/daemon_lock.go
Normal file
116
cmd/bd/daemon_lock.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var ErrDaemonLocked = errors.New("daemon lock already held by another process")
|
||||
|
||||
// DaemonLock represents a held lock on the daemon.lock file
|
||||
type DaemonLock struct {
|
||||
file *os.File
|
||||
path string
|
||||
}
|
||||
|
||||
// Close releases the daemon lock
|
||||
func (l *DaemonLock) Close() error {
|
||||
if l.file == nil {
|
||||
return nil
|
||||
}
|
||||
// Closing the file descriptor automatically releases the flock
|
||||
err := l.file.Close()
|
||||
l.file = nil
|
||||
return err
|
||||
}
|
||||
|
||||
// acquireDaemonLock attempts to acquire an exclusive lock on daemon.lock
|
||||
// Returns ErrDaemonLocked if another daemon is already running
|
||||
func acquireDaemonLock(beadsDir string, global bool) (*DaemonLock, error) {
|
||||
lockPath := filepath.Join(beadsDir, "daemon.lock")
|
||||
|
||||
// Open or create the lock file
|
||||
f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot open lock file: %w", err)
|
||||
}
|
||||
|
||||
// Try to acquire exclusive non-blocking lock
|
||||
if err := flockExclusive(f); err != nil {
|
||||
f.Close()
|
||||
if err == ErrDaemonLocked {
|
||||
return nil, ErrDaemonLocked
|
||||
}
|
||||
return nil, fmt.Errorf("cannot lock file: %w", err)
|
||||
}
|
||||
|
||||
// Write our PID to the lock file for debugging (optional)
|
||||
f.Truncate(0)
|
||||
f.Seek(0, 0)
|
||||
fmt.Fprintf(f, "%d\n", os.Getpid())
|
||||
f.Sync()
|
||||
|
||||
return &DaemonLock{file: f, path: lockPath}, nil
|
||||
}
|
||||
|
||||
// tryDaemonLock attempts to acquire and immediately release the daemon lock
|
||||
// to check if a daemon is running. Returns true if daemon is running.
|
||||
// Falls back to PID file check for backward compatibility with pre-lock daemons.
|
||||
func tryDaemonLock(beadsDir string) (running bool, pid int) {
|
||||
lockPath := filepath.Join(beadsDir, "daemon.lock")
|
||||
|
||||
// Try to open existing lock file
|
||||
f, err := os.Open(lockPath)
|
||||
if err != nil {
|
||||
// No lock file - could be old daemon without lock support
|
||||
// Fall back to PID file check for backward compatibility
|
||||
return checkPIDFile(beadsDir)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Try to acquire lock non-blocking
|
||||
if err := flockExclusive(f); err != nil {
|
||||
if err == ErrDaemonLocked {
|
||||
// Lock is held - daemon is running
|
||||
// Try to read PID for display (best effort)
|
||||
if data := make([]byte, 32); true {
|
||||
n, _ := f.Read(data)
|
||||
if n > 0 {
|
||||
fmt.Sscanf(string(data), "%d", &pid)
|
||||
}
|
||||
}
|
||||
return true, pid
|
||||
}
|
||||
// Other errors mean we can't determine status
|
||||
return false, 0
|
||||
}
|
||||
|
||||
// We got the lock - no daemon running
|
||||
// Release immediately (file close will do this)
|
||||
return false, 0
|
||||
}
|
||||
|
||||
// checkPIDFile checks if a daemon is running by reading the PID file.
|
||||
// This is used for backward compatibility with pre-lock daemons.
|
||||
func checkPIDFile(beadsDir string) (running bool, pid int) {
|
||||
pidFile := filepath.Join(beadsDir, "daemon.pid")
|
||||
data, err := os.ReadFile(pidFile)
|
||||
if err != nil {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
pidVal, err := strconv.Atoi(strings.TrimSpace(string(data)))
|
||||
if err != nil {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
if !isProcessRunning(pidVal) {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
return true, pidVal
|
||||
}
|
||||
209
cmd/bd/daemon_lock_test.go
Normal file
209
cmd/bd/daemon_lock_test.go
Normal file
@@ -0,0 +1,209 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestDaemonLockPreventsMultipleInstances(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
beadsDir := filepath.Join(tmpDir, ".beads")
|
||||
if err := os.MkdirAll(beadsDir, 0700); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Acquire lock
|
||||
lock1, err := acquireDaemonLock(beadsDir, false)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to acquire first lock: %v", err)
|
||||
}
|
||||
defer lock1.Close()
|
||||
|
||||
// Try to acquire lock again - should fail
|
||||
lock2, err := acquireDaemonLock(beadsDir, false)
|
||||
if err != ErrDaemonLocked {
|
||||
if lock2 != nil {
|
||||
lock2.Close()
|
||||
}
|
||||
t.Fatalf("Expected ErrDaemonLocked, got: %v", err)
|
||||
}
|
||||
|
||||
// Release first lock
|
||||
lock1.Close()
|
||||
|
||||
// Now should be able to acquire lock
|
||||
lock3, err := acquireDaemonLock(beadsDir, false)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to acquire lock after release: %v", err)
|
||||
}
|
||||
lock3.Close()
|
||||
}
|
||||
|
||||
func TestTryDaemonLockDetectsRunning(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
beadsDir := filepath.Join(tmpDir, ".beads")
|
||||
if err := os.MkdirAll(beadsDir, 0700); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Initially no daemon running
|
||||
running, _ := tryDaemonLock(beadsDir)
|
||||
if running {
|
||||
t.Fatal("Expected no daemon running initially")
|
||||
}
|
||||
|
||||
// Acquire lock
|
||||
lock, err := acquireDaemonLock(beadsDir, false)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to acquire lock: %v", err)
|
||||
}
|
||||
defer lock.Close()
|
||||
|
||||
// Now should detect daemon running
|
||||
running, pid := tryDaemonLock(beadsDir)
|
||||
if !running {
|
||||
t.Fatal("Expected daemon to be detected as running")
|
||||
}
|
||||
if pid != os.Getpid() {
|
||||
t.Errorf("Expected PID %d, got %d", os.Getpid(), pid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackwardCompatibilityWithOldDaemon(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
beadsDir := filepath.Join(tmpDir, ".beads")
|
||||
if err := os.MkdirAll(beadsDir, 0700); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Simulate old daemon: PID file exists but no lock file
|
||||
pidFile := filepath.Join(beadsDir, "daemon.pid")
|
||||
currentPID := os.Getpid()
|
||||
if err := os.WriteFile(pidFile, []byte(fmt.Sprintf("%d", currentPID)), 0644); err != nil {
|
||||
t.Fatalf("Failed to write PID file: %v", err)
|
||||
}
|
||||
|
||||
// tryDaemonLock should detect the old daemon via PID file fallback
|
||||
running, pid := tryDaemonLock(beadsDir)
|
||||
if !running {
|
||||
t.Fatal("Expected old daemon to be detected via PID file")
|
||||
}
|
||||
if pid != currentPID {
|
||||
t.Errorf("Expected PID %d, got %d", currentPID, pid)
|
||||
}
|
||||
|
||||
// Clean up PID file
|
||||
os.Remove(pidFile)
|
||||
|
||||
// Now should report no daemon running
|
||||
running, _ = tryDaemonLock(beadsDir)
|
||||
if running {
|
||||
t.Fatal("Expected no daemon running after PID file removed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultipleDaemonProcessesRace(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping race condition test in short mode")
|
||||
}
|
||||
|
||||
// Find the bd binary
|
||||
bdBinary, err := exec.LookPath("bd")
|
||||
if err != nil {
|
||||
// Try local build
|
||||
if _, err := os.Stat("./bd"); err == nil {
|
||||
bdBinary = "./bd"
|
||||
} else {
|
||||
t.Skip("bd binary not found, skipping race test")
|
||||
}
|
||||
}
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
dbPath := filepath.Join(tmpDir, ".beads", "beads.db")
|
||||
beadsDir := filepath.Dir(dbPath)
|
||||
|
||||
// Initialize a test database with git repo
|
||||
if err := os.MkdirAll(beadsDir, 0700); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Create git repo
|
||||
cmd := exec.Command("git", "init")
|
||||
cmd.Dir = tmpDir
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Fatalf("Failed to init git repo: %v", err)
|
||||
}
|
||||
|
||||
// Initialize bd
|
||||
cmd = exec.Command(bdBinary, "init", "--prefix", "test")
|
||||
cmd.Dir = tmpDir
|
||||
cmd.Env = append(os.Environ(), "BEADS_DB="+dbPath)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
t.Fatalf("Failed to init bd: %v\nOutput: %s", err, out)
|
||||
}
|
||||
|
||||
// Try to start 5 daemons simultaneously
|
||||
numAttempts := 5
|
||||
results := make(chan error, numAttempts)
|
||||
|
||||
for i := 0; i < numAttempts; i++ {
|
||||
go func() {
|
||||
cmd := exec.Command(bdBinary, "daemon", "--interval", "10m")
|
||||
cmd.Dir = tmpDir
|
||||
cmd.Env = append(os.Environ(), "BEADS_DB="+dbPath)
|
||||
err := cmd.Start()
|
||||
if err != nil {
|
||||
results <- err
|
||||
return
|
||||
}
|
||||
|
||||
// Wait a bit for daemon to start
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
// Check if it's still running
|
||||
if cmd.Process != nil {
|
||||
cmd.Process.Kill()
|
||||
}
|
||||
results <- cmd.Wait()
|
||||
}()
|
||||
}
|
||||
|
||||
// Wait for all attempts
|
||||
var successCount int
|
||||
var alreadyRunning int
|
||||
timeout := time.After(5 * time.Second)
|
||||
|
||||
for i := 0; i < numAttempts; i++ {
|
||||
select {
|
||||
case err := <-results:
|
||||
if err == nil {
|
||||
successCount++
|
||||
} else if strings.Contains(err.Error(), "exit status 1") {
|
||||
// Could be "already running" error
|
||||
alreadyRunning++
|
||||
}
|
||||
case <-timeout:
|
||||
t.Fatal("Test timed out waiting for daemon processes")
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up any remaining daemon files
|
||||
os.Remove(filepath.Join(beadsDir, "daemon.pid"))
|
||||
os.Remove(filepath.Join(beadsDir, "daemon.lock"))
|
||||
os.Remove(filepath.Join(beadsDir, "bd.sock"))
|
||||
|
||||
t.Logf("Results: %d success, %d already running", successCount, alreadyRunning)
|
||||
|
||||
// At most one should have succeeded in holding the lock
|
||||
// (though timing means even the first might have exited by the time we checked)
|
||||
if alreadyRunning < numAttempts-1 {
|
||||
t.Logf("Warning: Expected at least %d processes to fail with 'already running', got %d",
|
||||
numAttempts-1, alreadyRunning)
|
||||
t.Log("This could indicate a race condition, but may also be timing-related in tests")
|
||||
}
|
||||
}
|
||||
18
cmd/bd/daemon_lock_unix.go
Normal file
18
cmd/bd/daemon_lock_unix.go
Normal file
@@ -0,0 +1,18 @@
|
||||
//go:build unix
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// flockExclusive acquires an exclusive non-blocking lock on the file
|
||||
func flockExclusive(f *os.File) error {
|
||||
err := unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB)
|
||||
if err == unix.EWOULDBLOCK {
|
||||
return ErrDaemonLocked
|
||||
}
|
||||
return err
|
||||
}
|
||||
35
cmd/bd/daemon_lock_windows.go
Normal file
35
cmd/bd/daemon_lock_windows.go
Normal file
@@ -0,0 +1,35 @@
|
||||
//go:build windows
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
// flockExclusive acquires an exclusive non-blocking lock on the file using LockFileEx
|
||||
func flockExclusive(f *os.File) error {
|
||||
// LOCKFILE_EXCLUSIVE_LOCK (2) | LOCKFILE_FAIL_IMMEDIATELY (1) = 3
|
||||
const flags = windows.LOCKFILE_EXCLUSIVE_LOCK | windows.LOCKFILE_FAIL_IMMEDIATELY
|
||||
|
||||
// Create overlapped structure for the entire file
|
||||
ol := &windows.Overlapped{}
|
||||
|
||||
// Lock entire file (0xFFFFFFFF, 0xFFFFFFFF = maximum range)
|
||||
err := windows.LockFileEx(
|
||||
windows.Handle(f.Fd()),
|
||||
flags,
|
||||
0, // reserved
|
||||
0xFFFFFFFF, // number of bytes to lock (low)
|
||||
0xFFFFFFFF, // number of bytes to lock (high)
|
||||
ol,
|
||||
)
|
||||
|
||||
if err == windows.ERROR_LOCK_VIOLATION || err == syscall.EWOULDBLOCK {
|
||||
return ErrDaemonLocked
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
@@ -128,14 +128,18 @@ func TestIsDaemonRunning_CurrentProcess(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
pidFile := filepath.Join(tmpDir, "test.pid")
|
||||
|
||||
currentPID := os.Getpid()
|
||||
if err := os.WriteFile(pidFile, []byte(strconv.Itoa(currentPID)), 0644); err != nil {
|
||||
t.Fatalf("Failed to write PID file: %v", err)
|
||||
// Acquire the daemon lock to simulate a running daemon
|
||||
beadsDir := filepath.Dir(pidFile)
|
||||
lock, err := acquireDaemonLock(beadsDir, false)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to acquire daemon lock: %v", err)
|
||||
}
|
||||
defer lock.Close()
|
||||
|
||||
currentPID := os.Getpid()
|
||||
isRunning, pid := isDaemonRunning(pidFile)
|
||||
if !isRunning {
|
||||
t.Error("Expected daemon running (current process PID)")
|
||||
t.Error("Expected daemon running (lock held)")
|
||||
}
|
||||
if pid != currentPID {
|
||||
t.Errorf("Expected PID %d, got %d", currentPID, pid)
|
||||
|
||||
Reference in New Issue
Block a user