diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go index 554c72e3..1c08d929 100644 --- a/internal/cmd/daemon.go +++ b/internal/cmd/daemon.go @@ -117,7 +117,7 @@ func runDaemonStart(cmd *cobra.Command, args []string) error { return fmt.Errorf("starting daemon: %w", err) } - // Wait a moment for the daemon to initialize + // Wait a moment for the daemon to initialize and acquire the lock time.Sleep(200 * time.Millisecond) // Verify it started @@ -129,6 +129,15 @@ func runDaemonStart(cmd *cobra.Command, args []string) error { return fmt.Errorf("daemon failed to start (check logs with 'gt daemon logs')") } + // Check if our spawned process is the one that won the race. + // If another concurrent start won, our process would have exited after + // failing to acquire the lock, and the PID file would have a different PID. + if pid != daemonCmd.Process.Pid { + // Another daemon won the race - that's fine, report it + fmt.Printf("%s Daemon already running (PID %d)\n", style.Bold.Render("●"), pid) + return nil + } + fmt.Printf("%s Daemon started (PID %d)\n", style.Bold.Render("✓"), pid) return nil } diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 53835018..7ac908d6 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -67,6 +67,22 @@ func New(config *Config) (*Daemon, error) { func (d *Daemon) Run() error { d.logger.Printf("Daemon starting (PID %d)", os.Getpid()) + // Acquire exclusive lock to prevent multiple daemons from running. + // This prevents the TOCTOU race condition where multiple concurrent starts + // can all pass the IsRunning() check before any writes the PID file. + lockFile := filepath.Join(d.config.TownRoot, "daemon", "daemon.lock") + lock, err := os.OpenFile(lockFile, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return fmt.Errorf("opening lock file: %w", err) + } + defer lock.Close() + + // Try to acquire exclusive lock (non-blocking) + if err := syscall.Flock(int(lock.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { + return fmt.Errorf("daemon already running (lock held by another process)") + } + defer func() { _ = syscall.Flock(int(lock.Fd()), syscall.LOCK_UN) }() + // Write PID file if err := os.WriteFile(d.config.PidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil { return fmt.Errorf("writing PID file: %w", err) @@ -684,6 +700,9 @@ func (d *Daemon) Stop() { } // IsRunning checks if a daemon is running for the given town. +// It checks the PID file and verifies the process is alive. +// Note: The file lock in Run() is the authoritative mechanism for preventing +// duplicate daemons. This function is for status checks and cleanup. func IsRunning(townRoot string) (bool, int, error) { pidFile := filepath.Join(townRoot, "daemon", "daemon.pid") data, err := os.ReadFile(pidFile) @@ -708,7 +727,7 @@ func IsRunning(townRoot string) (bool, int, error) { // On Unix, FindProcess always succeeds. Send signal 0 to check if alive. err = process.Signal(syscall.Signal(0)) if err != nil { - // Process not running, clean up stale PID file (best-effort cleanup) + // Process not running, clean up stale PID file _ = os.Remove(pidFile) return false, 0, nil } @@ -717,6 +736,8 @@ func IsRunning(townRoot string) (bool, int, error) { } // StopDaemon stops the running daemon for the given town. +// Note: The file lock in Run() prevents multiple daemons per town, so we only +// need to kill the process from the PID file. func StopDaemon(townRoot string) error { running, pid, err := IsRunning(townRoot) if err != nil { @@ -741,11 +762,11 @@ func StopDaemon(townRoot string) error { // Check if still running if err := process.Signal(syscall.Signal(0)); err == nil { - // Still running, force kill (best-effort) + // Still running, force kill _ = process.Signal(syscall.SIGKILL) } - // Clean up PID file (best-effort cleanup) + // Clean up PID file pidFile := filepath.Join(townRoot, "daemon", "daemon.pid") _ = os.Remove(pidFile)