From c25ff34b5bb6c8ccba522d418355e2d4fdbcc38b Mon Sep 17 00:00:00 2001 From: jakehemmerle Date: Sun, 4 Jan 2026 15:06:23 -0500 Subject: [PATCH] fix(daemon): prevent orphan daemons via file locking Add syscall.Flock() exclusive lock in daemon.Run() to prevent TOCTOU race condition where concurrent 'gt daemon start' commands could spawn multiple daemons. Only the first to acquire the lock succeeds; others exit cleanly. Lock is per-town (in townRoot/daemon/daemon.lock) so multiple GT instances from different directories work independently. Also detect race losers in runDaemonStart() by comparing spawned PID with PID file, reporting 'already running' instead of false success. --- internal/cmd/daemon.go | 11 ++++++++++- internal/daemon/daemon.go | 27 ++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go index 554c72e3..1c08d929 100644 --- a/internal/cmd/daemon.go +++ b/internal/cmd/daemon.go @@ -117,7 +117,7 @@ func runDaemonStart(cmd *cobra.Command, args []string) error { return fmt.Errorf("starting daemon: %w", err) } - // Wait a moment for the daemon to initialize + // Wait a moment for the daemon to initialize and acquire the lock time.Sleep(200 * time.Millisecond) // Verify it started @@ -129,6 +129,15 @@ func runDaemonStart(cmd *cobra.Command, args []string) error { return fmt.Errorf("daemon failed to start (check logs with 'gt daemon logs')") } + // Check if our spawned process is the one that won the race. + // If another concurrent start won, our process would have exited after + // failing to acquire the lock, and the PID file would have a different PID. + if pid != daemonCmd.Process.Pid { + // Another daemon won the race - that's fine, report it + fmt.Printf("%s Daemon already running (PID %d)\n", style.Bold.Render("●"), pid) + return nil + } + fmt.Printf("%s Daemon started (PID %d)\n", style.Bold.Render("✓"), pid) return nil } diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 53835018..7ac908d6 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -67,6 +67,22 @@ func New(config *Config) (*Daemon, error) { func (d *Daemon) Run() error { d.logger.Printf("Daemon starting (PID %d)", os.Getpid()) + // Acquire exclusive lock to prevent multiple daemons from running. + // This prevents the TOCTOU race condition where multiple concurrent starts + // can all pass the IsRunning() check before any writes the PID file. + lockFile := filepath.Join(d.config.TownRoot, "daemon", "daemon.lock") + lock, err := os.OpenFile(lockFile, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return fmt.Errorf("opening lock file: %w", err) + } + defer lock.Close() + + // Try to acquire exclusive lock (non-blocking) + if err := syscall.Flock(int(lock.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { + return fmt.Errorf("daemon already running (lock held by another process)") + } + defer func() { _ = syscall.Flock(int(lock.Fd()), syscall.LOCK_UN) }() + // Write PID file if err := os.WriteFile(d.config.PidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil { return fmt.Errorf("writing PID file: %w", err) @@ -684,6 +700,9 @@ func (d *Daemon) Stop() { } // IsRunning checks if a daemon is running for the given town. +// It checks the PID file and verifies the process is alive. +// Note: The file lock in Run() is the authoritative mechanism for preventing +// duplicate daemons. This function is for status checks and cleanup. func IsRunning(townRoot string) (bool, int, error) { pidFile := filepath.Join(townRoot, "daemon", "daemon.pid") data, err := os.ReadFile(pidFile) @@ -708,7 +727,7 @@ func IsRunning(townRoot string) (bool, int, error) { // On Unix, FindProcess always succeeds. Send signal 0 to check if alive. err = process.Signal(syscall.Signal(0)) if err != nil { - // Process not running, clean up stale PID file (best-effort cleanup) + // Process not running, clean up stale PID file _ = os.Remove(pidFile) return false, 0, nil } @@ -717,6 +736,8 @@ func IsRunning(townRoot string) (bool, int, error) { } // StopDaemon stops the running daemon for the given town. +// Note: The file lock in Run() prevents multiple daemons per town, so we only +// need to kill the process from the PID file. func StopDaemon(townRoot string) error { running, pid, err := IsRunning(townRoot) if err != nil { @@ -741,11 +762,11 @@ func StopDaemon(townRoot string) error { // Check if still running if err := process.Signal(syscall.Signal(0)); err == nil { - // Still running, force kill (best-effort) + // Still running, force kill _ = process.Signal(syscall.SIGKILL) } - // Clean up PID file (best-effort cleanup) + // Clean up PID file pidFile := filepath.Join(townRoot, "daemon", "daemon.pid") _ = os.Remove(pidFile)