fix(daemon): prevent stack overflow in handleStaleLock (#1238)

Remove recursive tryAutoStartDaemon calls from handleStaleLock that caused infinite recursion when daemon start lock cleanup looped. The call chain was: acquireStartLock -> handleStaleLock -> tryAutoStartDaemon -> acquireStartLock -> ... (unbounded) Now handleStaleLock just cleans up stale locks and returns false, letting the caller's bounded retry loop handle retries. Co-authored-by: Nelson Melo <nmelo@users.noreply.github.com>
2026-01-21 19:51:56 -05:00
parent caf4c368c4
commit 0eb3dd05a9
1 changed files with 11 additions and 8 deletions
@@ -307,35 +307,38 @@ func acquireStartLock(lockPath, socketPath string) bool {
 		if waitForSocketReadiness(socketPath, 5*time.Second) {
 			return true
 		}
-		return handleStaleLock(lockPath, socketPath)
+		return handleStaleLock(lockPath)
 	}
 	debugLog("failed to acquire start lock after %d attempts", maxRetries)
 	return false
 }
-func handleStaleLock(lockPath, socketPath string) bool {
+func handleStaleLock(lockPath string) bool {
 	lockPID, err := readPIDFromFile(lockPath)
 	// Check if PID is dead
 	if err != nil || !isPIDAlive(lockPID) {
-		debugLog("lock is stale (PID %d dead or unreadable), removing and retrying", lockPID)
+		debugLog("lock is stale (PID %d dead or unreadable), removing", lockPID)
 		if rmErr := removeFileFn(lockPath); rmErr != nil {
 			debugLog("failed to remove stale lock in handleStaleLock: %v", rmErr)
 			return false
 		}
-		return tryAutoStartDaemon(socketPath)
+		// Return false to let caller retry. DO NOT call tryAutoStartDaemon here
 		// to avoid infinite recursion: acquireStartLock -> handleStaleLock ->
 		// tryAutoStartDaemon -> acquireStartLock -> ...
 		return false
 	}
 	// PID is alive - but check daemon lock as authoritative source (immune to PID reuse)
 	beadsDir := filepath.Dir(dbPath)
 	if running, _ := lockfile.TryDaemonLock(beadsDir); !running {
-		debugLog("lock PID %d alive but daemon lock not held, removing and retrying", lockPID)
+		debugLog("lock PID %d alive but daemon lock not held, removing", lockPID)
 		if rmErr := removeFileFn(lockPath); rmErr != nil {
 			debugLog("failed to remove orphaned lock in handleStaleLock: %v", rmErr)
 			return false
 		}
-		return tryAutoStartDaemon(socketPath)
+		// Return false to let caller retry. DO NOT call tryAutoStartDaemon here
 		// to avoid infinite recursion.
 		return false
 	}
 	// Daemon lock is held - daemon is genuinely running but socket isn't ready