fix(daemon): prevent stack overflow in handleStaleLock (#1238)
Remove the recursive tryAutoStartDaemon calls from handleStaleLock, which caused infinite recursion when cleanup of the daemon start lock looped.

The call chain was: acquireStartLock -> handleStaleLock -> tryAutoStartDaemon -> acquireStartLock -> ... (unbounded).

Now handleStaleLock just cleans up stale locks and returns false, letting the caller's bounded retry loop handle retries.

Co-authored-by: Nelson Melo <nmelo@users.noreply.github.com>
This commit is contained in:
@@ -307,35 +307,38 @@ func acquireStartLock(lockPath, socketPath string) bool {
|
|||||||
if waitForSocketReadiness(socketPath, 5*time.Second) {
|
if waitForSocketReadiness(socketPath, 5*time.Second) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return handleStaleLock(lockPath, socketPath)
|
return handleStaleLock(lockPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
debugLog("failed to acquire start lock after %d attempts", maxRetries)
|
debugLog("failed to acquire start lock after %d attempts", maxRetries)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleStaleLock(lockPath, socketPath string) bool {
|
func handleStaleLock(lockPath string) bool {
|
||||||
lockPID, err := readPIDFromFile(lockPath)
|
lockPID, err := readPIDFromFile(lockPath)
|
||||||
|
|
||||||
// Check if PID is dead
|
// Check if PID is dead
|
||||||
if err != nil || !isPIDAlive(lockPID) {
|
if err != nil || !isPIDAlive(lockPID) {
|
||||||
debugLog("lock is stale (PID %d dead or unreadable), removing and retrying", lockPID)
|
debugLog("lock is stale (PID %d dead or unreadable), removing", lockPID)
|
||||||
if rmErr := removeFileFn(lockPath); rmErr != nil {
|
if rmErr := removeFileFn(lockPath); rmErr != nil {
|
||||||
debugLog("failed to remove stale lock in handleStaleLock: %v", rmErr)
|
debugLog("failed to remove stale lock in handleStaleLock: %v", rmErr)
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
return tryAutoStartDaemon(socketPath)
|
// Return false to let caller retry. DO NOT call tryAutoStartDaemon here
|
||||||
|
// to avoid infinite recursion: acquireStartLock -> handleStaleLock ->
|
||||||
|
// tryAutoStartDaemon -> acquireStartLock -> ...
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// PID is alive - but check daemon lock as authoritative source (immune to PID reuse)
|
// PID is alive - but check daemon lock as authoritative source (immune to PID reuse)
|
||||||
beadsDir := filepath.Dir(dbPath)
|
beadsDir := filepath.Dir(dbPath)
|
||||||
if running, _ := lockfile.TryDaemonLock(beadsDir); !running {
|
if running, _ := lockfile.TryDaemonLock(beadsDir); !running {
|
||||||
debugLog("lock PID %d alive but daemon lock not held, removing and retrying", lockPID)
|
debugLog("lock PID %d alive but daemon lock not held, removing", lockPID)
|
||||||
if rmErr := removeFileFn(lockPath); rmErr != nil {
|
if rmErr := removeFileFn(lockPath); rmErr != nil {
|
||||||
debugLog("failed to remove orphaned lock in handleStaleLock: %v", rmErr)
|
debugLog("failed to remove orphaned lock in handleStaleLock: %v", rmErr)
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
return tryAutoStartDaemon(socketPath)
|
// Return false to let caller retry. DO NOT call tryAutoStartDaemon here
|
||||||
|
// to avoid infinite recursion.
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Daemon lock is held - daemon is genuinely running but socket isn't ready
|
// Daemon lock is held - daemon is genuinely running but socket isn't ready
|
||||||
|
|||||||
Reference in New Issue
Block a user