Standardize daemon detection with tryDaemonLock probe (bd-wgu4)

- Extract lock checking to internal/lockfile package
- Add lock probe in RPC client before connection attempts
- Update daemon discovery to use lock probe
- Eliminates unnecessary connection attempts when socket missing

Closes bd-wgu4

Amp-Thread-ID: https://ampcode.com/threads/T-3b863f21-3af4-49d3-9214-477d904b80fe
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-11-07 21:02:38 -08:00
parent eeef37f37b
commit ba1b856acb
10 changed files with 280 additions and 5 deletions

File diff suppressed because one or more lines are too long

View File

@@ -7,6 +7,7 @@ import (
"strings"
"time"
"github.com/steveyegge/beads/internal/lockfile"
"github.com/steveyegge/beads/internal/rpc"
)
@@ -145,6 +146,25 @@ func discoverDaemon(socketPath string) DaemonInfo {
Alive: false,
}
// Fast probe: check daemon lock before attempting RPC if socket doesn't exist
// This eliminates unnecessary connection attempts when no daemon is running
// If socket exists, we proceed with RPC for backwards compatibility
_, err := os.Stat(socketPath)
socketExists := err == nil
if !socketExists {
beadsDir := filepath.Dir(socketPath)
running, _ := lockfile.TryDaemonLock(beadsDir)
if !running {
daemon.Error = "daemon lock not held and socket missing"
// Check for daemon-error file
if errMsg := checkDaemonErrorFile(socketPath); errMsg != "" {
daemon.Error = errMsg
}
return daemon
}
}
// Try to connect with short timeout
client, err := rpc.TryConnectWithTimeout(socketPath, 500*time.Millisecond)
if err != nil {

118
internal/lockfile/lock.go Normal file
View File

@@ -0,0 +1,118 @@
package lockfile
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
)
// LockInfo represents the metadata stored in the daemon.lock file.
// The struct tags give the JSON key under which each field is stored.
type LockInfo struct {
// PID is the process ID recorded in the lock file; TryDaemonLock reports it
// as the PID of the running daemon when the lock is held.
PID int `json:"pid"`
// ParentPID is left out of the JSON when zero (omitempty).
// NOTE(review): presumably the PID of the process that launched the daemon — confirm against the writer.
ParentPID int `json:"parent_pid,omitempty"`
// NOTE(review): presumably the database the daemon serves — confirm against the writer.
Database string `json:"database"`
// NOTE(review): presumably the daemon's version string — confirm against the writer.
Version string `json:"version"`
// NOTE(review): presumably the time the daemon started — confirm against the writer.
StartedAt time.Time `json:"started_at"`
}
// TryDaemonLock probes <beadsDir>/daemon.lock to decide whether a daemon is
// running. It tries to take the exclusive lock without blocking: if the lock is
// already held, a daemon is running; if the lock is obtained, no daemon is
// running and the lock is dropped again when the file is closed on return.
//
// When the lock file cannot be opened the result comes from the daemon.pid
// file instead, for backward compatibility with pre-lock daemons.
//
// Returns:
//   - running: true if a daemon holds the lock (or the PID-file fallback says
//     the recorded process is alive).
//   - pid: the daemon's PID when it could be determined, otherwise 0. It is read
//     from the lock file (JSON, then the old plain-integer format) and, failing
//     that, from the PID file.
//
// If the lock attempt fails for any reason other than the lock being held, the
// state cannot be determined and (false, 0) is returned.
//
// This is a cheap probe operation that should be called before attempting
// RPC connections to avoid unnecessary connection timeouts.
func TryDaemonLock(beadsDir string) (running bool, pid int) {
	lockPath := filepath.Join(beadsDir, "daemon.lock")

	// Open lock file with read-write access (required for LockFileEx on Windows)
	// #nosec G304 - controlled path from config
	f, err := os.OpenFile(lockPath, os.O_RDWR, 0)
	if err != nil {
		// No usable lock file - could be an old daemon without lock support.
		return checkPIDFile(beadsDir)
	}
	defer func() { _ = f.Close() }()

	// Try to acquire the lock non-blocking. Two outcomes mean "not running":
	// nil (we got the lock; closing the file releases it) and any unexpected
	// error (status cannot be determined).
	if err := flockExclusive(f); err != errDaemonLocked {
		return false, 0
	}

	// Lock is held - daemon is running. Recovering its PID is best effort, so
	// seek/read errors are deliberately ignored and simply leave pid at 0.
	_, _ = f.Seek(0, 0)
	var lockInfo LockInfo
	if err := json.NewDecoder(f).Decode(&lockInfo); err == nil {
		pid = lockInfo.PID
	} else {
		// Fallback: try reading as plain integer (old format)
		_, _ = f.Seek(0, 0)
		data := make([]byte, 32)
		if n, _ := f.Read(data); n > 0 {
			_, _ = fmt.Sscanf(string(data[:n]), "%d", &pid)
		}
	}

	// Fall back to the PID file whenever the lock file yielded no PID. This
	// covers both formats, including JSON that parses but has no "pid" value.
	if pid == 0 {
		_, pid = checkPIDFile(beadsDir)
	}
	return true, pid
}
// checkPIDFile reports whether the process recorded in <beadsDir>/daemon.pid
// is alive, returning (true, pid) if so. It returns (false, 0) when the file
// cannot be read, does not contain an integer (surrounding whitespace is
// ignored), or names a process that isProcessRunning says is not running.
// This is the backward-compatibility path for pre-lock daemons.
func checkPIDFile(beadsDir string) (running bool, pid int) {
	// #nosec G304 - controlled path from config
	raw, readErr := os.ReadFile(filepath.Join(beadsDir, "daemon.pid"))
	if readErr != nil {
		return false, 0
	}

	recorded, convErr := strconv.Atoi(strings.TrimSpace(string(raw)))
	if convErr != nil || !isProcessRunning(recorded) {
		return false, 0
	}
	return true, recorded
}
// ReadLockInfo loads <beadsDir>/daemon.lock and returns its contents.
//
// The file is first parsed as a JSON-encoded LockInfo. If that fails, it is
// parsed as the old format (a plain integer PID), in which case only the PID
// field of the result is set. An error is returned when the file cannot be
// read, or when it matches neither format (the JSON error is wrapped).
func ReadLockInfo(beadsDir string) (*LockInfo, error) {
	// #nosec G304 - controlled path from config
	raw, readErr := os.ReadFile(filepath.Join(beadsDir, "daemon.lock"))
	if readErr != nil {
		return nil, readErr
	}

	info := new(LockInfo)
	jsonErr := json.Unmarshal(raw, info)
	if jsonErr == nil {
		return info, nil
	}

	// Not a JSON object: try the old format (plain PID).
	var legacyPID int
	if _, scanErr := fmt.Sscanf(string(raw), "%d", &legacyPID); scanErr != nil {
		return nil, fmt.Errorf("cannot parse lock file: %w", jsonErr)
	}
	return &LockInfo{PID: legacyPID}, nil
}

View File

@@ -0,0 +1,21 @@
//go:build unix
package lockfile
import (
"errors"
"os"
"golang.org/x/sys/unix"
)
// errDaemonLocked is what flockExclusive returns when the lock could not be
// taken because it is already held.
var errDaemonLocked = errors.New("daemon lock already held by another process")

// flockExclusive tries to take an exclusive flock on f without blocking.
// It returns nil when the lock was obtained, errDaemonLocked when the attempt
// failed with EWOULDBLOCK, and the underlying error for any other failure.
func flockExclusive(f *os.File) error {
	switch err := unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB); err {
	case unix.EWOULDBLOCK:
		return errDaemonLocked
	default:
		return err
	}
}

View File

@@ -0,0 +1,17 @@
//go:build js && wasm
package lockfile
import (
"errors"
"fmt"
"os"
)
// errDaemonLocked mirrors the sentinel declared by the other platform builds.
// This build's flockExclusive never returns it.
var errDaemonLocked = errors.New("daemon lock already held by another process")

// flockExclusive always fails on js/wasm, where file locking is not supported.
// The returned error is never errDaemonLocked, so callers see the lock state
// as undetermined rather than held. In a WASM environment we are typically
// single-process anyway.
func flockExclusive(f *os.File) error {
	unsupported := fmt.Errorf("file locking not supported in WASM")
	return unsupported
}

View File

@@ -0,0 +1,38 @@
//go:build windows
package lockfile
import (
"errors"
"os"
"syscall"
"golang.org/x/sys/windows"
)
// errDaemonLocked is what flockExclusive returns when the lock could not be
// taken because it is already held.
var errDaemonLocked = errors.New("daemon lock already held by another process")

// flockExclusive tries to take an exclusive lock on the whole of f using
// LockFileEx, failing immediately instead of waiting. It returns nil when the
// lock was obtained, errDaemonLocked when the call failed with
// ERROR_LOCK_VIOLATION or EWOULDBLOCK, and the underlying error otherwise.
func flockExclusive(f *os.File) error {
	const (
		// LOCKFILE_EXCLUSIVE_LOCK (2) | LOCKFILE_FAIL_IMMEDIATELY (1) = 3
		flags = windows.LOCKFILE_EXCLUSIVE_LOCK | windows.LOCKFILE_FAIL_IMMEDIATELY
		// Used for both halves of the byte count, i.e. the maximum range.
		allBytes = 0xFFFFFFFF
	)

	// LockFileEx needs an OVERLAPPED structure; a zero-valued one is passed.
	overlapped := new(windows.Overlapped)

	err := windows.LockFileEx(
		windows.Handle(f.Fd()),
		flags,
		0,        // reserved
		allBytes, // number of bytes to lock (low)
		allBytes, // number of bytes to lock (high)
		overlapped,
	)
	switch err {
	case windows.ERROR_LOCK_VIOLATION, syscall.EWOULDBLOCK:
		return errDaemonLocked
	}
	return err
}

View File

@@ -0,0 +1,12 @@
//go:build unix || linux || darwin
package lockfile
import (
"syscall"
)
// isProcessRunning reports whether a process with the given PID exists and can
// be signaled by the caller. It probes with signal 0, which performs the
// existence and permission checks without delivering a signal.
//
// Non-positive PIDs always yield false: kill(2) treats them as process-group
// or broadcast targets (0 is the caller's own process group, -1 is every
// process the caller may signal), so the probe would succeed without
// identifying any single process. This matters for a corrupt PID file
// containing "0", which would otherwise look like a live daemon.
//
// NOTE(review): any non-nil error counts as "not running", so a live process
// the caller is not permitted to signal (EPERM) is also reported as false.
func isProcessRunning(pid int) bool {
	if pid <= 0 {
		return false
	}
	return syscall.Kill(pid, 0) == nil
}

View File

@@ -0,0 +1,9 @@
//go:build js && wasm
package lockfile
// isProcessRunning is the js/wasm stand-in for the per-platform liveness
// check. There is no process management in WASM, so pid is ignored and every
// PID is reported as not running.
func isProcessRunning(pid int) bool {
	// Nothing to query: always "not running".
	return false
}

View File

@@ -0,0 +1,25 @@
//go:build windows
package lockfile
import (
"golang.org/x/sys/windows"
)
// stillActive is the exit code that isProcessRunning treats as "process has
// not exited yet" (presumably Windows' STILL_ACTIVE — verify against the
// GetExitCodeProcess documentation).
const stillActive = 259

// isProcessRunning reports whether a process with the given PID is running.
// It opens the process with query-only access and reads its exit code; the
// process counts as running only when that code equals stillActive. Failure
// to open the process or to read its exit code yields false.
func isProcessRunning(pid int) bool {
	h, openErr := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
	if openErr != nil {
		return false
	}
	defer windows.CloseHandle(h)

	var exitCode uint32
	if windows.GetExitCodeProcess(h, &exitCode) != nil {
		return false
	}
	return exitCode == stillActive
}

View File

@@ -6,9 +6,11 @@ import (
"fmt"
"net"
"os"
"path/filepath"
"time"
"github.com/steveyegge/beads/internal/debug"
"github.com/steveyegge/beads/internal/lockfile"
)
// ClientVersion is the version of this RPC client
@@ -33,7 +35,20 @@ func TryConnect(socketPath string) (*Client, error) {
// TryConnectWithTimeout attempts to connect to the daemon socket using the provided dial timeout.
// Returns nil if no daemon is running or unhealthy.
func TryConnectWithTimeout(socketPath string, dialTimeout time.Duration) (*Client, error) {
if !endpointExists(socketPath) {
// Fast probe: check daemon lock before attempting RPC connection if socket doesn't exist
// This eliminates unnecessary connection attempts when no daemon is running
// If socket exists, we skip lock check for backwards compatibility and test scenarios
socketExists := endpointExists(socketPath)
if !socketExists {
beadsDir := filepath.Dir(socketPath)
running, _ := lockfile.TryDaemonLock(beadsDir)
if !running {
debug.Logf("daemon lock not held and socket missing (no daemon running)")
return nil, nil
}
}
if !socketExists {
debug.Logf("RPC endpoint does not exist: %s", socketPath)
return nil, nil
}