Standardize daemon detection with tryDaemonLock probe (bd-wgu4)
- Extract lock checking to internal/lockfile package
- Add lock probe in RPC client before connection attempts
- Update daemon discovery to use lock probe
- Eliminates unnecessary connection attempts when socket missing

Closes bd-wgu4

Amp-Thread-ID: https://ampcode.com/threads/T-3b863f21-3af4-49d3-9214-477d904b80fe
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -7,6 +7,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/steveyegge/beads/internal/lockfile"
|
||||||
"github.com/steveyegge/beads/internal/rpc"
|
"github.com/steveyegge/beads/internal/rpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -145,6 +146,25 @@ func discoverDaemon(socketPath string) DaemonInfo {
|
|||||||
Alive: false,
|
Alive: false,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fast probe: check daemon lock before attempting RPC if socket doesn't exist
|
||||||
|
// This eliminates unnecessary connection attempts when no daemon is running
|
||||||
|
// If socket exists, we proceed with RPC for backwards compatibility
|
||||||
|
_, err := os.Stat(socketPath)
|
||||||
|
socketExists := err == nil
|
||||||
|
|
||||||
|
if !socketExists {
|
||||||
|
beadsDir := filepath.Dir(socketPath)
|
||||||
|
running, _ := lockfile.TryDaemonLock(beadsDir)
|
||||||
|
if !running {
|
||||||
|
daemon.Error = "daemon lock not held and socket missing"
|
||||||
|
// Check for daemon-error file
|
||||||
|
if errMsg := checkDaemonErrorFile(socketPath); errMsg != "" {
|
||||||
|
daemon.Error = errMsg
|
||||||
|
}
|
||||||
|
return daemon
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Try to connect with short timeout
|
// Try to connect with short timeout
|
||||||
client, err := rpc.TryConnectWithTimeout(socketPath, 500*time.Millisecond)
|
client, err := rpc.TryConnectWithTimeout(socketPath, 500*time.Millisecond)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
118
internal/lockfile/lock.go
Normal file
118
internal/lockfile/lock.go
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
package lockfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LockInfo is the JSON document stored in the daemon.lock file.
type LockInfo struct {
	// PID is the process ID recorded in the lock file.
	PID int `json:"pid"`

	// ParentPID is left out of the JSON encoding when it is zero.
	// NOTE(review): presumably the PID of the process that spawned the
	// daemon - confirm against the writer of the lock file.
	ParentPID int `json:"parent_pid,omitempty"`

	// Database is stored under the "database" key.
	// NOTE(review): presumably the path of the database the daemon
	// serves - confirm against the writer of the lock file.
	Database string `json:"database"`

	// Version is stored under the "version" key.
	Version string `json:"version"`

	// StartedAt is stored under the "started_at" key.
	StartedAt time.Time `json:"started_at"`
}
|
||||||
|
|
||||||
|
// TryDaemonLock attempts to acquire and immediately release the daemon lock
|
||||||
|
// to check if a daemon is running. Returns true if daemon is running.
|
||||||
|
// Falls back to PID file check for backward compatibility with pre-lock daemons.
|
||||||
|
//
|
||||||
|
// This is a cheap probe operation that should be called before attempting
|
||||||
|
// RPC connections to avoid unnecessary connection timeouts.
|
||||||
|
func TryDaemonLock(beadsDir string) (running bool, pid int) {
|
||||||
|
lockPath := filepath.Join(beadsDir, "daemon.lock")
|
||||||
|
|
||||||
|
// Open lock file with read-write access (required for LockFileEx on Windows)
|
||||||
|
// #nosec G304 - controlled path from config
|
||||||
|
f, err := os.OpenFile(lockPath, os.O_RDWR, 0)
|
||||||
|
if err != nil {
|
||||||
|
// No lock file - could be old daemon without lock support
|
||||||
|
// Fall back to PID file check for backward compatibility
|
||||||
|
return checkPIDFile(beadsDir)
|
||||||
|
}
|
||||||
|
defer func() { _ = f.Close() }()
|
||||||
|
|
||||||
|
// Try to acquire lock non-blocking
|
||||||
|
if err := flockExclusive(f); err != nil {
|
||||||
|
if err == errDaemonLocked {
|
||||||
|
// Lock is held - daemon is running
|
||||||
|
// Try to read PID from JSON format (best effort)
|
||||||
|
_, _ = f.Seek(0, 0)
|
||||||
|
var lockInfo LockInfo
|
||||||
|
if err := json.NewDecoder(f).Decode(&lockInfo); err == nil {
|
||||||
|
pid = lockInfo.PID
|
||||||
|
} else {
|
||||||
|
// Fallback: try reading as plain integer (old format)
|
||||||
|
_, _ = f.Seek(0, 0)
|
||||||
|
data := make([]byte, 32)
|
||||||
|
n, _ := f.Read(data)
|
||||||
|
if n > 0 {
|
||||||
|
_, _ = fmt.Sscanf(string(data[:n]), "%d", &pid)
|
||||||
|
}
|
||||||
|
// Fallback to PID file if we couldn't read PID from lock file
|
||||||
|
if pid == 0 {
|
||||||
|
_, pid = checkPIDFile(beadsDir)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true, pid
|
||||||
|
}
|
||||||
|
// Other errors mean we can't determine status
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// We got the lock - no daemon running
|
||||||
|
// Release immediately (file close will do this)
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkPIDFile checks if a daemon is running by reading the PID file.
|
||||||
|
// This is used for backward compatibility with pre-lock daemons.
|
||||||
|
func checkPIDFile(beadsDir string) (running bool, pid int) {
|
||||||
|
pidFile := filepath.Join(beadsDir, "daemon.pid")
|
||||||
|
// #nosec G304 - controlled path from config
|
||||||
|
data, err := os.ReadFile(pidFile)
|
||||||
|
if err != nil {
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
pidVal, err := strconv.Atoi(strings.TrimSpace(string(data)))
|
||||||
|
if err != nil {
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isProcessRunning(pidVal) {
|
||||||
|
return false, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, pidVal
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadLockInfo reads and parses the daemon lock file
|
||||||
|
// Returns lock info if available, or error if file doesn't exist or can't be parsed
|
||||||
|
func ReadLockInfo(beadsDir string) (*LockInfo, error) {
|
||||||
|
lockPath := filepath.Join(beadsDir, "daemon.lock")
|
||||||
|
|
||||||
|
// #nosec G304 - controlled path from config
|
||||||
|
data, err := os.ReadFile(lockPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var lockInfo LockInfo
|
||||||
|
if err := json.Unmarshal(data, &lockInfo); err != nil {
|
||||||
|
// Try parsing as old format (plain PID)
|
||||||
|
var pid int
|
||||||
|
if _, err := fmt.Sscanf(string(data), "%d", &pid); err == nil {
|
||||||
|
return &LockInfo{PID: pid}, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("cannot parse lock file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &lockInfo, nil
|
||||||
|
}
|
||||||
21
internal/lockfile/lock_unix.go
Normal file
21
internal/lockfile/lock_unix.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
//go:build unix
|
||||||
|
|
||||||
|
package lockfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
var errDaemonLocked = errors.New("daemon lock already held by another process")
|
||||||
|
|
||||||
|
// flockExclusive acquires an exclusive non-blocking lock on the file
|
||||||
|
func flockExclusive(f *os.File) error {
|
||||||
|
err := unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB)
|
||||||
|
if err == unix.EWOULDBLOCK {
|
||||||
|
return errDaemonLocked
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
17
internal/lockfile/lock_wasm.go
Normal file
17
internal/lockfile/lock_wasm.go
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
//go:build js && wasm
|
||||||
|
|
||||||
|
package lockfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// errDaemonLocked mirrors the sentinel declared by the other platform
// variants of this file; the js/wasm flockExclusive below never returns it.
var errDaemonLocked = errors.New("daemon lock already held by another process")

// flockExclusive is the js/wasm variant of the lock primitive. File locking
// is not supported there, so it never takes a lock and always returns an
// error (one that is distinct from errDaemonLocked).
func flockExclusive(f *os.File) error {
	// In a WASM environment we are typically single-process anyway, so
	// there is nothing to contend with.
	unsupported := fmt.Errorf("file locking not supported in WASM")
	return unsupported
}
|
||||||
38
internal/lockfile/lock_windows.go
Normal file
38
internal/lockfile/lock_windows.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
//go:build windows
|
||||||
|
|
||||||
|
package lockfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"golang.org/x/sys/windows"
|
||||||
|
)
|
||||||
|
|
||||||
|
var errDaemonLocked = errors.New("daemon lock already held by another process")
|
||||||
|
|
||||||
|
// flockExclusive acquires an exclusive non-blocking lock on the file using LockFileEx
|
||||||
|
func flockExclusive(f *os.File) error {
|
||||||
|
// LOCKFILE_EXCLUSIVE_LOCK (2) | LOCKFILE_FAIL_IMMEDIATELY (1) = 3
|
||||||
|
const flags = windows.LOCKFILE_EXCLUSIVE_LOCK | windows.LOCKFILE_FAIL_IMMEDIATELY
|
||||||
|
|
||||||
|
// Create overlapped structure for the entire file
|
||||||
|
ol := &windows.Overlapped{}
|
||||||
|
|
||||||
|
// Lock entire file (0xFFFFFFFF, 0xFFFFFFFF = maximum range)
|
||||||
|
err := windows.LockFileEx(
|
||||||
|
windows.Handle(f.Fd()),
|
||||||
|
flags,
|
||||||
|
0, // reserved
|
||||||
|
0xFFFFFFFF, // number of bytes to lock (low)
|
||||||
|
0xFFFFFFFF, // number of bytes to lock (high)
|
||||||
|
ol,
|
||||||
|
)
|
||||||
|
|
||||||
|
if err == windows.ERROR_LOCK_VIOLATION || err == syscall.EWOULDBLOCK {
|
||||||
|
return errDaemonLocked
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
12
internal/lockfile/process_unix.go
Normal file
12
internal/lockfile/process_unix.go
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
//go:build unix || linux || darwin
|
||||||
|
|
||||||
|
package lockfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// isProcessRunning reports whether a process with the given PID exists.
//
// It probes with signal 0, which performs the existence and permission checks
// without delivering a signal. Non-positive PIDs are rejected up front: they
// address process groups rather than a single process, so the probe would
// succeed even though no such process exists.
func isProcessRunning(pid int) bool {
	if pid <= 0 {
		return false
	}
	err := syscall.Kill(pid, 0)
	// EPERM means the process exists but belongs to another user, which
	// still counts as running.
	return err == nil || err == syscall.EPERM
}
|
||||||
9
internal/lockfile/process_wasm.go
Normal file
9
internal/lockfile/process_wasm.go
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
//go:build js && wasm
|
||||||
|
|
||||||
|
package lockfile
|
||||||
|
|
||||||
|
// isProcessRunning is the js/wasm variant of the process liveness check.
// There is no process management available in WASM, so every PID is reported
// as not running.
func isProcessRunning(pid int) bool {
	const running = false
	return running
}
|
||||||
25
internal/lockfile/process_windows.go
Normal file
25
internal/lockfile/process_windows.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
//go:build windows
|
||||||
|
|
||||||
|
package lockfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/sys/windows"
|
||||||
|
)
|
||||||
|
|
||||||
|
const stillActive = 259
|
||||||
|
|
||||||
|
// isProcessRunning checks if a process with the given PID is running
|
||||||
|
func isProcessRunning(pid int) bool {
|
||||||
|
handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer windows.CloseHandle(handle)
|
||||||
|
|
||||||
|
var code uint32
|
||||||
|
if err := windows.GetExitCodeProcess(handle, &code); err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return code == stillActive
|
||||||
|
}
|
||||||
@@ -6,9 +6,11 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/steveyegge/beads/internal/debug"
|
"github.com/steveyegge/beads/internal/debug"
|
||||||
|
"github.com/steveyegge/beads/internal/lockfile"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ClientVersion is the version of this RPC client
|
// ClientVersion is the version of this RPC client
|
||||||
@@ -33,7 +35,20 @@ func TryConnect(socketPath string) (*Client, error) {
|
|||||||
// TryConnectWithTimeout attempts to connect to the daemon socket using the provided dial timeout.
|
// TryConnectWithTimeout attempts to connect to the daemon socket using the provided dial timeout.
|
||||||
// Returns nil if no daemon is running or unhealthy.
|
// Returns nil if no daemon is running or unhealthy.
|
||||||
func TryConnectWithTimeout(socketPath string, dialTimeout time.Duration) (*Client, error) {
|
func TryConnectWithTimeout(socketPath string, dialTimeout time.Duration) (*Client, error) {
|
||||||
if !endpointExists(socketPath) {
|
// Fast probe: check daemon lock before attempting RPC connection if socket doesn't exist
|
||||||
|
// This eliminates unnecessary connection attempts when no daemon is running
|
||||||
|
// If socket exists, we skip lock check for backwards compatibility and test scenarios
|
||||||
|
socketExists := endpointExists(socketPath)
|
||||||
|
if !socketExists {
|
||||||
|
beadsDir := filepath.Dir(socketPath)
|
||||||
|
running, _ := lockfile.TryDaemonLock(beadsDir)
|
||||||
|
if !running {
|
||||||
|
debug.Logf("daemon lock not held and socket missing (no daemon running)")
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !socketExists {
|
||||||
debug.Logf("RPC endpoint does not exist: %s", socketPath)
|
debug.Logf("RPC endpoint does not exist: %s", socketPath)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user