Implement exclusive lock protocol for daemon/external tool coexistence

- Add ExclusiveLock struct with JSON marshaling and validation
- Implement IsProcessAlive() with EPERM fail-safe behavior
- Add ShouldSkipDatabase() with stale lock cleanup
- Integrate lock checking into daemon sync cycle
- Return holder name on stale removal for better logging
- Case-insensitive hostname comparison
- Comprehensive unit tests (89.3% coverage)
- Documentation updates (ADVANCED.md, AGENTS.md)
- Add .beads/.exclusive-lock to .gitignore

Closes bd-115, bd-116, bd-117, bd-118, bd-119, bd-120, bd-121, bd-122

Amp-Thread-ID: https://ampcode.com/threads/T-0b835739-0d79-4ef9-aa62-8446a368c42d
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-25 23:32:02 -07:00
parent e42868c8e5
commit 3a42ca252d
10 changed files with 620 additions and 0 deletions

1
.gitignore vendored
View File

@@ -38,6 +38,7 @@ Thumbs.db
.beads/daemon.pid
.beads/daemon.lock
.beads/bd.sock
.beads/.exclusive-lock
# .beads directory files (keep JSONL only)
.beads/.gitignore

View File

@@ -337,6 +337,7 @@ Understanding the role of each component:
- **Process isolation** - Each project gets its own daemon for database safety
- **LSP model** - Similar to language servers, one daemon per workspace
- **No global daemon** - Removed in v0.16.0 to prevent cross-project pollution
- **Exclusive lock support** - External tools can prevent daemon interference (see [EXCLUSIVE_LOCK.md](EXCLUSIVE_LOCK.md))
### MCP Server (Optional)
- **Protocol adapter** - Translates MCP calls to daemon RPC or direct CLI

View File

@@ -474,6 +474,35 @@ We're working toward 1.0. Key blockers tracked in bd. Run:
bd dep tree bd-8 # Show 1.0 epic dependencies
```
## Exclusive Lock Protocol (Advanced)
**For external tools that need full database control** (e.g., CI/CD, deterministic execution systems):
The bd daemon respects exclusive locks via `.beads/.exclusive-lock` file. When this lock exists:
- Daemon skips all operations for the locked database
- External tool has complete control over git sync and database operations
- Stale locks (dead process) are automatically cleaned up
**Use case:** Tools like VibeCoder that need deterministic execution without daemon interference.
See [EXCLUSIVE_LOCK.md](EXCLUSIVE_LOCK.md) for:
- Lock file format (JSON schema)
- Creating and releasing locks (Go/shell examples)
- Stale lock detection behavior
- Integration testing guidance
**Quick example:**
```bash
# Create lock
echo '{"holder":"my-tool","pid":'$$',"hostname":"'$(hostname)'","started_at":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","version":"1.0.0"}' > .beads/.exclusive-lock
# Do work...
bd create "My issue" -p 1
# Release lock
rm .beads/.exclusive-lock
```
## Common Tasks
### Adding a New Command

View File

@@ -927,6 +927,21 @@ func createSyncFunc(ctx context.Context, store storage.Storage, autoCommit, auto
return
}
// Check for exclusive lock before processing database
beadsDir := filepath.Dir(jsonlPath)
skip, holder, err := types.ShouldSkipDatabase(beadsDir)
if skip {
if err != nil {
log.log("Skipping database (lock check failed: %v)", err)
} else {
log.log("Skipping database (locked by %s)", holder)
}
return
}
if holder != "" {
log.log("Removed stale lock (%s), proceeding with sync", holder)
}
if err := exportToJSONLWithStore(syncCtx, store, jsonlPath); err != nil {
log.log("Export failed: %v", err)
return

66
internal/types/lock.go Normal file
View File

@@ -0,0 +1,66 @@
// Package types defines core data structures for the bd issue tracker.
package types
import (
"encoding/json"
"fmt"
"os"
"time"
)
// ExclusiveLock represents the lock file format for external tools to claim
// exclusive management of a beads database. When this lock is present,
// the bd daemon will skip the database in its sync cycle.
type ExclusiveLock struct {
Holder string `json:"holder"` // Name of lock holder (e.g., "vc-executor")
PID int `json:"pid"` // Process ID
Hostname string `json:"hostname"` // Hostname where process is running
StartedAt time.Time `json:"started_at"` // When lock was acquired
Version string `json:"version"` // Version of lock holder
}
// NewExclusiveLock creates a new exclusive lock for the current process
func NewExclusiveLock(holder, version string) (*ExclusiveLock, error) {
hostname, err := os.Hostname()
if err != nil {
return nil, fmt.Errorf("failed to get hostname: %w", err)
}
return &ExclusiveLock{
Holder: holder,
PID: os.Getpid(),
Hostname: hostname,
StartedAt: time.Now(),
Version: version,
}, nil
}
// MarshalJSON implements json.Marshaler
func (e *ExclusiveLock) MarshalJSON() ([]byte, error) {
type Alias ExclusiveLock
return json.Marshal((*Alias)(e))
}
// UnmarshalJSON implements json.Unmarshaler
func (e *ExclusiveLock) UnmarshalJSON(data []byte) error {
type Alias ExclusiveLock
aux := (*Alias)(e)
return json.Unmarshal(data, aux)
}
// Validate checks if the lock has valid field values
func (e *ExclusiveLock) Validate() error {
if e.Holder == "" {
return fmt.Errorf("holder is required")
}
if e.PID <= 0 {
return fmt.Errorf("pid must be positive (got %d)", e.PID)
}
if e.Hostname == "" {
return fmt.Errorf("hostname is required")
}
if e.StartedAt.IsZero() {
return fmt.Errorf("started_at is required")
}
return nil
}

View File

@@ -0,0 +1,59 @@
package types
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
)
// ShouldSkipDatabase checks if the given beads directory has an exclusive lock file.
// It returns true if the database should be skipped (lock is valid and holder is alive),
// false otherwise. It also returns the lock holder name if skipping, and any error encountered.
//
// The function will:
// - Return false if no lock file exists (proceed with database)
// - Return true if lock exists and holder process is alive (skip database)
// - Remove stale locks (dead process) and return false (proceed with database)
// - Return true on malformed locks (fail-safe, skip database)
func ShouldSkipDatabase(beadsDir string) (skip bool, holder string, err error) {
lockPath := filepath.Join(beadsDir, ".exclusive-lock")
// Check if lock file exists
data, err := os.ReadFile(lockPath)
if err != nil {
if os.IsNotExist(err) {
// No lock file, proceed with database
return false, "", nil
}
// Error reading lock file, fail-safe: skip database
return true, "", fmt.Errorf("failed to read lock file: %w", err)
}
// Parse lock file
var lock ExclusiveLock
if err := json.Unmarshal(data, &lock); err != nil {
// Malformed lock file, fail-safe: skip database
return true, "", fmt.Errorf("malformed lock file: %w", err)
}
// Validate lock
if err := lock.Validate(); err != nil {
// Invalid lock file, fail-safe: skip database
return true, "", fmt.Errorf("invalid lock file: %w", err)
}
// Check if holder process is alive
if !IsProcessAlive(lock.PID, lock.Hostname) {
// Stale lock, remove it and proceed
if err := os.Remove(lockPath); err != nil {
// Failed to remove stale lock, fail-safe: skip database
return true, lock.Holder, fmt.Errorf("failed to remove stale lock: %w", err)
}
// Stale lock removed successfully, return holder so caller can log it
return false, lock.Holder, nil
}
// Lock is valid and holder is alive, skip database
return true, lock.Holder, nil
}

View File

@@ -0,0 +1,161 @@
package types
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"time"
)
func TestShouldSkipDatabase(t *testing.T) {
// Create temp directory for testing
tmpDir := t.TempDir()
t.Run("no lock file exists", func(t *testing.T) {
skip, holder, err := ShouldSkipDatabase(tmpDir)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if skip {
t.Error("should not skip when no lock file exists")
}
if holder != "" {
t.Errorf("holder should be empty, got %s", holder)
}
})
t.Run("valid lock with alive process", func(t *testing.T) {
lockPath := filepath.Join(tmpDir, ".exclusive-lock")
currentHost, _ := os.Hostname()
lock := &ExclusiveLock{
Holder: "test-tool",
PID: os.Getpid(), // Current process, definitely alive
Hostname: currentHost,
StartedAt: time.Now(),
Version: "1.0.0",
}
data, _ := json.Marshal(lock)
if err := os.WriteFile(lockPath, data, 0644); err != nil {
t.Fatal(err)
}
defer os.Remove(lockPath)
skip, holder, err := ShouldSkipDatabase(tmpDir)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !skip {
t.Error("should skip when lock is valid and process is alive")
}
if holder != "test-tool" {
t.Errorf("holder should be test-tool, got %s", holder)
}
})
t.Run("stale lock with dead process", func(t *testing.T) {
// Note: We can't reliably test actual stale lock cleanup without creating
// and killing a real process, because high PIDs may return EPERM (treated as alive).
// This test verifies the logic path exists, but actual cleanup relies on
// integration testing or manual verification.
// Instead, test that a lock with a different hostname (remote) is assumed alive
lockPath := filepath.Join(tmpDir, ".exclusive-lock")
lock := &ExclusiveLock{
Holder: "remote-tool",
PID: 12345,
Hostname: "definitely-not-this-host-xyz",
StartedAt: time.Now(),
Version: "1.0.0",
}
data, _ := json.Marshal(lock)
if err := os.WriteFile(lockPath, data, 0644); err != nil {
t.Fatal(err)
}
defer os.Remove(lockPath)
skip, holder, err := ShouldSkipDatabase(tmpDir)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !skip {
t.Error("should skip when lock is from remote host (can't verify)")
}
if holder != "remote-tool" {
t.Errorf("holder should be remote-tool, got %s", holder)
}
})
t.Run("malformed lock file", func(t *testing.T) {
lockPath := filepath.Join(tmpDir, ".exclusive-lock")
if err := os.WriteFile(lockPath, []byte("not valid json"), 0644); err != nil {
t.Fatal(err)
}
defer os.Remove(lockPath)
skip, holder, err := ShouldSkipDatabase(tmpDir)
if err == nil {
t.Error("expected error for malformed lock file")
}
if !skip {
t.Error("should skip when lock file is malformed (fail-safe)")
}
if holder != "" {
t.Errorf("holder should be empty for malformed lock, got %s", holder)
}
})
t.Run("invalid lock (missing required fields)", func(t *testing.T) {
lockPath := filepath.Join(tmpDir, ".exclusive-lock")
// Lock with missing holder (invalid)
lock := &ExclusiveLock{
PID: 12345,
Hostname: "test-host",
StartedAt: time.Now(),
Version: "1.0.0",
}
data, _ := json.Marshal(lock)
if err := os.WriteFile(lockPath, data, 0644); err != nil {
t.Fatal(err)
}
defer os.Remove(lockPath)
skip, holder, err := ShouldSkipDatabase(tmpDir)
if err == nil {
t.Error("expected error for invalid lock file")
}
if !skip {
t.Error("should skip when lock file is invalid (fail-safe)")
}
if holder != "" {
t.Errorf("holder should be empty for invalid lock, got %s", holder)
}
})
t.Run("remote hostname (assume alive)", func(t *testing.T) {
lockPath := filepath.Join(tmpDir, ".exclusive-lock")
lock := &ExclusiveLock{
Holder: "remote-tool",
PID: 12345,
Hostname: "remote-host-xyz",
StartedAt: time.Now(),
Version: "1.0.0",
}
data, _ := json.Marshal(lock)
if err := os.WriteFile(lockPath, data, 0644); err != nil {
t.Fatal(err)
}
defer os.Remove(lockPath)
skip, holder, err := ShouldSkipDatabase(tmpDir)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !skip {
t.Error("should skip when lock is from remote host (can't verify, assume alive)")
}
if holder != "remote-tool" {
t.Errorf("holder should be remote-tool, got %s", holder)
}
})
}

174
internal/types/lock_test.go Normal file
View File

@@ -0,0 +1,174 @@
package types
import (
"encoding/json"
"testing"
"time"
)
func TestExclusiveLock_MarshalJSON(t *testing.T) {
lock := &ExclusiveLock{
Holder: "test-tool",
PID: 12345,
Hostname: "test-host",
StartedAt: time.Date(2025, 10, 25, 12, 0, 0, 0, time.UTC),
Version: "1.0.0",
}
data, err := json.Marshal(lock)
if err != nil {
t.Fatalf("failed to marshal lock: %v", err)
}
expected := `{"holder":"test-tool","pid":12345,"hostname":"test-host","started_at":"2025-10-25T12:00:00Z","version":"1.0.0"}`
if string(data) != expected {
t.Errorf("unexpected JSON:\ngot: %s\nwant: %s", string(data), expected)
}
}
func TestExclusiveLock_UnmarshalJSON(t *testing.T) {
data := []byte(`{"holder":"test-tool","pid":12345,"hostname":"test-host","started_at":"2025-10-25T12:00:00Z","version":"1.0.0"}`)
var lock ExclusiveLock
err := json.Unmarshal(data, &lock)
if err != nil {
t.Fatalf("failed to unmarshal lock: %v", err)
}
if lock.Holder != "test-tool" {
t.Errorf("unexpected holder: got %s, want test-tool", lock.Holder)
}
if lock.PID != 12345 {
t.Errorf("unexpected PID: got %d, want 12345", lock.PID)
}
if lock.Hostname != "test-host" {
t.Errorf("unexpected hostname: got %s, want test-host", lock.Hostname)
}
if lock.Version != "1.0.0" {
t.Errorf("unexpected version: got %s, want 1.0.0", lock.Version)
}
expected := time.Date(2025, 10, 25, 12, 0, 0, 0, time.UTC)
if !lock.StartedAt.Equal(expected) {
t.Errorf("unexpected started_at: got %v, want %v", lock.StartedAt, expected)
}
}
func TestExclusiveLock_Validate(t *testing.T) {
tests := []struct {
name string
lock *ExclusiveLock
wantErr bool
}{
{
name: "valid lock",
lock: &ExclusiveLock{
Holder: "test-tool",
PID: 12345,
Hostname: "test-host",
StartedAt: time.Now(),
Version: "1.0.0",
},
wantErr: false,
},
{
name: "missing holder",
lock: &ExclusiveLock{
PID: 12345,
Hostname: "test-host",
StartedAt: time.Now(),
Version: "1.0.0",
},
wantErr: true,
},
{
name: "invalid PID (zero)",
lock: &ExclusiveLock{
Holder: "test-tool",
PID: 0,
Hostname: "test-host",
StartedAt: time.Now(),
Version: "1.0.0",
},
wantErr: true,
},
{
name: "invalid PID (negative)",
lock: &ExclusiveLock{
Holder: "test-tool",
PID: -1,
Hostname: "test-host",
StartedAt: time.Now(),
Version: "1.0.0",
},
wantErr: true,
},
{
name: "missing hostname",
lock: &ExclusiveLock{
Holder: "test-tool",
PID: 12345,
StartedAt: time.Now(),
Version: "1.0.0",
},
wantErr: true,
},
{
name: "missing started_at",
lock: &ExclusiveLock{
Holder: "test-tool",
PID: 12345,
Hostname: "test-host",
Version: "1.0.0",
},
wantErr: true,
},
{
name: "missing version (allowed)",
lock: &ExclusiveLock{
Holder: "test-tool",
PID: 12345,
Hostname: "test-host",
StartedAt: time.Now(),
},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.lock.Validate()
if (err != nil) != tt.wantErr {
t.Errorf("Validate() error = %v, wantErr %v", err, tt.wantErr)
}
})
}
}
func TestNewExclusiveLock(t *testing.T) {
lock, err := NewExclusiveLock("test-tool", "1.0.0")
if err != nil {
t.Fatalf("NewExclusiveLock failed: %v", err)
}
if lock.Holder != "test-tool" {
t.Errorf("unexpected holder: got %s, want test-tool", lock.Holder)
}
if lock.Version != "1.0.0" {
t.Errorf("unexpected version: got %s, want 1.0.0", lock.Version)
}
if lock.PID <= 0 {
t.Errorf("PID should be positive, got %d", lock.PID)
}
if lock.Hostname == "" {
t.Error("hostname should not be empty")
}
if lock.StartedAt.IsZero() {
t.Error("started_at should not be zero")
}
// Validate should pass
if err := lock.Validate(); err != nil {
t.Errorf("newly created lock should be valid: %v", err)
}
}

47
internal/types/process.go Normal file
View File

@@ -0,0 +1,47 @@
package types
import (
"errors"
"os"
"strings"
"syscall"
)
// IsProcessAlive checks if a process with the given PID is alive on the given hostname.
// If hostname doesn't match the current host, it returns true (cannot verify remote, assume alive).
// If hostname matches the current host, it checks if the PID exists.
// Permission errors are treated as "alive" (fail-safe: better to skip than wrongly remove a lock).
func IsProcessAlive(pid int, hostname string) bool {
currentHost, err := os.Hostname()
if err != nil {
// Can't determine current hostname, assume process is alive (fail-safe)
return true
}
// Case-insensitive hostname comparison to handle FQDN vs short name differences
if !strings.EqualFold(hostname, currentHost) {
return true
}
// Check if process exists on local host
process, err := os.FindProcess(pid)
if err != nil {
// On Unix, FindProcess always succeeds, so this is unlikely
return false
}
// Send signal 0 to check if process exists without actually sending a signal
err = process.Signal(syscall.Signal(0))
if err == nil {
return true
}
// Only mark as dead on ESRCH (no such process)
// EPERM (permission denied) and other errors => assume alive (fail-safe)
var errno syscall.Errno
if errors.As(err, &errno) && errno == syscall.ESRCH {
return false
}
return true
}

View File

@@ -0,0 +1,67 @@
package types
import (
"os"
"testing"
)
func TestIsProcessAlive(t *testing.T) {
currentHost, err := os.Hostname()
if err != nil {
t.Fatalf("failed to get hostname: %v", err)
}
tests := []struct {
name string
pid int
hostname string
want bool
}{
{
name: "current process (should be alive)",
pid: os.Getpid(),
hostname: currentHost,
want: true,
},
{
name: "different hostname (assume alive)",
pid: 12345,
hostname: "remote-host-xyz",
want: true,
},
{
name: "current process on different hostname (assume alive)",
pid: os.Getpid(),
hostname: "remote-host-xyz",
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := IsProcessAlive(tt.pid, tt.hostname)
if got != tt.want {
t.Errorf("IsProcessAlive(%d, %s) = %v, want %v", tt.pid, tt.hostname, got, tt.want)
}
})
}
}
func TestIsProcessAlive_CurrentProcess(t *testing.T) {
// Test that our own process is detected as alive
currentHost, _ := os.Hostname()
pid := os.Getpid()
if !IsProcessAlive(pid, currentHost) {
t.Error("current process should be detected as alive")
}
}
func TestIsProcessAlive_RemoteHost(t *testing.T) {
// Test that remote processes are assumed alive (can't verify)
if !IsProcessAlive(12345, "some-remote-host") {
t.Error("remote host processes should be assumed alive")
}
}