Fix race condition in TestSocketCleanup by protecting listener access with mutex

Fixes bd-160

The race was between Start() writing s.listener and Stop() reading it.
Now all listener access is protected by the server mutex:
- Start() stores listener under lock after creation
- Accept loop reads listener under RLock
- Stop() takes the listener and clears s.listener under lock, then closes it after unlocking

All RPC tests now pass with -race flag.
This commit is contained in:
Steve Yegge
2025-10-19 09:14:37 -07:00
parent 91948f7a2b
commit 5aa7658433
4 changed files with 54 additions and 11 deletions

View File

@@ -63,9 +63,10 @@
{"id":"bd-155","title":"Daemon production readiness","description":"Make beads daemon production-ready for long-running use, multi-repo deployments, and resilient operation.\n\nCurrent state: Good foundation, works well for development\nTarget state: Production-ready for individual developers and small teams\n\nGap areas:\n1. Resource management (cache eviction, limits)\n2. Health monitoring and crash recovery\n3. Process lifecycle management\n4. User experience (visibility, feedback)\n5. Operational concerns (logging, metrics)\n\nSuccess criteria:\n- Can run for weeks without restart\n- Handles 50+ repositories efficiently\n- Recovers from crashes automatically\n- Users understand daemon status\n- Observable and debuggable","acceptance_criteria":"All child issues completed:\n- P0 issues: Storage cache, health checks, crash recovery, MCP cleanup\n- P1 issues: Global auto-start, visibility, version checks\n- P2 issues: Resource limits, telemetry, log rotation\n\nValidation:\n- Run daemon for 7+ days without issues\n- Test with 50+ repositories\n- Verify crash recovery\n- Confirm resource usage is bounded\n- Check metrics and logs are useful","status":"in_progress","priority":0,"issue_type":"epic","created_at":"2025-10-18T13:07:43.543715-07:00","updated_at":"2025-10-18T18:35:11.752924-07:00"}
{"id":"bd-156","title":"Refactor import logic to eliminate duplication between manual and auto-import","description":"The import logic is duplicated in two places:\n1. cmd/bd/import.go (manual 'bd import' command)\n2. cmd/bd/main.go:autoImportIfNewer() (auto-import after git pull)\n\nBoth have nearly identical code for:\n- Reading and parsing JSONL\n- Type-asserting store to *sqlite.SQLiteStorage (where we just fixed a bug twice)\n- Opening direct SQLite connection when using daemon mode\n- Detecting collisions with sqlite.DetectCollisions()\n- Scoring and remapping collisions\n- Importing issues, dependencies, and labels\n\n**Problems:**\n- Bugs must be fixed in two places (we just did this for daemon mode)\n- Features must be implemented twice\n- Tests must cover both code paths\n- Harder to maintain and keep in sync\n- Higher risk of divergence over time\n\n**Proposed solution:**\nExtract a shared function that handles the core import logic:\n\n```go\n// importIssues handles the core import logic used by both manual and auto-import\nfunc importIssues(ctx context.Context, dbPath string, store storage.Storage, \n issues []*types.Issue, opts ImportOptions) (*ImportResult, error) {\n // Handle SQLite store detection/creation for daemon mode\n // Detect collisions\n // Score and remap if needed\n // Import issues, dependencies, labels\n // Return result\n}\n```\n\nBoth import.go and autoImportIfNewer() would call this shared function with their specific options.\n\n**Benefits:**\n- Single source of truth for import logic\n- Bugs fixed once\n- Easier to test\n- Easier to extend with new import features\n- Less code overall","status":"closed","priority":2,"issue_type":"chore","created_at":"2025-10-18T17:07:06.007026-07:00","updated_at":"2025-10-18T18:35:11.753484-07:00","closed_at":"2025-10-18T17:11:20.280214-07:00"}
{"id":"bd-157","title":"Complete auto-import refactoring to use shared importIssuesCore function","description":"The manual import command (bd import) was successfully refactored to use the shared importIssuesCore() function in import_shared.go, reducing code from 494 lines to 170 lines.\n\nHowever, autoImportIfNewer() in cmd/bd/main.go still has ~298 lines of duplicated import logic that should use the same shared function.\n\n**Current state:**\n- ✅ Manual import uses importIssuesCore() (commit 790233f)\n- ❌ Auto-import still has duplicated logic (lines 618-915 in main.go)\n\n**Duplication includes:**\n- SQLite store detection/creation for daemon mode (fixed in 790233f)\n- Collision detection with sqlite.DetectCollisions()\n- Scoring and remapping collisions\n- Importing issues (update existing, create new)\n- Importing dependencies\n- Importing labels\n\n**Benefits of completing this:**\n- Remove ~200 more lines of duplicated code\n- Ensure manual and auto-import have identical behavior\n- Future bug fixes only need to be made once\n- Easier to test and maintain\n\n**Implementation:**\nReplace lines 714-908 in autoImportIfNewer() with:\n```go\nopts := ImportOptions{\n ResolveCollisions: true, // Auto-import always resolves\n DryRun: false,\n SkipUpdate: false,\n Strict: false,\n}\nresult, err := importIssuesCore(ctx, dbPath, store, allIssues, opts)\n// Handle result and show remapping notification\n```\n\nThen update hash storage logic at the end.","status":"closed","priority":2,"issue_type":"chore","created_at":"2025-10-18T17:38:34.443872-07:00","updated_at":"2025-10-18T18:35:11.754006-07:00","closed_at":"2025-10-18T18:07:05.553928-07:00"}
{"id":"bd-158","title":"Add .gitignore to prevent noisy untracked beads files","description":"When using beads, git status shows several untracked files in .beads/ directory: .beads/.gitignore, .beads/db.sqlite, daemon.pid and daemon.lock files. These should be added to the project's .gitignore to prevent noise.","status":"open","priority":2,"issue_type":"chore","created_at":"2025-10-18T18:27:16.424878-07:00","updated_at":"2025-10-18T18:35:11.754574-07:00"}
{"id":"bd-158","title":"Add .gitignore to prevent noisy untracked beads files","description":"When using beads, git status shows several untracked files in .beads/ directory: .beads/.gitignore, .beads/db.sqlite, daemon.pid and daemon.lock files. These should be added to the project's .gitignore to prevent noise.","status":"closed","priority":2,"issue_type":"chore","created_at":"2025-10-18T18:27:16.424878-07:00","updated_at":"2025-10-19T09:05:48.4899-07:00","closed_at":"2025-10-19T09:05:48.4899-07:00"}
{"id":"bd-159","title":"Implement --max-depth flag for bd dep tree","description":"PR #87 adds the flag but doesn't wire it through. Need to:\n1. Add flag definition in cmd/bd/dep.go\n2. Pass maxDepth to store.GetDependencyTree()\n3. Fix truncation warning to show actual depth used\n4. Add tests for truncation behavior (TestGetDependencyTree_TruncationDepth, TestGetDependencyTree_DefaultDepth)\n5. Update CLI docs/help\n\nDefault should remain 50. Keep using direct storage mode (no RPC needed for now).\n\nRelated: PR #87, bd-3","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-10-19T08:31:15.473267-07:00","updated_at":"2025-10-19T08:55:21.266386-07:00","closed_at":"2025-10-19T08:55:21.266386-07:00"}
{"id":"bd-16","title":"Add EXPLAIN QUERY PLAN tests for ready work query","description":"Verify that the hierarchical blocking query uses proper indexes and doesn't do full table scans.\n\n**Queries to analyze:**\n1. The recursive CTE (both base case and recursive case)\n2. The final SELECT with NOT EXISTS\n3. Impact of various filters (status, priority, assignee)\n\n**Implementation:**\nAdd test function that:\n- Runs EXPLAIN QUERY PLAN on GetReadyWork query\n- Parses output to verify no SCAN TABLE operations\n- Documents expected query plan in comments\n- Fails if query plan degrades\n\n**Benefits:**\n- Catch performance regressions in tests\n- Document expected query behavior\n- Ensure indexes are being used\n\nRelated to: bd-77 (composite index on depends_on_id, type)","status":"closed","priority":3,"issue_type":"task","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-18T18:35:11.755001-07:00","closed_at":"2025-10-18T12:47:44.284846-07:00"}
{"id":"bd-160","title":"Fix race condition in TestSocketCleanup","description":"Race condition detected in internal/rpc/rpc_test.go:195 in TestSocketCleanup. This is causing CI test failures.\n\nThe race appears to be between goroutines accessing shared state during server startup/shutdown in the socket cleanup test.\n\nLocation: internal/rpc/rpc_test.go:195\nTest output shows DATA RACE between goroutines 83 and 85.","status":"in_progress","priority":1,"issue_type":"bug","created_at":"2025-10-19T09:11:34.766584-07:00","updated_at":"2025-10-19T09:13:05.448924-07:00"}
{"id":"bd-17","title":"Make auto-flush debounce duration configurable","description":"flushDebounce is hardcoded to 5 seconds. Make it configurable via environment variable BEADS_FLUSH_DEBOUNCE (e.g., '500ms', '10s'). Current 5-second value is reasonable for interactive use, but CI/automated scenarios might want faster flush. Add getDebounceDuration() helper function. Located in cmd/bd/main.go:31.","status":"closed","priority":3,"issue_type":"feature","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-18T18:35:11.755588-07:00","closed_at":"2025-10-18T09:47:43.22126-07:00"}
{"id":"bd-18","title":"Optimize auto-flush to use incremental updates","description":"Every flush exports ALL issues and ALL dependencies, even if only one issue changed. For large projects (1000+ issues), this could be expensive. Current approach guarantees consistency, which is fine for MVP, but future optimization could track which issues changed and use incremental updates. Located in cmd/bd/main.go:255-276.","status":"closed","priority":3,"issue_type":"feature","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-18T18:35:11.755965-07:00","closed_at":"2025-10-14T02:51:52.200141-07:00"}
{"id":"bd-19","title":"Refactor duplicate flush logic in PersistentPostRun","description":"PersistentPostRun contains a complete copy of the flush logic instead of calling flushToJSONL(). This violates DRY principle and makes maintenance harder. Refactor to use flushToJSONL() with a force parameter to bypass isDirty check, or extract shared logic into a helper function. Located in cmd/bd/main.go:104-138.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-18T18:35:11.756336-07:00","closed_at":"2025-10-18T09:44:24.167574-07:00"}

5
.gitignore vendored
View File

@@ -38,5 +38,10 @@ Thumbs.db
.beads/daemon.pid
.beads/bd.sock
# .beads directory files (keep JSONL only)
.beads/.gitignore
.beads/db.sqlite
.beads/bd.db
# Keep JSONL exports (source of truth for git)
!.beads/*.jsonl

View File

@@ -192,11 +192,32 @@ func TestSocketCleanup(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go server.Start(ctx)
time.Sleep(100 * time.Millisecond)
// Start server in goroutine
started := make(chan error, 1)
go func() {
err := server.Start(ctx)
if err != nil {
started <- err
}
}()
if _, err := os.Stat(socketPath); os.IsNotExist(err) {
t.Fatal("Socket file not created")
// Wait for socket to be created (with timeout)
timeout := time.After(5 * time.Second)
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
socketReady := false
for !socketReady {
select {
case err := <-started:
t.Fatalf("Server failed to start: %v", err)
case <-timeout:
t.Fatal("Timeout waiting for socket creation")
case <-ticker.C:
if _, err := os.Stat(socketPath); err == nil {
socketReady = true
}
}
}
if err := server.Stop(); err != nil {

View File

@@ -94,23 +94,33 @@ func (s *Server) Start(ctx context.Context) error {
return fmt.Errorf("failed to remove old socket: %w", err)
}
var err error
s.listener, err = net.Listen("unix", s.socketPath)
listener, err := net.Listen("unix", s.socketPath)
if err != nil {
return fmt.Errorf("failed to listen on socket: %w", err)
}
// Set socket permissions to 0600 for security (owner only)
if err := os.Chmod(s.socketPath, 0600); err != nil {
s.listener.Close()
listener.Close()
return fmt.Errorf("failed to set socket permissions: %w", err)
}
// Store listener under lock
s.mu.Lock()
s.listener = listener
s.mu.Unlock()
go s.handleSignals()
go s.runCleanupLoop()
// Accept connections using listener
for {
conn, err := s.listener.Accept()
// Get listener under lock
s.mu.RLock()
listener := s.listener
s.mu.RUnlock()
conn, err := listener.Accept()
if err != nil {
s.mu.Lock()
shutdown := s.shutdown
@@ -152,8 +162,14 @@ func (s *Server) Stop() error {
}
}
if s.listener != nil {
if closeErr := s.listener.Close(); closeErr != nil {
// Close listener under lock
s.mu.Lock()
listener := s.listener
s.listener = nil
s.mu.Unlock()
if listener != nil {
if closeErr := listener.Close(); closeErr != nil {
err = fmt.Errorf("failed to close listener: %w", closeErr)
return
}