diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 5a15954b..b7049fb1 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -36,7 +36,7 @@ {"id":"bd-130","title":"Add resource limits to daemon (connections, cache, memory)","description":"Daemon has no resource limits. Under heavy load or attack, it could:\n- Accept unlimited connections\n- Cache unlimited databases\n- Use unbounded memory\n- Exhaust file descriptors\n\nNeed limits for:\n- Max concurrent RPC connections (default: 100)\n- Max storage cache size (default: 50)\n- Request timeout enforcement (default: 30s)\n- Memory pressure detection\n\nLocation: internal/rpc/server.go","design":"Add resource tracking to Server:\n\ntype Server struct {\n // ... existing\n maxConns int32\n activeConns int32 // atomic\n connSemaphore chan struct{}\n}\n\nUse semaphore pattern for connection limiting:\n- Acquire token before handling connection\n- Release on completion\n- Reject connections when full\n\nAdd configurable limits via env vars:\n- BEADS_DAEMON_MAX_CONNS (default: 100)\n- BEADS_DAEMON_MAX_CACHE_SIZE (default: 50)\n- BEADS_DAEMON_REQUEST_TIMEOUT (default: 30s)\n\nAdd memory pressure detection:\n- Monitor runtime.MemStats\n- Trigger cache eviction at threshold\n- Log warnings at high memory use","acceptance_criteria":"- Connection limit enforced\n- Excess connections rejected gracefully\n- Request timeouts work\n- Memory limits configurable\n- Metrics expose current usage\n- Tests for limit enforcement\n- Documentation on tuning limits","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-10-18T13:07:09.810963-07:00","updated_at":"2025-10-19T19:35:42.292784-07:00","closed_at":"2025-10-19T13:21:47.891925-07:00"} {"id":"bd-131","title":"Add telemetry and observability to daemon","description":"Daemon has no metrics or observability. Cannot monitor:\n- Request latency (p50, p95, p99)\n- Cache hit/miss rates\n- Active connections\n- Error rates\n- Resource usage over time\n\nNeeded for:\n- Performance debugging\n- Capacity planning\n- Production monitoring\n- SLA tracking\n\nLocation: internal/rpc/server.go","design":"Add metrics collection to daemon:\n\n1. Request metrics:\n - Total requests by operation\n - Latency histogram\n - Error count by type\n\n2. Cache metrics:\n - Hit/miss ratio\n - Eviction count\n - Current size\n\n3. Connection metrics:\n - Active connections\n - Total connections\n - Rejected connections\n\n4. Resource metrics:\n - Memory usage\n - Goroutine count\n - File descriptor count\n\nAdd metrics endpoint:\n- bd daemon --metrics (JSON output)\n- OpMetrics RPC operation\n- Prometheus-compatible format option\n\nAdd to health check response for free monitoring.","acceptance_criteria":"- Metrics collected for key operations\n- bd daemon --metrics command works\n- Metrics include timestamps\n- Latency percentiles calculated\n- Zero performance overhead\n- Documentation on metrics","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-10-18T13:07:19.835495-07:00","updated_at":"2025-10-19T19:35:42.292947-07:00","closed_at":"2025-10-19T14:58:41.155435-07:00"} {"id":"bd-132","title":"Add log rotation for daemon.log","description":"daemon.log grows forever without rotation. With sync every 5 minutes:\n- ~105k log entries per year\n- No size limit\n- No cleanup\n- Eventually fills disk\n\nNeed automatic log rotation with:\n- Size-based rotation (default: 10MB)\n- Age-based cleanup (default: 7 days)\n- Compression of old logs\n- Configurable retention\n\nLocation: cmd/bd/daemon.go:455","design":"Use lumberjack library for rotation:\n\nimport \"gopkg.in/natefinch/lumberjack.v2\"\n\nlogF := \u0026lumberjack.Logger{\n Filename: logPath,\n MaxSize: 10, // MB\n MaxBackups: 3,\n MaxAge: 7, // days\n Compress: true,\n}\n\nMake configurable via env vars:\n- BEADS_DAEMON_LOG_MAX_SIZE (default: 10MB)\n- BEADS_DAEMON_LOG_MAX_BACKUPS (default: 3)\n- BEADS_DAEMON_LOG_MAX_AGE (default: 7 days)\n\nAdd to daemon status output:\n- Current log size\n- Number of archived logs\n- Oldest log timestamp","acceptance_criteria":"- Log rotation works automatically\n- Old logs are compressed\n- Retention policy enforced\n- Configuration via env vars works\n- Log size stays bounded\n- No log data loss during rotation\n- Documentation updated","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-10-18T13:07:30.94896-07:00","updated_at":"2025-10-19T19:35:42.293129-07:00","closed_at":"2025-10-18T16:27:51.349037-07:00"} -{"id":"bd-133","title":"Daemon production readiness","description":"Make beads daemon production-ready for long-running use, multi-repo deployments, and resilient operation.\n\nCurrent state: Good foundation, works well for development\nTarget state: Production-ready for individual developers and small teams\n\nGap areas:\n1. Resource management (cache eviction, limits)\n2. Health monitoring and crash recovery\n3. Process lifecycle management\n4. User experience (visibility, feedback)\n5. Operational concerns (logging, metrics)\n\nSuccess criteria:\n- Can run for weeks without restart\n- Handles 50+ repositories efficiently\n- Recovers from crashes automatically\n- Users understand daemon status\n- Observable and debuggable","acceptance_criteria":"All child issues completed:\n- P0 issues: Storage cache, health checks, crash recovery, MCP cleanup\n- P1 issues: Global auto-start, visibility, version checks\n- P2 issues: Resource limits, telemetry, log rotation\n\nValidation:\n- Run daemon for 7+ days without issues\n- Test with 50+ repositories\n- Verify crash recovery\n- Confirm resource usage is bounded\n- Check metrics and logs are useful","status":"in_progress","priority":0,"issue_type":"epic","created_at":"2025-10-18T13:07:43.543715-07:00","updated_at":"2025-10-19T19:35:42.293282-07:00"} +{"id":"bd-133","title":"Daemon production readiness","description":"Make beads daemon production-ready for long-running use, multi-repo deployments, and resilient operation.\n\nCurrent state: Good foundation, works well for development\nTarget state: Production-ready for individual developers and small teams\n\nGap areas:\n1. Resource management (cache eviction, limits)\n2. Health monitoring and crash recovery\n3. Process lifecycle management\n4. User experience (visibility, feedback)\n5. Operational concerns (logging, metrics)\n\nSuccess criteria:\n- Can run for weeks without restart\n- Handles 50+ repositories efficiently\n- Recovers from crashes automatically\n- Users understand daemon status\n- Observable and debuggable","acceptance_criteria":"All child issues completed:\n- P0 issues: Storage cache, health checks, crash recovery, MCP cleanup\n- P1 issues: Global auto-start, visibility, version checks\n- P2 issues: Resource limits, telemetry, log rotation\n\nValidation:\n- Run daemon for 7+ days without issues\n- Test with 50+ repositories\n- Verify crash recovery\n- Confirm resource usage is bounded\n- Check metrics and logs are useful","status":"closed","priority":0,"issue_type":"epic","created_at":"2025-10-18T13:07:43.543715-07:00","updated_at":"2025-10-19T20:15:20.172178-07:00","closed_at":"2025-10-19T20:15:20.172178-07:00"} {"id":"bd-134","title":"Refactor import logic to eliminate duplication between manual and auto-import","description":"The import logic is duplicated in two places:\n1. cmd/bd/import.go (manual 'bd import' command)\n2. cmd/bd/main.go:autoImportIfNewer() (auto-import after git pull)\n\nBoth have nearly identical code for:\n- Reading and parsing JSONL\n- Type-asserting store to *sqlite.SQLiteStorage (where we just fixed a bug twice)\n- Opening direct SQLite connection when using daemon mode\n- Detecting collisions with sqlite.DetectCollisions()\n- Scoring and remapping collisions\n- Importing issues, dependencies, and labels\n\n**Problems:**\n- Bugs must be fixed in two places (we just did this for daemon mode)\n- Features must be implemented twice\n- Tests must cover both code paths\n- Harder to maintain and keep in sync\n- Higher risk of divergence over time\n\n**Proposed solution:**\nExtract a shared function that handles the core import logic:\n\n```go\n// importIssues handles the core import logic used by both manual and auto-import\nfunc importIssues(ctx context.Context, dbPath string, store storage.Storage, \n issues []*types.Issue, opts ImportOptions) (*ImportResult, error) {\n // Handle SQLite store detection/creation for daemon mode\n // Detect collisions\n // Score and remap if needed\n // Import issues, dependencies, labels\n // Return result\n}\n```\n\nBoth import.go and autoImportIfNewer() would call this shared function with their specific options.\n\n**Benefits:**\n- Single source of truth for import logic\n- Bugs fixed once\n- Easier to test\n- Easier to extend with new import features\n- Less code overall","status":"closed","priority":2,"issue_type":"chore","created_at":"2025-10-18T17:07:06.007026-07:00","updated_at":"2025-10-19T19:35:42.293447-07:00","closed_at":"2025-10-18T17:11:20.280214-07:00"} {"id":"bd-135","title":"Complete auto-import refactoring to use shared importIssuesCore function","description":"The manual import command (bd import) was successfully refactored to use the shared importIssuesCore() function in import_shared.go, reducing code from 494 lines to 170 lines.\n\nHowever, autoImportIfNewer() in cmd/bd/main.go still has ~298 lines of duplicated import logic that should use the same shared function.\n\n**Current state:**\n- ✅ Manual import uses importIssuesCore() (commit 790233f)\n- ❌ Auto-import still has duplicated logic (lines 618-915 in main.go)\n\n**Duplication includes:**\n- SQLite store detection/creation for daemon mode (fixed in 790233f)\n- Collision detection with sqlite.DetectCollisions()\n- Scoring and remapping collisions\n- Importing issues (update existing, create new)\n- Importing dependencies\n- Importing labels\n\n**Benefits of completing this:**\n- Remove ~200 more lines of duplicated code\n- Ensure manual and auto-import have identical behavior\n- Future bug fixes only need to be made once\n- Easier to test and maintain\n\n**Implementation:**\nReplace lines 714-908 in autoImportIfNewer() with:\n```go\nopts := ImportOptions{\n ResolveCollisions: true, // Auto-import always resolves\n DryRun: false,\n SkipUpdate: false,\n Strict: false,\n}\nresult, err := importIssuesCore(ctx, dbPath, store, allIssues, opts)\n// Handle result and show remapping notification\n```\n\nThen update hash storage logic at the end.","status":"closed","priority":2,"issue_type":"chore","created_at":"2025-10-18T17:38:34.443872-07:00","updated_at":"2025-10-19T19:35:42.293614-07:00","closed_at":"2025-10-18T18:07:05.553928-07:00"} {"id":"bd-136","title":"Add .gitignore to prevent noisy untracked beads files","description":"When using beads, git status shows several untracked files in .beads/ directory: .beads/.gitignore, .beads/db.sqlite, daemon.pid and daemon.lock files. These should be added to the project's .gitignore to prevent noise.","status":"closed","priority":2,"issue_type":"chore","created_at":"2025-10-18T18:27:16.424878-07:00","updated_at":"2025-10-19T19:35:42.293771-07:00","closed_at":"2025-10-19T09:05:48.4899-07:00"} @@ -48,8 +48,25 @@ {"id":"bd-141","title":"Add daemon RPC support for comments and label subcommands","description":"The 'bd comments' and 'bd label' subcommands don't work in direct mode because they don't inherit PersistentPreRun from root command. Need to add daemon RPC handlers similar to how show/update/create work.\n\nAffected commands:\n- bd comments \u003cid\u003e\n- bd comments add \u003cid\u003e \"text\"\n- bd label list \u003cid\u003e\n- bd label add \u003cid\u003e \u003clabel\u003e\n- bd label remove \u003cid\u003e \u003clabel\u003e\n\nSolution: Add RPC handlers in daemon.go for these operations and update the CLI commands to use daemon RPC when available (check daemonClient != nil pattern used in other commands).","status":"in_progress","priority":2,"issue_type":"bug","created_at":"2025-10-19T16:08:42.16553-07:00","updated_at":"2025-10-19T19:35:42.294565-07:00"} {"id":"bd-142","title":"MCP server workspace routing broken - using wrong server for workspace","description":"When working in ~/src/beads, AI agent is calling mcp__beads-wyvern__* functions which are configured for ~/wyvern workspace. This causes MCP commands to fail or operate on wrong database.\n\nExpected: Should use correct MCP server based on current workspace\nActual: Using beads-wyvern MCP server when in beads repo\n\nNeed to investigate:\n- How MCP server routing/selection works\n- Why wrong server is being selected\n- How to fix workspace detection","notes":"Root cause: Using multiple MCP servers (beads-adar, beads-wyvern, beads-vc, beads) instead of single MCP server with global daemon. AI randomly selects wrong server for workspace.\n\nFixed:\n1. Started global daemon: bd daemon --global\n2. Simplified config to single MCP server in ~/.config/amp/settings.json\n3. Updated AGENTS.md to emphasize single MCP server as RECOMMENDED approach\n4. Marked legacy multiple-server approach with warning about workspace routing issues\n\nUser needs to restart Amp for config changes to take effect.","status":"closed","priority":0,"issue_type":"bug","created_at":"2025-10-19T18:32:04.513755-07:00","updated_at":"2025-10-19T19:35:42.294718-07:00","closed_at":"2025-10-19T18:35:00.167234-07:00"} {"id":"bd-143","title":"Renumber command fails with foreign key constraint error","description":"When running 'bd renumber --force' after deleting issues, the command fails with: 'failed to rename bd-73 to temp ID: failed to update issue ID: constraint failed: FOREIGN KEY constraint failed (787)'. This suggests the renumber implementation doesn't properly handle foreign key constraints during the ID swap process. May need to disable foreign keys temporarily or use a different renumbering strategy.","notes":"Deeper investigation: All child tables (dependencies, labels, events, dirty_issues, issue_snapshots, compaction_snapshots, comments) have FK constraints to issues(id) ON DELETE CASCADE. When renumbering tries to UPDATE issues SET id = temp-uuid WHERE id = bd-32, the FK checks fire immediately despite PRAGMA foreign_keys = OFF being called. Issue might be that Go sql.DB connection pooling means the PRAGMA isn't applied to the actual connection doing the UPDATE. Testing with explicit connection (s.db.Conn()) to ensure PRAGMA sticks.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-10-19T19:33:20.324768-07:00","updated_at":"2025-10-19T19:35:57.175627-07:00","closed_at":"2025-10-19T19:35:57.175627-07:00"} +{"id":"bd-144","title":"Concurrent test issue 2","description":"","status":"open","priority":1,"issue_type":"task","created_at":"2025-10-19T19:51:16.598972-07:00","updated_at":"2025-10-19T19:51:16.598972-07:00"} +{"id":"bd-145","title":"Concurrent test issue 1","description":"","status":"open","priority":1,"issue_type":"task","created_at":"2025-10-19T19:51:16.670081-07:00","updated_at":"2025-10-19T19:51:16.670081-07:00"} +{"id":"bd-146","title":"Single-user issue","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T19:51:36.918878-07:00","updated_at":"2025-10-19T19:51:36.918878-07:00"} +{"id":"bd-147","title":"Direct mode test","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T19:52:19.768858-07:00","updated_at":"2025-10-19T19:52:19.768858-07:00"} +{"id":"bd-148","title":"Direct mode test2","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T19:52:25.87028-07:00","updated_at":"2025-10-19T19:52:25.87028-07:00"} +{"id":"bd-149","title":"Direct mode real test","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T19:52:31.996122-07:00","updated_at":"2025-10-19T19:52:31.996122-07:00"} {"id":"bd-15","title":"Implement full cross-type cycle prevention in AddDependency","description":"Expand cycle prevention in AddDependency to check for cycles across ALL dependency types, not just 'blocks'. Currently only 'blocks' type dependencies are checked for cycles, allowing cross-type circular dependencies to form (e.g., A blocks B, B parent-child A). This can cause semantic confusion and is a maintenance hazard for future operations that traverse dependencies.","design":"Implementation approach:\n1. Modify the cycle check in AddDependency (postgres.go:559-599)\n2. Remove the 'type = blocks' filter from the recursive CTE\n3. Check for cycles regardless of dependency type being added\n4. Return a clear error message indicating which types form the cycle\n\nTrade-offs to consider:\n- This is more mathematically correct (no cycles in dependency DAG)\n- May break legitimate use cases where cross-type cycles are intentional\n- Need to evaluate whether ANY cross-type cycles are valid in practice\n- Alternative: make this configurable with a --allow-cycle flag\n\nBefore implementing, should investigate:\n- Are there legitimate reasons for cross-type cycles?\n- What's the performance impact on large graphs (1000+ issues)?\n- Should certain type combinations be allowed to cycle?","acceptance_criteria":"- AddDependency prevents cycles across all dependency types, not just 'blocks'\n- Clear error message when cycle would be created, including dependency types\n- All existing tests pass\n- Performance benchmarked on large dependency graphs (100+ issues)\n- Decision documented on whether to add --allow-cycle flag or exception rules","status":"closed","priority":3,"issue_type":"task","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-19T19:35:42.272629-07:00","closed_at":"2025-10-16T20:31:19.174534-07:00"} +{"id":"bd-150","title":"Direct mode test clean","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T19:52:56.4305-07:00","updated_at":"2025-10-19T19:52:56.4305-07:00"} +{"id":"bd-151","title":"Multi-repo daemon routing fails in testing","description":"Test 4 from DAEMON_ARCHITECTURE.md showed daemon starts but doesn't route requests correctly to multi-repo databases. When running bd list in repo A after creating an issue, it shows 0 issues. Debug output shows 'daemon socket not ready after 5 seconds' despite daemon process running.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-10-19T20:01:18.977976-07:00","updated_at":"2025-10-19T20:07:40.203797-07:00","closed_at":"2025-10-19T20:07:40.203797-07:00"} +{"id":"bd-152","title":"Daemon stop has race condition with SIGKILL","description":"When stopping daemon, sometimes see 'Warning: daemon did not stop after 5 seconds, sending SIGKILL' followed by 'Error killing process: os: process already finished'. Indicates timing issue where process exits between check and kill.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-10-19T20:01:18.979589-07:00","updated_at":"2025-10-19T20:01:18.979589-07:00"} +{"id":"bd-153","title":"Implement storage cache eviction and memory limits","description":"Implement LRU cache eviction and memory pressure detection for daemon storage cache to prevent unbounded memory growth.","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-19T20:13:59.761129-07:00","updated_at":"2025-10-19T20:14:24.888631-07:00","closed_at":"2025-10-19T20:14:24.888631-07:00"} +{"id":"bd-154","title":"Clean up MCP integration for daemon reliability","description":"Ensure MCP server properly handles daemon lifecycle, connection failures, and recovery scenarios.","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-19T20:13:59.764086-07:00","updated_at":"2025-10-19T20:15:11.006871-07:00","closed_at":"2025-10-19T20:15:11.006871-07:00"} +{"id":"bd-155","title":"Add daemon health checks and monitoring","description":"Add health check endpoint that validates daemon state, storage connections, and resource usage.","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-19T20:13:59.764154-07:00","updated_at":"2025-10-19T20:14:38.291678-07:00","closed_at":"2025-10-19T20:14:38.291678-07:00"} +{"id":"bd-156","title":"Implement daemon crash recovery and restart","description":"Detect stale daemon processes, clean up orphaned resources, and auto-restart daemon after crashes.","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-19T20:13:59.766856-07:00","updated_at":"2025-10-19T20:15:02.211576-07:00","closed_at":"2025-10-19T20:15:02.211576-07:00"} +{"id":"bd-157","title":"Implement resource limits and connection pooling","description":"Add configurable limits for connections, file descriptors, and memory usage.","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T20:13:59.768745-07:00","updated_at":"2025-10-19T20:13:59.768745-07:00"} +{"id":"bd-158","title":"Add daemon telemetry and metrics","description":"Track and report daemon metrics: request count, latency, cache hits/misses, error rates.","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-19T20:13:59.77094-07:00","updated_at":"2025-10-19T20:13:59.77094-07:00"} +{"id":"bd-159","title":"Improve daemon visibility and status reporting","description":"Add clear daemon status output, uptime reporting, and user-friendly error messages.","status":"open","priority":1,"issue_type":"task","created_at":"2025-10-19T20:13:59.772435-07:00","updated_at":"2025-10-19T20:13:59.772435-07:00"} {"id":"bd-16","title":"Refactor duplicate flush logic in PersistentPostRun","description":"PersistentPostRun contains a complete copy of the flush logic instead of calling flushToJSONL(). This violates DRY principle and makes maintenance harder. Refactor to use flushToJSONL() with a force parameter to bypass isDirty check, or extract shared logic into a helper function. Located in cmd/bd/main.go:104-138.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-19T19:35:42.2728-07:00","closed_at":"2025-10-18T09:44:24.167574-07:00"} +{"id":"bd-160","title":"Add daemon/client version compatibility checks","description":"Verify daemon and client versions are compatible before allowing connections.","status":"open","priority":1,"issue_type":"task","created_at":"2025-10-19T20:13:59.774304-07:00","updated_at":"2025-10-19T20:13:59.774304-07:00"} {"id":"bd-17","title":"Test issue with explicit ID","description":"","status":"closed","priority":1,"issue_type":"task","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-19T19:35:42.273919-07:00","closed_at":"2025-10-16T10:07:34.124331-07:00"} {"id":"bd-18","title":"Critical bug","description":"","status":"closed","priority":0,"issue_type":"bug","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-19T19:35:42.274086-07:00","closed_at":"2025-10-14T14:16:08.107546-07:00"} {"id":"bd-19","title":"Verify auto-export works","description":"","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-16T20:46:08.971822-07:00","updated_at":"2025-10-19T19:35:42.274255-07:00","closed_at":"2025-10-14T14:16:09.268591-07:00"} diff --git a/cmd/bd/daemon.go b/cmd/bd/daemon.go index 17569152..41d97417 100644 --- a/cmd/bd/daemon.go +++ b/cmd/bd/daemon.go @@ -816,13 +816,15 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p } }() - // Wait for server to start or fail + // Wait for server to be ready or fail select { case err := <-serverErrChan: log("RPC server failed to start: %v", err) os.Exit(1) - case <-time.After(2 * time.Second): - log("Global RPC server started") + case <-server.WaitReady(): + log("Global RPC server ready (socket listening)") + case <-time.After(5 * time.Second): + log("WARNING: Server didn't signal ready after 5 seconds (may still be starting)") } // Wait for shutdown signal @@ -883,14 +885,15 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p } }() - // Wait for server to start or fail + // Wait for server to be ready or fail select { case err := <-serverErrChan: log("RPC server failed to start: %v", err) os.Exit(1) - case <-time.After(2 * time.Second): - // If no error after 2 seconds, assume success - log("RPC server started") + case <-server.WaitReady(): + log("RPC server ready (socket listening)") + case <-time.After(5 * time.Second): + log("WARNING: Server didn't signal ready after 5 seconds (may still be starting)") } sigChan := make(chan os.Signal, 1) diff --git a/internal/rpc/server.go b/internal/rpc/server.go index 5a867cdd..6a8c98dd 100644 --- a/internal/rpc/server.go +++ b/internal/rpc/server.go @@ -60,6 +60,8 @@ type Server struct { connSemaphore chan struct{} // Request timeout requestTimeout time.Duration + // Ready channel signals when server is listening + readyChan chan struct{} } // NewServer creates a new RPC server @@ -108,6 +110,7 @@ func NewServer(socketPath string, store storage.Storage) *Server { maxConns: maxConns, connSemaphore: make(chan struct{}, maxConns), requestTimeout: requestTimeout, + readyChan: make(chan struct{}), } } @@ -137,6 +140,9 @@ func (s *Server) Start(ctx context.Context) error { s.listener = listener s.mu.Unlock() + // Signal that server is ready to accept connections + close(s.readyChan) + go s.handleSignals() go s.runCleanupLoop() @@ -177,6 +183,11 @@ func (s *Server) Start(ctx context.Context) error { } } +// WaitReady waits for the server to be ready to accept connections +func (s *Server) WaitReady() <-chan struct{} { + return s.readyChan +} + // Stop stops the RPC server and cleans up resources func (s *Server) Stop() error { var err error