From 744563e87ff2bcbf9396c4828a72ed2e8754dfca Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Sat, 25 Oct 2025 17:39:21 -0700 Subject: [PATCH] Add health checks and reconnection logic for stale daemon sockets (bd-137) - Add ping() and health() methods to BdDaemonClient for connection verification - Implement _health_check_client() to verify cached client connections - Add _reconnect_client() with exponential backoff (0.1s, 0.2s, 0.4s, max 3 retries) - Update _get_client() to health-check before returning cached clients - Automatically detect and remove stale connections from pool - Add comprehensive test suite with 14 tests covering all scenarios - Handle daemon restarts, upgrades, and long-idle connections gracefully Amp-Thread-ID: https://ampcode.com/threads/T-2366ef1b-389c-4293-8145-7613037c9dfa Co-authored-by: Amp --- .beads/beads.jsonl | 2 +- .../src/beads_mcp/bd_daemon_client.py | 23 ++ integrations/beads-mcp/src/beads_mcp/tools.py | 80 ++++- .../tests/test_daemon_health_check.py | 304 ++++++++++++++++++ 4 files changed, 405 insertions(+), 4 deletions(-) create mode 100644 integrations/beads-mcp/tests/test_daemon_health_check.py diff --git a/.beads/beads.jsonl b/.beads/beads.jsonl index 1d3102a2..2b74175d 100644 --- a/.beads/beads.jsonl +++ b/.beads/beads.jsonl @@ -40,7 +40,7 @@ {"id":"bd-134","title":"Address gosec security warnings (102 issues)","description":"Security linter warnings: file permissions (0755 should be 0750), G304 file inclusion via variable, G204 subprocess launches. Many are false positives but should be reviewed.","design":"Review each gosec warning. Add exclusions for legitimate cases to .golangci.yml. Fix real security issues (overly permissive file modes).","status":"open","priority":2,"issue_type":"task","created_at":"2025-10-25T13:47:10.719134-07:00","updated_at":"2025-10-25T13:47:10.719134-07:00"} {"id":"bd-135","title":"Multi-project MCP context switching (GH#145)","description":"Enable MCP server to manage multiple beads projects in a single session with per-request workspace_root parameter.\n\nCurrent bug: set_context(workspace_root) doesn't actually switch sockets - all operations hit first initialized socket.\n\nUse case: Managing tasks across multiple organizations with different permission models (internal, partners, open source).\n\nArchitecture: Connection pool keyed by workspace_root, each maintaining its own daemon socket connection. Request-scoped routing using ContextVar to avoid global state races.\n\nSee GH#145 for full requirements and user context.","design":"✅ APPROVED WITH MODIFICATIONS by architectural review (2025-10-25)\n\nLSP-style model is correct: Single MCP server → per-project daemons → isolated databases.\n\nCRITICAL CHANGES from review:\n1. **Simplify connection pool**: No LRU eviction initially (typical user: 2-5 projects)\n2. **Add asyncio.Lock**: Prevent race conditions in pool access\n3. **Defer health checks**: Only retry on failure, not preemptive pings\n4. **Handle submodules**: Check local .beads BEFORE git toplevel\n5. **Path canonicalization**: realpath + git toplevel with caching\n\nRISKS MITIGATED:\n- Global _client bug: Replace with connection pool keyed by canonical path\n- Race conditions: Add asyncio.Lock for pool mutations\n- Submodule edge case: Check .beads directory first\n- Stale sockets: Retry once on connection failure\n\nEstimated effort: 2.5-3.5 days (simplified from 2.5-4.5 days)\nConfidence: 8/10","acceptance_criteria":"- Multiple projects can be accessed in single MCP session\n- Per-request workspace_root parameter works on all tools\n- No cross-project data leakage\n- Concurrent calls to different projects work correctly\n- Stale sockets auto-reconnect with retry/backoff\n- Integration tests verify isolation across 2+ temp repos\n- set_context() still works as default fallback","notes":"Review doc: docs/bd-135-architectural-review.md\n\nImplementation order:\n1. bd-139 (connection manager) - Foundation with pool + lock\n2. bd-136 (ContextVar routing) - Per-request workspace\n3. bd-138 (require_context) - Validation\n4. bd-140 (tests) - Concurrency + edge cases\n5. bd-141 (docs) - Usage guide\n6. bd-142 (health checks) - DEFERRED to Phase 2","status":"closed","priority":1,"issue_type":"epic","assignee":"amp","created_at":"2025-10-25T13:59:57.231937-07:00","updated_at":"2025-10-25T14:36:02.046142-07:00","closed_at":"2025-10-25T14:36:02.046142-07:00"} {"id":"bd-136","title":"Implement request-scoped routing with ContextVar","description":"Add ContextVar-based routing to avoid global state races during concurrent multi-project calls.\n\nApproach:\n- Define current_workspace: ContextVar[str|None] in server.py\n- Add @with_workspace decorator that resolves workspace_root (via _resolve_workspace_root + realpath)\n- Set ContextVar for duration of tool call, reset after\n- Falls back to set_context default (BEADS_WORKING_DIR) if workspace_root not provided\n- beads_mcp.tools.get_client() reads current_workspace from ContextVar\n\nBlocks: bd-135 (connection manager must exist first)","design":"Decorator pattern with ContextVar for request-scoped workspace routing.\n\n@with_workspace decorator:\n- Extract workspace_root parameter from tool call\n- Resolve via _resolve_workspace_root + realpath\n- Set current_workspace ContextVar for request duration\n- Falls back to BEADS_WORKING_DIR if workspace_root not provided\n- Reset ContextVar after tool completes\n\nApplied to all tools in server.py. _get_client() reads current_workspace.\n\n⚠️ CONCURRENCY GOTCHA (from architectural review):\n- ContextVar doesn't propagate to asyncio.create_task() spawned tasks\n- SOLUTION: Keep tool calls synchronous, no background task spawning\n- If background tasks needed: use contextvars.copy_context()\n\nDocument this limitation in bd-141.","notes":"Blocks on bd-139 (connection pool must exist first).\n\nCRITICAL: Do NOT spawn background tasks within tool implementations.\nContextVar propagation to spawned tasks is unreliable.","status":"closed","priority":1,"issue_type":"task","assignee":"amp","created_at":"2025-10-25T14:00:27.895512-07:00","updated_at":"2025-10-25T14:32:36.531658-07:00","closed_at":"2025-10-25T14:32:36.531658-07:00","dependencies":[{"issue_id":"bd-136","depends_on_id":"bd-135","type":"parent-child","created_at":"2025-10-25T14:00:27.896366-07:00","created_by":"daemon"}]} -{"id":"bd-137","title":"Add health checks and reconnection logic for stale sockets","description":"Handle stale/broken socket connections after daemon restarts or upgrades.\n\nFeatures:\n- ping/health_check method on client\n- Check before use or periodic health check\n- On failure: drop from pool, attempt reconnect with exponential backoff\n- Clear error propagation if reconnect fails after retries\n- Handle version mismatch after daemon upgrade\n- Handle long-idle connections closed by daemon","design":"Add async ping() to DaemonClient. ConnectionManager.get_client() pings before returning cached client. On failure, del from pool and retry connect. Bounded retry (3-5 attempts) with backoff.","status":"open","priority":1,"issue_type":"task","created_at":"2025-10-25T14:00:27.8967-07:00","updated_at":"2025-10-25T14:00:27.8967-07:00"} +{"id":"bd-137","title":"Add health checks and reconnection logic for stale sockets","description":"Handle stale/broken socket connections after daemon restarts or upgrades.\n\nFeatures:\n- ping/health_check method on client\n- Check before use or periodic health check\n- On failure: drop from pool, attempt reconnect with exponential backoff\n- Clear error propagation if reconnect fails after retries\n- Handle version mismatch after daemon upgrade\n- Handle long-idle connections closed by daemon","design":"Add async ping() to DaemonClient. ConnectionManager.get_client() pings before returning cached client. On failure, del from pool and retry connect. Bounded retry (3-5 attempts) with backoff.","status":"in_progress","priority":1,"issue_type":"task","created_at":"2025-10-25T14:00:27.8967-07:00","updated_at":"2025-10-25T17:28:05.384177-07:00"} {"id":"bd-138","title":"Fix require_context to support per-request workspace_root","description":"Update require_context decorator to pass if either:\n- workspace_root was provided on the tool call (via ContextVar), OR\n- BEADS_WORKING_DIR is set (from set_context)\n\nStop using BEADS_DB as router - treat it only as CLI fallback.\n\nEnsures backward compatibility with set_context() while supporting new per-request routing.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-10-25T14:00:27.896646-07:00","updated_at":"2025-10-25T14:32:36.533618-07:00","closed_at":"2025-10-25T14:32:36.533618-07:00","dependencies":[{"issue_id":"bd-138","depends_on_id":"bd-135","type":"parent-child","created_at":"2025-10-25T14:00:27.89804-07:00","created_by":"daemon"}]} {"id":"bd-139","title":"Add connection manager for per-project daemon sockets","description":"Create connection pool for managing multiple daemon socket connections in beads_mcp.tools.\n\nSIMPLIFIED APPROACH (per architectural review):\n- Connection pool dict[str, BdClientBase] keyed by canonical path\n- Add asyncio.Lock for thread-safe pool access (CRITICAL)\n- Path canonicalization with @lru_cache for performance\n- NO LRU eviction (start simple, add if monitoring shows memory issues)\n- NO preemptive health checks (only retry on failure)\n\nFeatures:\n- get_client(workspace_root) → returns client for that project\n- Canonicalization: realpath + git toplevel (handles symlinks)\n- Submodule-aware: Check local .beads BEFORE git toplevel\n- Auto-start daemon if socket missing (existing behavior)\n- Validate version on connect (existing behavior)\n\nCRITICAL: Replace global _client singleton with pool to fix set_context() bug.","design":"Implementation in tools.py:\n\n```python\nfrom contextvars import ContextVar\nimport asyncio\nfrom functools import lru_cache\n\ncurrent_workspace: ContextVar[str | None] = ContextVar('workspace', default=None)\n_connection_pool: Dict[str, BdClientBase] = {}\n_pool_lock = asyncio.Lock() # Prevent race conditions\n\n@lru_cache(maxsize=128)\ndef _canonicalize_path(path: str) -\u003e str:\n # 1. realpath to resolve symlinks\n real = os.path.realpath(path)\n \n # 2. Check local .beads (submodule edge case)\n if os.path.exists(os.path.join(real, \".beads\")):\n return real\n \n # 3. Try git toplevel\n return _resolve_workspace_root(real)\n\nasync def _get_client() -\u003e BdClientBase:\n workspace = current_workspace.get() or os.environ.get(\"BEADS_WORKING_DIR\")\n if not workspace:\n raise BdError(\"No workspace set\")\n \n canonical = _canonicalize_path(workspace)\n \n async with _pool_lock: # CRITICAL: prevent concurrent mutations\n if canonical not in _connection_pool:\n _connection_pool[canonical] = create_bd_client(\n prefer_daemon=True,\n working_dir=canonical\n )\n return _connection_pool[canonical]\n```\n\n~200 LOC total (simplified from ~400-500)","status":"closed","priority":1,"issue_type":"task","assignee":"amp","created_at":"2025-10-25T14:00:27.896896-07:00","updated_at":"2025-10-25T14:31:08.910695-07:00","closed_at":"2025-10-25T14:31:08.910695-07:00","dependencies":[{"issue_id":"bd-139","depends_on_id":"bd-135","type":"parent-child","created_at":"2025-10-25T14:00:27.89917-07:00","created_by":"daemon"}]} {"id":"bd-14","title":"Auto-flush writes test pollution and session work to git-tracked issues.jsonl","description":"Auto-flush exports ALL issues from DB to issues.jsonl every 5 seconds, including:\n- Test issues (bd-4053 through bd-4059 were version test junk)\n- Issues created during debugging sessions\n- Test pollution from stress tests\n- Temporary diagnostic issues\n\nThis pollutes the git-tracked issues.jsonl with garbage that shouldn't be committed.\n\nExample from today:\n- Git had 49 clean issues\n- Our DB grew to 100+ with test junk and session work\n- Auto-flush wrote all 100+ to issues.jsonl\n- Git status showed modified issues.jsonl with 50+ unwanted issues\n\nImpact:\n- Pollutes git history with test/debug garbage\n- Makes code review difficult (noise in diffs)\n- Can't distinguish real work from session artifacts\n- Other team members pull polluted issues\n\nSolutions to consider:\n1. Disable auto-flush by default (require explicit --enable-auto-flush)\n2. Add .beadsignore to exclude issue ID patterns\n3. Make auto-flush only export 'real' issues (exclude test-*)\n4. Require manual 'bd sync' for git commit\n5. Auto-flush to separate file (.beads/session.jsonl vs issues.jsonl)\n\nRelated: bd-117 (test pollution), isolation_test.go (test DB separation)","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-10-22T00:05:10.788996-07:00","updated_at":"2025-10-24T13:51:54.437366-07:00","closed_at":"2025-10-22T01:05:59.459797-07:00"} diff --git a/integrations/beads-mcp/src/beads_mcp/bd_daemon_client.py b/integrations/beads-mcp/src/beads_mcp/bd_daemon_client.py index 08861007..5d10d22a 100644 --- a/integrations/beads-mcp/src/beads_mcp/bd_daemon_client.py +++ b/integrations/beads-mcp/src/beads_mcp/bd_daemon_client.py @@ -200,10 +200,33 @@ class BdDaemonClient(BdClientBase): Raises: DaemonNotRunningError: If daemon is not running + DaemonConnectionError: If connection fails + DaemonError: If request fails """ data = await self._send_request("ping", {}) return json.loads(data) if isinstance(data, str) else data + async def health(self) -> Dict[str, Any]: + """Get daemon health status. + + Returns: + Dict with health info including: + - status: "healthy" | "degraded" | "unhealthy" + - version: daemon version string + - uptime: uptime in seconds + - cache_size: number of cached databases + - db_response_time_ms: database ping time + - active_connections: number of active connections + - memory_bytes: memory usage + + Raises: + DaemonNotRunningError: If daemon is not running + DaemonConnectionError: If connection fails + DaemonError: If request fails + """ + data = await self._send_request("health", {}) + return json.loads(data) if isinstance(data, str) else data + async def init(self, params: Optional[InitParams] = None) -> str: """Initialize new beads database (not typically used via daemon). diff --git a/integrations/beads-mcp/src/beads_mcp/tools.py b/integrations/beads-mcp/src/beads_mcp/tools.py index d0454d15..60139d55 100644 --- a/integrations/beads-mcp/src/beads_mcp/tools.py +++ b/integrations/beads-mcp/src/beads_mcp/tools.py @@ -110,12 +110,76 @@ def _canonicalize_path(path: str) -> str: return _resolve_workspace_root(real) +async def _health_check_client(client: BdClientBase) -> bool: + """Check if a client is healthy and responsive. + + Args: + client: Client to health check + + Returns: + True if client is healthy, False otherwise + """ + # Only health check daemon clients + if not hasattr(client, 'ping'): + return True + + try: + await client.ping() + return True + except Exception: + # Any exception means the client is stale/unhealthy + return False + + +async def _reconnect_client(canonical: str, max_retries: int = 3) -> BdClientBase: + """Attempt to reconnect to daemon with exponential backoff. + + Args: + canonical: Canonical workspace path + max_retries: Maximum number of retry attempts (default: 3) + + Returns: + New client instance + + Raises: + BdError: If all reconnection attempts fail + """ + use_daemon = os.environ.get("BEADS_USE_DAEMON", "1") == "1" + + for attempt in range(max_retries): + try: + client = create_bd_client( + prefer_daemon=use_daemon, + working_dir=canonical + ) + + # Verify new client works + if await _health_check_client(client): + _register_client_for_cleanup(client) + return client + + except Exception: + if attempt < max_retries - 1: + # Exponential backoff: 0.1s, 0.2s, 0.4s + backoff = 0.1 * (2 ** attempt) + await asyncio.sleep(backoff) + continue + + raise BdError( + f"Failed to connect to daemon after {max_retries} attempts. " + "The daemon may be stopped or unresponsive." + ) + + async def _get_client() -> BdClientBase: """Get a BdClient instance for the current workspace. Uses connection pool to manage per-project daemon sockets. Workspace is determined by current_workspace ContextVar or BEADS_WORKING_DIR env. + Performs health check before returning cached client. + On failure, drops from pool and attempts reconnection with exponential backoff. + Performs version check on first connection to each workspace. Uses daemon client if available, falls back to CLI client. @@ -137,7 +201,19 @@ async def _get_client() -> BdClientBase: # Thread-safe connection pool access async with _pool_lock: - if canonical not in _connection_pool: + if canonical in _connection_pool: + # Health check cached client before returning + client = _connection_pool[canonical] + if not await _health_check_client(client): + # Stale connection - remove from pool and reconnect + del _connection_pool[canonical] + if canonical in _version_checked: + _version_checked.remove(canonical) + + # Attempt reconnection with backoff + client = await _reconnect_client(canonical) + _connection_pool[canonical] = client + else: # Create new client for this workspace use_daemon = os.environ.get("BEADS_USE_DAEMON", "1") == "1" @@ -151,8 +227,6 @@ async def _get_client() -> BdClientBase: # Add to pool _connection_pool[canonical] = client - - client = _connection_pool[canonical] # Check version once per workspace (only for CLI client) if canonical not in _version_checked: diff --git a/integrations/beads-mcp/tests/test_daemon_health_check.py b/integrations/beads-mcp/tests/test_daemon_health_check.py new file mode 100644 index 00000000..3bcae46d --- /dev/null +++ b/integrations/beads-mcp/tests/test_daemon_health_check.py @@ -0,0 +1,304 @@ +"""Tests for daemon health check and reconnection logic.""" + +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from beads_mcp.bd_client import BdError +from beads_mcp.bd_daemon_client import ( + BdDaemonClient, + DaemonConnectionError, + DaemonError, + DaemonNotRunningError, +) +from beads_mcp.tools import _get_client, _health_check_client, _reconnect_client + + +@pytest.mark.asyncio +async def test_daemon_client_ping_success(): + """Test successful ping to daemon.""" + client = BdDaemonClient(socket_path="/tmp/bd.sock", working_dir="/tmp/test") + + with patch.object(client, '_send_request', new_callable=AsyncMock) as mock_send: + mock_send.return_value = {"message": "pong", "version": "0.9.10"} + + result = await client.ping() + + assert result["message"] == "pong" + assert result["version"] == "0.9.10" + mock_send.assert_called_once_with("ping", {}) + + +@pytest.mark.asyncio +async def test_daemon_client_ping_connection_error(): + """Test ping when daemon connection fails.""" + client = BdDaemonClient(socket_path="/tmp/bd.sock", working_dir="/tmp/test") + + with patch.object(client, '_send_request', new_callable=AsyncMock) as mock_send: + mock_send.side_effect = DaemonConnectionError("Connection failed") + + with pytest.raises(DaemonConnectionError): + await client.ping() + + +@pytest.mark.asyncio +async def test_daemon_client_health_success(): + """Test successful health check to daemon.""" + client = BdDaemonClient(socket_path="/tmp/bd.sock", working_dir="/tmp/test") + + with patch.object(client, '_send_request', new_callable=AsyncMock) as mock_send: + mock_send.return_value = { + "status": "healthy", + "version": "0.9.10", + "uptime": 123.45, + "cache_size": 5, + "db_response_time_ms": 2.5, + "active_connections": 3, + "memory_bytes": 104857600, + } + + result = await client.health() + + assert result["status"] == "healthy" + assert result["version"] == "0.9.10" + assert result["uptime"] == 123.45 + mock_send.assert_called_once_with("health", {}) + + +@pytest.mark.asyncio +async def test_daemon_client_health_unhealthy(): + """Test health check when daemon is unhealthy.""" + client = BdDaemonClient(socket_path="/tmp/bd.sock", working_dir="/tmp/test") + + with patch.object(client, '_send_request', new_callable=AsyncMock) as mock_send: + mock_send.return_value = { + "status": "unhealthy", + "error": "Database connection failed", + } + + result = await client.health() + + assert result["status"] == "unhealthy" + assert "error" in result + + +@pytest.mark.asyncio +async def test_health_check_client_daemon_client_healthy(): + """Test health check for healthy daemon client.""" + client = BdDaemonClient(socket_path="/tmp/bd.sock", working_dir="/tmp/test") + + with patch.object(client, 'ping', new_callable=AsyncMock) as mock_ping: + mock_ping.return_value = {"message": "pong", "version": "0.9.10"} + + result = await _health_check_client(client) + + assert result is True + mock_ping.assert_called_once() + + +@pytest.mark.asyncio +async def test_health_check_client_daemon_client_unhealthy(): + """Test health check for unhealthy daemon client.""" + client = BdDaemonClient(socket_path="/tmp/bd.sock", working_dir="/tmp/test") + + with patch.object(client, 'ping', new_callable=AsyncMock) as mock_ping: + mock_ping.side_effect = DaemonConnectionError("Connection failed") + + result = await _health_check_client(client) + + assert result is False + + +@pytest.mark.asyncio +async def test_health_check_client_cli_client(): + """Test health check for CLI client (always returns True).""" + from beads_mcp.bd_client import BdClient + + client = BdClient(bd_path="/usr/bin/bd", beads_db="/tmp/test.db") + + result = await _health_check_client(client) + + # CLI clients don't have ping, so they're always considered healthy + assert result is True + + +@pytest.mark.asyncio +async def test_reconnect_client_success(): + """Test successful reconnection after failure.""" + from beads_mcp.bd_client import create_bd_client + + with ( + patch('beads_mcp.tools.create_bd_client') as mock_create, + patch('beads_mcp.tools._health_check_client', new_callable=AsyncMock) as mock_health, + patch('beads_mcp.tools._register_client_for_cleanup') as mock_register, + ): + mock_client = MagicMock() + mock_create.return_value = mock_client + mock_health.return_value = True + + result = await _reconnect_client("/tmp/test") + + assert result == mock_client + mock_create.assert_called_once_with(prefer_daemon=True, working_dir="/tmp/test") + mock_register.assert_called_once_with(mock_client) + + +@pytest.mark.asyncio +async def test_reconnect_client_retry_with_backoff(): + """Test reconnection with exponential backoff on failure.""" + # Need to patch asyncio.sleep in the actual module where it's called + import beads_mcp.tools as tools_module + + with ( + patch.object(tools_module, 'create_bd_client') as mock_create, + patch.object(tools_module, '_health_check_client', new_callable=AsyncMock) as mock_health, + patch.object(tools_module, '_register_client_for_cleanup') as mock_register, + ): + mock_client = MagicMock() + + # Raise exception first two times, succeed third time + mock_create.side_effect = [ + Exception("Connection failed"), + Exception("Connection failed"), + mock_client, + ] + mock_health.return_value = True + + # Mock asyncio.sleep to track calls + sleep_calls = [] + async def mock_sleep(duration): + sleep_calls.append(duration) + # Don't actually sleep in tests + return + + with patch.object(asyncio, 'sleep', side_effect=mock_sleep): + result = await _reconnect_client("/tmp/test", max_retries=3) + + assert result == mock_client + assert mock_create.call_count == 3 + assert len(sleep_calls) == 2 + + # Verify exponential backoff: 0.1s, 0.2s + assert sleep_calls[0] == 0.1 + assert sleep_calls[1] == 0.2 + + +@pytest.mark.asyncio +async def test_reconnect_client_max_retries_exceeded(): + """Test reconnection failure after max retries.""" + with ( + patch('beads_mcp.tools.create_bd_client') as mock_create, + patch('beads_mcp.tools._health_check_client', new_callable=AsyncMock) as mock_health, + patch('asyncio.sleep', new_callable=AsyncMock), + ): + mock_client = MagicMock() + mock_create.return_value = mock_client + mock_health.return_value = False # Always fail health check + + with pytest.raises(BdError, match="Failed to connect to daemon after 3 attempts"): + await _reconnect_client("/tmp/test", max_retries=3) + + assert mock_create.call_count == 3 + + +@pytest.mark.asyncio +async def test_get_client_uses_cached_healthy_client(monkeypatch): + """Test that _get_client returns cached client if healthy.""" + from beads_mcp import tools + + # Set up environment + monkeypatch.setenv("BEADS_WORKING_DIR", "/tmp/test") + + mock_client = MagicMock() + mock_client._check_version = AsyncMock() + + with ( + patch('beads_mcp.tools._canonicalize_path', return_value="/tmp/test"), + patch('beads_mcp.tools._health_check_client', new_callable=AsyncMock) as mock_health, + ): + mock_health.return_value = True + + # Add mock client to pool and mark as version checked + tools._connection_pool["/tmp/test"] = mock_client + tools._version_checked.add("/tmp/test") + + result = await _get_client() + + assert result == mock_client + mock_health.assert_called_once_with(mock_client) + + +@pytest.mark.asyncio +async def test_get_client_reconnects_on_stale_connection(monkeypatch): + """Test that _get_client reconnects when cached client is stale.""" + from beads_mcp import tools + + # Set up environment + monkeypatch.setenv("BEADS_WORKING_DIR", "/tmp/test") + + old_client = MagicMock() + new_client = MagicMock() + new_client._check_version = AsyncMock() + + with ( + patch('beads_mcp.tools._canonicalize_path', return_value="/tmp/test"), + patch('beads_mcp.tools._health_check_client', new_callable=AsyncMock) as mock_health, + patch('beads_mcp.tools._reconnect_client', new_callable=AsyncMock) as mock_reconnect, + ): + # First health check fails (stale), reconnect returns new client + mock_health.return_value = False + mock_reconnect.return_value = new_client + + # Add old client to pool + tools._connection_pool["/tmp/test"] = old_client + tools._version_checked.add("/tmp/test") + + result = await _get_client() + + assert result == new_client + assert tools._connection_pool["/tmp/test"] == new_client + # Version check is performed after reconnect, so it's back in the set + assert "/tmp/test" in tools._version_checked + mock_reconnect.assert_called_once_with("/tmp/test") + + +@pytest.mark.asyncio +async def test_get_client_creates_new_client_if_not_cached(monkeypatch): + """Test that _get_client creates new client if not in pool.""" + from beads_mcp import tools + + # Clear pool + tools._connection_pool.clear() + tools._version_checked.clear() + + # Set up environment + monkeypatch.setenv("BEADS_WORKING_DIR", "/tmp/test") + + mock_client = MagicMock() + mock_client._check_version = AsyncMock() + + with ( + patch('beads_mcp.tools._canonicalize_path', return_value="/tmp/test"), + patch('beads_mcp.tools.create_bd_client', return_value=mock_client) as mock_create, + patch('beads_mcp.tools._register_client_for_cleanup') as mock_register, + ): + result = await _get_client() + + assert result == mock_client + assert tools._connection_pool["/tmp/test"] == mock_client + mock_create.assert_called_once_with(prefer_daemon=True, working_dir="/tmp/test") + mock_register.assert_called_once_with(mock_client) + + +@pytest.mark.asyncio +async def test_get_client_no_workspace_error(): + """Test that _get_client raises error if no workspace is set.""" + from beads_mcp import tools + + # Clear context + tools.current_workspace.set(None) + + with patch.dict('os.environ', {}, clear=True): + with pytest.raises(BdError, match="No workspace set"): + await _get_client()