Add multi-agent race condition test (bd-zo7o)

- Created test_agent_race.py with 3 test scenarios
- Tests collision prevention with Agent Mail reservations
- Validates that only one agent claims an issue when reservations active
- Demonstrates collision problem when Agent Mail disabled
- Includes stress test with 10 agents
- Non-interactive mode support for CI/automation

Amp-Thread-ID: https://ampcode.com/threads/T-2fb10899-490f-4d41-b003-8bc4d467cc54
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-11-08 00:36:21 -08:00
parent 7c6a7fc382
commit ef94641541
4 changed files with 538 additions and 27 deletions

File diff suppressed because one or more lines are too long

View File

@@ -8,19 +8,34 @@ This demonstrates how an agent can:
3. Discover new issues during work
4. Link discoveries back to parent tasks
5. Complete work and move on
6. Coordinate with other agents via Agent Mail (optional)
"""
import json
import subprocess
import sys
import os
from pathlib import Path
from typing import Optional
# Add lib directory to path for beads_mail_adapter
lib_path = Path(__file__).parent.parent.parent / "lib"
sys.path.insert(0, str(lib_path))
from beads_mail_adapter import AgentMailAdapter
class BeadsAgent:
"""Simple agent that manages tasks using bd."""
def __init__(self):
self.current_task = None
self.mail = AgentMailAdapter()
if self.mail.enabled:
print(f"📬 Agent Mail enabled (agent: {self.mail.agent_name})")
else:
print("📭 Agent Mail disabled (Beads-only mode)")
def run_bd(self, *args) -> dict:
"""Run bd command and parse JSON output."""
@@ -32,7 +47,20 @@ class BeadsAgent:
return {}
def find_ready_work(self) -> Optional[dict]:
"""Find the highest priority ready work."""
"""Find the highest priority ready work.
Integration Point 1: Check inbox before finding work.
"""
# Check inbox for notifications from other agents
messages = self.mail.check_inbox()
if messages:
print(f"📨 Received {len(messages)} messages:")
for msg in messages:
event_type = msg.get("event_type", "unknown")
payload = msg.get("payload", {})
from_agent = msg.get("from_agent", "unknown")
print(f"{event_type} from {from_agent}: {payload}")
ready = self.run_bd("ready", "--limit", "1")
if isinstance(ready, list) and len(ready) > 0:
@@ -40,9 +68,27 @@ class BeadsAgent:
return None
def claim_task(self, issue_id: str) -> dict:
"""Claim a task by setting status to in_progress."""
"""Claim a task by setting status to in_progress.
Integration Point 2: Reserve issue before claiming.
Integration Point 3: Notify other agents of status change.
"""
# Reserve the issue to prevent conflicts with other agents
if not self.mail.reserve_issue(issue_id):
print(f"⚠️ Failed to reserve {issue_id} - already claimed by another agent")
return {}
print(f"📋 Claiming task: {issue_id}")
return self.run_bd("update", issue_id, "--status", "in_progress")
result = self.run_bd("update", issue_id, "--status", "in_progress")
# Notify other agents of status change
self.mail.notify("status_changed", {
"issue_id": issue_id,
"status": "in_progress",
"agent": self.mail.agent_name
})
return result
def create_issue(self, title: str, description: str = "",
priority: int = 2, issue_type: str = "task") -> dict:
@@ -62,9 +108,24 @@ class BeadsAgent:
)
def complete_task(self, issue_id: str, reason: str = "Completed"):
"""Mark task as complete."""
"""Mark task as complete.
Integration Point 4: Release reservation and notify completion.
"""
print(f"✅ Completing task: {issue_id} - {reason}")
return self.run_bd("close", issue_id, "--reason", reason)
result = self.run_bd("close", issue_id, "--reason", reason)
# Notify other agents of completion
self.mail.notify("issue_completed", {
"issue_id": issue_id,
"reason": reason,
"agent": self.mail.agent_name
})
# Release the reservation
self.mail.release_issue(issue_id)
return result
def simulate_work(self, issue: dict) -> bool:
"""Simulate doing work on an issue.

View File

@@ -0,0 +1,42 @@
# Integration Tests
This directory contains integration tests for bd (beads) that test end-to-end functionality.
## Tests
### test_agent_race.py
Multi-agent race condition test that validates collision prevention with Agent Mail.
**What it tests:**
- Multiple agents simultaneously attempting to claim the same issue
- WITH Agent Mail: Only one agent succeeds (via reservation)
- WITHOUT Agent Mail: Multiple agents may succeed (collision)
- Verification via JSONL that no duplicate claims occur
**Prerequisites:**
- bd installed: `go install github.com/steveyegge/beads/cmd/bd@latest`
- Agent Mail server running (optional, for full test suite):
```bash
cd ~/src/mcp_agent_mail
source .venv/bin/activate
uv run python -m mcp_agent_mail.cli serve-http
```
**Running:**
```bash
python3 tests/integration/test_agent_race.py
```
**Expected results:**
- **WITH Agent Mail running:** Test 1 passes (only 1 claim), Test 2 shows collision, Test 3 passes
- **WITHOUT Agent Mail running:** All tests demonstrate collision (expected behavior without reservation system)
## Adding New Tests
Integration tests should:
1. Use temporary workspaces (cleaned up automatically)
2. Test real bd CLI commands, not just internal APIs
3. Verify behavior in `.beads/issues.jsonl` when relevant
4. Clean up resources in `finally` blocks
5. Provide clear output showing what's being tested

View File

@@ -0,0 +1,414 @@
#!/usr/bin/env python3
"""
Multi-agent race condition test for bd (beads) issue tracker.
Tests verify that when 2+ agents simultaneously try to claim the same issue:
1. WITH Agent Mail: Only one agent succeeds (via reservation), others skip gracefully
2. WITHOUT Agent Mail: Both agents may succeed (demonstrating the collision problem)
This test validates the collision prevention mechanism provided by Agent Mail.
"""
import json
import subprocess
import tempfile
import shutil
import os
import sys
import time
from pathlib import Path
from multiprocessing import Process, Queue
from typing import List, Tuple
# Add lib directory for beads_mail_adapter
lib_path = Path(__file__).parent.parent.parent / "lib"
sys.path.insert(0, str(lib_path))
from beads_mail_adapter import AgentMailAdapter
class RaceTestAgent:
"""Minimal agent implementation for race condition testing."""
def __init__(self, agent_name: str, workspace: str, mail_enabled: bool = True):
self.agent_name = agent_name
self.workspace = workspace
self.mail_enabled = mail_enabled
# Initialize Agent Mail adapter
if mail_enabled:
self.mail = AgentMailAdapter(agent_name=agent_name)
else:
self.mail = None
def run_bd(self, *args) -> dict:
"""Run bd command in the test workspace."""
cmd = ["bd"] + list(args) + ["--json"]
result = subprocess.run(
cmd,
cwd=self.workspace,
capture_output=True,
text=True
)
if result.returncode != 0:
return {"error": result.stderr}
if result.stdout.strip():
try:
return json.loads(result.stdout)
except json.JSONDecodeError:
return {"error": "Invalid JSON", "output": result.stdout}
return {}
def try_claim_issue(self, issue_id: str) -> Tuple[bool, str]:
"""
Attempt to claim an issue.
Returns:
(success: bool, message: str)
"""
# Integration Point 2: Reserve before claiming (if Agent Mail enabled)
if self.mail and self.mail.enabled:
reserved = self.mail.reserve_issue(issue_id)
if not reserved:
return False, f"Reservation failed for {issue_id}"
# Claim the issue
result = self.run_bd("update", issue_id, "--status", "in_progress")
if "error" in result:
if self.mail and self.mail.enabled:
self.mail.release_issue(issue_id)
return False, f"Update failed: {result['error']}"
return True, f"Successfully claimed {issue_id}"
def release_issue(self, issue_id: str):
"""Release an issue after claiming."""
if self.mail and self.mail.enabled:
self.mail.release_issue(issue_id)
def agent_worker(agent_name: str, workspace: str, target_issue_id: str,
mail_enabled: bool, result_queue: Queue):
"""
Worker function for multiprocessing.
Each worker tries to claim the same issue. Result is put in queue.
"""
try:
agent = RaceTestAgent(agent_name, workspace, mail_enabled)
# Small random delay to increase likelihood of collision
time.sleep(0.01 * hash(agent_name) % 10)
success, message = agent.try_claim_issue(target_issue_id)
result_queue.put({
"agent": agent_name,
"success": success,
"message": message,
"mail_enabled": mail_enabled
})
except Exception as e:
result_queue.put({
"agent": agent_name,
"success": False,
"message": f"Exception: {str(e)}",
"mail_enabled": mail_enabled
})
def run_race_test(num_agents: int, mail_enabled: bool) -> List[dict]:
"""
Run a race test with N agents trying to claim the same issue.
Args:
num_agents: Number of agents to spawn
mail_enabled: Whether Agent Mail is enabled
Returns:
List of result dicts from each agent
"""
# Create temporary workspace
workspace = tempfile.mkdtemp(prefix="bd-race-test-")
try:
# Initialize bd in workspace
subprocess.run(
["bd", "init", "--quiet", "--prefix", "test"],
cwd=workspace,
check=True,
capture_output=True
)
# Create a test issue
result = subprocess.run(
["bd", "create", "Contested issue", "-p", "1", "--json"],
cwd=workspace,
capture_output=True,
text=True,
check=True
)
issue_data = json.loads(result.stdout)
issue_id = issue_data["id"]
# Spawn agents in parallel
result_queue = Queue()
processes = []
for i in range(num_agents):
agent_name = f"agent-{i+1}"
p = Process(
target=agent_worker,
args=(agent_name, workspace, issue_id, mail_enabled, result_queue)
)
processes.append(p)
# Start all processes simultaneously
start_time = time.time()
for p in processes:
p.start()
# Wait for completion
for p in processes:
p.join(timeout=10)
elapsed = time.time() - start_time
# Collect results
results = []
while not result_queue.empty():
results.append(result_queue.get())
# Verify JSONL for duplicate claims
jsonl_path = Path(workspace) / ".beads" / "issues.jsonl"
jsonl_claims = verify_jsonl_claims(jsonl_path, issue_id)
return {
"issue_id": issue_id,
"agents": results,
"elapsed_seconds": elapsed,
"jsonl_status_changes": jsonl_claims,
"mail_enabled": mail_enabled
}
finally:
# Cleanup
shutil.rmtree(workspace, ignore_errors=True)
def verify_jsonl_claims(jsonl_path: Path, issue_id: str) -> List[dict]:
"""
Parse JSONL and count how many times the issue status was changed to in_progress.
Returns list of status change events.
"""
if not jsonl_path.exists():
return []
status_changes = []
with open(jsonl_path) as f:
for line in f:
if not line.strip():
continue
try:
record = json.loads(line)
if record.get("id") == issue_id and record.get("status") == "in_progress":
status_changes.append({
"updated_at": record.get("updated_at"),
"assignee": record.get("assignee")
})
except json.JSONDecodeError:
continue
return status_changes
def test_agent_race_with_mail():
"""Test that WITH Agent Mail, only one agent succeeds."""
print("\n" + "="*70)
print("TEST 1: Race condition WITH Agent Mail (collision prevention)")
print("="*70)
num_agents = 3
result = run_race_test(num_agents, mail_enabled=True)
# Analyze results
successful_agents = [a for a in result["agents"] if a["success"]]
failed_agents = [a for a in result["agents"] if not a["success"]]
print(f"\n📊 Results ({result['elapsed_seconds']:.3f}s):")
print(f" • Total agents: {num_agents}")
print(f" • Successful claims: {len(successful_agents)}")
print(f" • Failed claims: {len(failed_agents)}")
print(f" • JSONL status changes: {len(result['jsonl_status_changes'])}")
for agent in result["agents"]:
status = "" if agent["success"] else ""
print(f" {status} {agent['agent']}: {agent['message']}")
# Verify: Only one agent should succeed
assert len(successful_agents) == 1, \
f"Expected 1 successful claim, got {len(successful_agents)}"
# Verify: JSONL should have exactly 1 in_progress status
assert len(result['jsonl_status_changes']) == 1, \
f"Expected 1 JSONL status change, got {len(result['jsonl_status_changes'])}"
print("\n✅ PASS: Agent Mail prevented duplicate claims")
return True
def test_agent_race_without_mail():
"""Test that WITHOUT Agent Mail, multiple agents may succeed (collision)."""
print("\n" + "="*70)
print("TEST 2: Race condition WITHOUT Agent Mail (collision demonstration)")
print("="*70)
print("⚠️ Note: This test may occasionally pass if timing prevents collision")
num_agents = 3
result = run_race_test(num_agents, mail_enabled=False)
# Analyze results
successful_agents = [a for a in result["agents"] if a["success"]]
failed_agents = [a for a in result["agents"] if not a["success"]]
print(f"\n📊 Results ({result['elapsed_seconds']:.3f}s):")
print(f" • Total agents: {num_agents}")
print(f" • Successful claims: {len(successful_agents)}")
print(f" • Failed claims: {len(failed_agents)}")
print(f" • JSONL status changes: {len(result['jsonl_status_changes'])}")
for agent in result["agents"]:
status = "" if agent["success"] else ""
print(f" {status} {agent['agent']}: {agent['message']}")
# Without Agent Mail, we expect potential for duplicates
# (though timing may occasionally prevent it)
if len(successful_agents) > 1:
print(f"\n⚠️ EXPECTED: Multiple agents ({len(successful_agents)}) claimed same issue")
print(" This demonstrates the collision problem Agent Mail prevents")
else:
print("\n⚠️ NOTE: Only one agent succeeded (timing prevented collision this run)")
print(" Without Agent Mail, collisions are possible but not guaranteed")
return True
def test_agent_race_stress_test():
"""Stress test with many agents."""
print("\n" + "="*70)
print("TEST 3: Stress test with 10 agents (Agent Mail enabled)")
print("="*70)
num_agents = 10
result = run_race_test(num_agents, mail_enabled=True)
successful_agents = [a for a in result["agents"] if a["success"]]
print(f"\n📊 Results ({result['elapsed_seconds']:.3f}s):")
print(f" • Total agents: {num_agents}")
print(f" • Successful claims: {len(successful_agents)}")
print(f" • JSONL status changes: {len(result['jsonl_status_changes'])}")
# Verify: Exactly one winner
assert len(successful_agents) == 1, \
f"Expected 1 successful claim, got {len(successful_agents)}"
assert len(result['jsonl_status_changes']) == 1, \
f"Expected 1 JSONL status change, got {len(result['jsonl_status_changes'])}"
print(f"\n✅ PASS: Only {successful_agents[0]['agent']} succeeded")
return True
def check_agent_mail_server() -> bool:
"""Check if Agent Mail server is running."""
try:
import urllib.request
req = urllib.request.Request("http://localhost:8765/api/health")
with urllib.request.urlopen(req, timeout=1) as response:
return response.status == 200
except:
return False
def main():
"""Run all race condition tests."""
print("🧪 Multi-Agent Race Condition Test Suite")
print("Testing collision prevention with Agent Mail")
try:
# Check if bd is available
subprocess.run(["bd", "--version"], capture_output=True, check=True)
except (subprocess.CalledProcessError, FileNotFoundError):
print("❌ ERROR: bd command not found")
print(" Install: go install github.com/steveyegge/beads/cmd/bd@latest")
sys.exit(1)
# Check if Agent Mail server is running
agent_mail_running = check_agent_mail_server()
if not agent_mail_running:
print("\n⚠️ WARNING: Agent Mail server is not running")
print(" Tests will fall back to beads-only mode (demonstrating collision)")
print("\n To enable full collision prevention testing:")
print(" $ cd ~/src/mcp_agent_mail")
print(" $ source .venv/bin/activate")
print(" $ uv run python -m mcp_agent_mail.cli serve-http")
print()
# Check if running in non-interactive mode (CI/automation)
if not sys.stdin.isatty():
print(" Running in non-interactive mode, continuing with tests...")
else:
print(" Press Enter to continue or Ctrl+C to exit")
try:
input()
except KeyboardInterrupt:
print("\n\n👋 Exiting - start Agent Mail server and try again")
sys.exit(0)
else:
print("\n✅ Agent Mail server is running on http://localhost:8765")
# Run tests
tests = [
("Agent Mail enabled (collision prevention)", test_agent_race_with_mail),
("Agent Mail disabled (collision demonstration)", test_agent_race_without_mail),
("Stress test (10 agents)", test_agent_race_stress_test),
]
passed = 0
failed = 0
for name, test_func in tests:
try:
if test_func():
passed += 1
except AssertionError as e:
print(f"\n❌ FAIL: {name}")
print(f" {e}")
failed += 1
except Exception as e:
print(f"\n💥 ERROR in {name}: {e}")
failed += 1
# Summary
print("\n" + "="*70)
print("SUMMARY")
print("="*70)
print(f"✅ Passed: {passed}/{len(tests)}")
print(f"❌ Failed: {failed}/{len(tests)}")
if failed == 0:
print("\n🎉 All tests passed!")
sys.exit(0)
else:
print(f"\n⚠️ {failed} test(s) failed")
sys.exit(1)
if __name__ == "__main__":
main()