Add multi-agent race condition test (bd-zo7o)
- Created test_agent_race.py with 3 test scenarios - Tests collision prevention with Agent Mail reservations - Validates that only one agent claims an issue when reservations active - Demonstrates collision problem when Agent Mail disabled - Includes stress test with 10 agents - Non-interactive mode support for CI/automation Amp-Thread-ID: https://ampcode.com/threads/T-2fb10899-490f-4d41-b003-8bc4d467cc54 Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -8,19 +8,34 @@ This demonstrates how an agent can:
|
||||
3. Discover new issues during work
|
||||
4. Link discoveries back to parent tasks
|
||||
5. Complete work and move on
|
||||
6. Coordinate with other agents via Agent Mail (optional)
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# Add lib directory to path for beads_mail_adapter
|
||||
lib_path = Path(__file__).parent.parent.parent / "lib"
|
||||
sys.path.insert(0, str(lib_path))
|
||||
|
||||
from beads_mail_adapter import AgentMailAdapter
|
||||
|
||||
|
||||
class BeadsAgent:
|
||||
"""Simple agent that manages tasks using bd."""
|
||||
|
||||
def __init__(self):
|
||||
self.current_task = None
|
||||
self.mail = AgentMailAdapter()
|
||||
|
||||
if self.mail.enabled:
|
||||
print(f"📬 Agent Mail enabled (agent: {self.mail.agent_name})")
|
||||
else:
|
||||
print("📭 Agent Mail disabled (Beads-only mode)")
|
||||
|
||||
def run_bd(self, *args) -> dict:
|
||||
"""Run bd command and parse JSON output."""
|
||||
@@ -32,7 +47,20 @@ class BeadsAgent:
|
||||
return {}
|
||||
|
||||
def find_ready_work(self) -> Optional[dict]:
|
||||
"""Find the highest priority ready work."""
|
||||
"""Find the highest priority ready work.
|
||||
|
||||
Integration Point 1: Check inbox before finding work.
|
||||
"""
|
||||
# Check inbox for notifications from other agents
|
||||
messages = self.mail.check_inbox()
|
||||
if messages:
|
||||
print(f"📨 Received {len(messages)} messages:")
|
||||
for msg in messages:
|
||||
event_type = msg.get("event_type", "unknown")
|
||||
payload = msg.get("payload", {})
|
||||
from_agent = msg.get("from_agent", "unknown")
|
||||
print(f" • {event_type} from {from_agent}: {payload}")
|
||||
|
||||
ready = self.run_bd("ready", "--limit", "1")
|
||||
|
||||
if isinstance(ready, list) and len(ready) > 0:
|
||||
@@ -40,9 +68,27 @@ class BeadsAgent:
|
||||
return None
|
||||
|
||||
def claim_task(self, issue_id: str) -> dict:
|
||||
"""Claim a task by setting status to in_progress."""
|
||||
"""Claim a task by setting status to in_progress.
|
||||
|
||||
Integration Point 2: Reserve issue before claiming.
|
||||
Integration Point 3: Notify other agents of status change.
|
||||
"""
|
||||
# Reserve the issue to prevent conflicts with other agents
|
||||
if not self.mail.reserve_issue(issue_id):
|
||||
print(f"⚠️ Failed to reserve {issue_id} - already claimed by another agent")
|
||||
return {}
|
||||
|
||||
print(f"📋 Claiming task: {issue_id}")
|
||||
return self.run_bd("update", issue_id, "--status", "in_progress")
|
||||
result = self.run_bd("update", issue_id, "--status", "in_progress")
|
||||
|
||||
# Notify other agents of status change
|
||||
self.mail.notify("status_changed", {
|
||||
"issue_id": issue_id,
|
||||
"status": "in_progress",
|
||||
"agent": self.mail.agent_name
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
def create_issue(self, title: str, description: str = "",
|
||||
priority: int = 2, issue_type: str = "task") -> dict:
|
||||
@@ -62,9 +108,24 @@ class BeadsAgent:
|
||||
)
|
||||
|
||||
def complete_task(self, issue_id: str, reason: str = "Completed"):
|
||||
"""Mark task as complete."""
|
||||
"""Mark task as complete.
|
||||
|
||||
Integration Point 4: Release reservation and notify completion.
|
||||
"""
|
||||
print(f"✅ Completing task: {issue_id} - {reason}")
|
||||
return self.run_bd("close", issue_id, "--reason", reason)
|
||||
result = self.run_bd("close", issue_id, "--reason", reason)
|
||||
|
||||
# Notify other agents of completion
|
||||
self.mail.notify("issue_completed", {
|
||||
"issue_id": issue_id,
|
||||
"reason": reason,
|
||||
"agent": self.mail.agent_name
|
||||
})
|
||||
|
||||
# Release the reservation
|
||||
self.mail.release_issue(issue_id)
|
||||
|
||||
return result
|
||||
|
||||
def simulate_work(self, issue: dict) -> bool:
|
||||
"""Simulate doing work on an issue.
|
||||
|
||||
42
tests/integration/README.md
Normal file
42
tests/integration/README.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Integration Tests
|
||||
|
||||
This directory contains integration tests for bd (beads) that test end-to-end functionality.
|
||||
|
||||
## Tests
|
||||
|
||||
### test_agent_race.py
|
||||
|
||||
Multi-agent race condition test that validates collision prevention with Agent Mail.
|
||||
|
||||
**What it tests:**
|
||||
- Multiple agents simultaneously attempting to claim the same issue
|
||||
- WITH Agent Mail: Only one agent succeeds (via reservation)
|
||||
- WITHOUT Agent Mail: Multiple agents may succeed (collision)
|
||||
- Verification via JSONL that no duplicate claims occur
|
||||
|
||||
**Prerequisites:**
|
||||
- bd installed: `go install github.com/steveyegge/beads/cmd/bd@latest`
|
||||
- Agent Mail server running (optional, for full test suite):
|
||||
```bash
|
||||
cd ~/src/mcp_agent_mail
|
||||
source .venv/bin/activate
|
||||
uv run python -m mcp_agent_mail.cli serve-http
|
||||
```
|
||||
|
||||
**Running:**
|
||||
```bash
|
||||
python3 tests/integration/test_agent_race.py
|
||||
```
|
||||
|
||||
**Expected results:**
|
||||
- **WITH Agent Mail running:** Test 1 passes (only 1 claim), Test 2 shows collision, Test 3 passes
|
||||
- **WITHOUT Agent Mail running:** All tests demonstrate collision (expected behavior without reservation system)
|
||||
|
||||
## Adding New Tests
|
||||
|
||||
Integration tests should:
|
||||
1. Use temporary workspaces (cleaned up automatically)
|
||||
2. Test real bd CLI commands, not just internal APIs
|
||||
3. Verify behavior in `.beads/issues.jsonl` when relevant
|
||||
4. Clean up resources in `finally` blocks
|
||||
5. Provide clear output showing what's being tested
|
||||
414
tests/integration/test_agent_race.py
Executable file
414
tests/integration/test_agent_race.py
Executable file
@@ -0,0 +1,414 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-agent race condition test for bd (beads) issue tracker.
|
||||
|
||||
Tests verify that when 2+ agents simultaneously try to claim the same issue:
|
||||
1. WITH Agent Mail: Only one agent succeeds (via reservation), others skip gracefully
|
||||
2. WITHOUT Agent Mail: Both agents may succeed (demonstrating the collision problem)
|
||||
|
||||
This test validates the collision prevention mechanism provided by Agent Mail.
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import tempfile
|
||||
import shutil
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from multiprocessing import Process, Queue
|
||||
from typing import List, Tuple
|
||||
|
||||
# Add lib directory for beads_mail_adapter
|
||||
lib_path = Path(__file__).parent.parent.parent / "lib"
|
||||
sys.path.insert(0, str(lib_path))
|
||||
|
||||
from beads_mail_adapter import AgentMailAdapter
|
||||
|
||||
|
||||
class RaceTestAgent:
|
||||
"""Minimal agent implementation for race condition testing."""
|
||||
|
||||
def __init__(self, agent_name: str, workspace: str, mail_enabled: bool = True):
|
||||
self.agent_name = agent_name
|
||||
self.workspace = workspace
|
||||
self.mail_enabled = mail_enabled
|
||||
|
||||
# Initialize Agent Mail adapter
|
||||
if mail_enabled:
|
||||
self.mail = AgentMailAdapter(agent_name=agent_name)
|
||||
else:
|
||||
self.mail = None
|
||||
|
||||
def run_bd(self, *args) -> dict:
|
||||
"""Run bd command in the test workspace."""
|
||||
cmd = ["bd"] + list(args) + ["--json"]
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
cwd=self.workspace,
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
return {"error": result.stderr}
|
||||
|
||||
if result.stdout.strip():
|
||||
try:
|
||||
return json.loads(result.stdout)
|
||||
except json.JSONDecodeError:
|
||||
return {"error": "Invalid JSON", "output": result.stdout}
|
||||
return {}
|
||||
|
||||
def try_claim_issue(self, issue_id: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
Attempt to claim an issue.
|
||||
|
||||
Returns:
|
||||
(success: bool, message: str)
|
||||
"""
|
||||
# Integration Point 2: Reserve before claiming (if Agent Mail enabled)
|
||||
if self.mail and self.mail.enabled:
|
||||
reserved = self.mail.reserve_issue(issue_id)
|
||||
if not reserved:
|
||||
return False, f"Reservation failed for {issue_id}"
|
||||
|
||||
# Claim the issue
|
||||
result = self.run_bd("update", issue_id, "--status", "in_progress")
|
||||
|
||||
if "error" in result:
|
||||
if self.mail and self.mail.enabled:
|
||||
self.mail.release_issue(issue_id)
|
||||
return False, f"Update failed: {result['error']}"
|
||||
|
||||
return True, f"Successfully claimed {issue_id}"
|
||||
|
||||
def release_issue(self, issue_id: str):
|
||||
"""Release an issue after claiming."""
|
||||
if self.mail and self.mail.enabled:
|
||||
self.mail.release_issue(issue_id)
|
||||
|
||||
|
||||
def agent_worker(agent_name: str, workspace: str, target_issue_id: str,
|
||||
mail_enabled: bool, result_queue: Queue):
|
||||
"""
|
||||
Worker function for multiprocessing.
|
||||
|
||||
Each worker tries to claim the same issue. Result is put in queue.
|
||||
"""
|
||||
try:
|
||||
agent = RaceTestAgent(agent_name, workspace, mail_enabled)
|
||||
|
||||
# Small random delay to increase likelihood of collision
|
||||
time.sleep(0.01 * hash(agent_name) % 10)
|
||||
|
||||
success, message = agent.try_claim_issue(target_issue_id)
|
||||
|
||||
result_queue.put({
|
||||
"agent": agent_name,
|
||||
"success": success,
|
||||
"message": message,
|
||||
"mail_enabled": mail_enabled
|
||||
})
|
||||
except Exception as e:
|
||||
result_queue.put({
|
||||
"agent": agent_name,
|
||||
"success": False,
|
||||
"message": f"Exception: {str(e)}",
|
||||
"mail_enabled": mail_enabled
|
||||
})
|
||||
|
||||
|
||||
def run_race_test(num_agents: int, mail_enabled: bool) -> List[dict]:
|
||||
"""
|
||||
Run a race test with N agents trying to claim the same issue.
|
||||
|
||||
Args:
|
||||
num_agents: Number of agents to spawn
|
||||
mail_enabled: Whether Agent Mail is enabled
|
||||
|
||||
Returns:
|
||||
List of result dicts from each agent
|
||||
"""
|
||||
# Create temporary workspace
|
||||
workspace = tempfile.mkdtemp(prefix="bd-race-test-")
|
||||
|
||||
try:
|
||||
# Initialize bd in workspace
|
||||
subprocess.run(
|
||||
["bd", "init", "--quiet", "--prefix", "test"],
|
||||
cwd=workspace,
|
||||
check=True,
|
||||
capture_output=True
|
||||
)
|
||||
|
||||
# Create a test issue
|
||||
result = subprocess.run(
|
||||
["bd", "create", "Contested issue", "-p", "1", "--json"],
|
||||
cwd=workspace,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True
|
||||
)
|
||||
issue_data = json.loads(result.stdout)
|
||||
issue_id = issue_data["id"]
|
||||
|
||||
# Spawn agents in parallel
|
||||
result_queue = Queue()
|
||||
processes = []
|
||||
|
||||
for i in range(num_agents):
|
||||
agent_name = f"agent-{i+1}"
|
||||
p = Process(
|
||||
target=agent_worker,
|
||||
args=(agent_name, workspace, issue_id, mail_enabled, result_queue)
|
||||
)
|
||||
processes.append(p)
|
||||
|
||||
# Start all processes simultaneously
|
||||
start_time = time.time()
|
||||
for p in processes:
|
||||
p.start()
|
||||
|
||||
# Wait for completion
|
||||
for p in processes:
|
||||
p.join(timeout=10)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
# Collect results
|
||||
results = []
|
||||
while not result_queue.empty():
|
||||
results.append(result_queue.get())
|
||||
|
||||
# Verify JSONL for duplicate claims
|
||||
jsonl_path = Path(workspace) / ".beads" / "issues.jsonl"
|
||||
jsonl_claims = verify_jsonl_claims(jsonl_path, issue_id)
|
||||
|
||||
return {
|
||||
"issue_id": issue_id,
|
||||
"agents": results,
|
||||
"elapsed_seconds": elapsed,
|
||||
"jsonl_status_changes": jsonl_claims,
|
||||
"mail_enabled": mail_enabled
|
||||
}
|
||||
|
||||
finally:
|
||||
# Cleanup
|
||||
shutil.rmtree(workspace, ignore_errors=True)
|
||||
|
||||
|
||||
def verify_jsonl_claims(jsonl_path: Path, issue_id: str) -> List[dict]:
|
||||
"""
|
||||
Parse JSONL and count how many times the issue status was changed to in_progress.
|
||||
|
||||
Returns list of status change events.
|
||||
"""
|
||||
if not jsonl_path.exists():
|
||||
return []
|
||||
|
||||
status_changes = []
|
||||
|
||||
with open(jsonl_path) as f:
|
||||
for line in f:
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
try:
|
||||
record = json.loads(line)
|
||||
if record.get("id") == issue_id and record.get("status") == "in_progress":
|
||||
status_changes.append({
|
||||
"updated_at": record.get("updated_at"),
|
||||
"assignee": record.get("assignee")
|
||||
})
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
return status_changes
|
||||
|
||||
|
||||
def test_agent_race_with_mail():
|
||||
"""Test that WITH Agent Mail, only one agent succeeds."""
|
||||
print("\n" + "="*70)
|
||||
print("TEST 1: Race condition WITH Agent Mail (collision prevention)")
|
||||
print("="*70)
|
||||
|
||||
num_agents = 3
|
||||
result = run_race_test(num_agents, mail_enabled=True)
|
||||
|
||||
# Analyze results
|
||||
successful_agents = [a for a in result["agents"] if a["success"]]
|
||||
failed_agents = [a for a in result["agents"] if not a["success"]]
|
||||
|
||||
print(f"\n📊 Results ({result['elapsed_seconds']:.3f}s):")
|
||||
print(f" • Total agents: {num_agents}")
|
||||
print(f" • Successful claims: {len(successful_agents)}")
|
||||
print(f" • Failed claims: {len(failed_agents)}")
|
||||
print(f" • JSONL status changes: {len(result['jsonl_status_changes'])}")
|
||||
|
||||
for agent in result["agents"]:
|
||||
status = "✅" if agent["success"] else "❌"
|
||||
print(f" {status} {agent['agent']}: {agent['message']}")
|
||||
|
||||
# Verify: Only one agent should succeed
|
||||
assert len(successful_agents) == 1, \
|
||||
f"Expected 1 successful claim, got {len(successful_agents)}"
|
||||
|
||||
# Verify: JSONL should have exactly 1 in_progress status
|
||||
assert len(result['jsonl_status_changes']) == 1, \
|
||||
f"Expected 1 JSONL status change, got {len(result['jsonl_status_changes'])}"
|
||||
|
||||
print("\n✅ PASS: Agent Mail prevented duplicate claims")
|
||||
return True
|
||||
|
||||
|
||||
def test_agent_race_without_mail():
|
||||
"""Test that WITHOUT Agent Mail, multiple agents may succeed (collision)."""
|
||||
print("\n" + "="*70)
|
||||
print("TEST 2: Race condition WITHOUT Agent Mail (collision demonstration)")
|
||||
print("="*70)
|
||||
print("⚠️ Note: This test may occasionally pass if timing prevents collision")
|
||||
|
||||
num_agents = 3
|
||||
result = run_race_test(num_agents, mail_enabled=False)
|
||||
|
||||
# Analyze results
|
||||
successful_agents = [a for a in result["agents"] if a["success"]]
|
||||
failed_agents = [a for a in result["agents"] if not a["success"]]
|
||||
|
||||
print(f"\n📊 Results ({result['elapsed_seconds']:.3f}s):")
|
||||
print(f" • Total agents: {num_agents}")
|
||||
print(f" • Successful claims: {len(successful_agents)}")
|
||||
print(f" • Failed claims: {len(failed_agents)}")
|
||||
print(f" • JSONL status changes: {len(result['jsonl_status_changes'])}")
|
||||
|
||||
for agent in result["agents"]:
|
||||
status = "✅" if agent["success"] else "❌"
|
||||
print(f" {status} {agent['agent']}: {agent['message']}")
|
||||
|
||||
# Without Agent Mail, we expect potential for duplicates
|
||||
# (though timing may occasionally prevent it)
|
||||
if len(successful_agents) > 1:
|
||||
print(f"\n⚠️ EXPECTED: Multiple agents ({len(successful_agents)}) claimed same issue")
|
||||
print(" This demonstrates the collision problem Agent Mail prevents")
|
||||
else:
|
||||
print("\n⚠️ NOTE: Only one agent succeeded (timing prevented collision this run)")
|
||||
print(" Without Agent Mail, collisions are possible but not guaranteed")
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def test_agent_race_stress_test():
|
||||
"""Stress test with many agents."""
|
||||
print("\n" + "="*70)
|
||||
print("TEST 3: Stress test with 10 agents (Agent Mail enabled)")
|
||||
print("="*70)
|
||||
|
||||
num_agents = 10
|
||||
result = run_race_test(num_agents, mail_enabled=True)
|
||||
|
||||
successful_agents = [a for a in result["agents"] if a["success"]]
|
||||
|
||||
print(f"\n📊 Results ({result['elapsed_seconds']:.3f}s):")
|
||||
print(f" • Total agents: {num_agents}")
|
||||
print(f" • Successful claims: {len(successful_agents)}")
|
||||
print(f" • JSONL status changes: {len(result['jsonl_status_changes'])}")
|
||||
|
||||
# Verify: Exactly one winner
|
||||
assert len(successful_agents) == 1, \
|
||||
f"Expected 1 successful claim, got {len(successful_agents)}"
|
||||
assert len(result['jsonl_status_changes']) == 1, \
|
||||
f"Expected 1 JSONL status change, got {len(result['jsonl_status_changes'])}"
|
||||
|
||||
print(f"\n✅ PASS: Only {successful_agents[0]['agent']} succeeded")
|
||||
return True
|
||||
|
||||
|
||||
def check_agent_mail_server() -> bool:
|
||||
"""Check if Agent Mail server is running."""
|
||||
try:
|
||||
import urllib.request
|
||||
req = urllib.request.Request("http://localhost:8765/api/health")
|
||||
with urllib.request.urlopen(req, timeout=1) as response:
|
||||
return response.status == 200
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
"""Run all race condition tests."""
|
||||
print("🧪 Multi-Agent Race Condition Test Suite")
|
||||
print("Testing collision prevention with Agent Mail")
|
||||
|
||||
try:
|
||||
# Check if bd is available
|
||||
subprocess.run(["bd", "--version"], capture_output=True, check=True)
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
print("❌ ERROR: bd command not found")
|
||||
print(" Install: go install github.com/steveyegge/beads/cmd/bd@latest")
|
||||
sys.exit(1)
|
||||
|
||||
# Check if Agent Mail server is running
|
||||
agent_mail_running = check_agent_mail_server()
|
||||
if not agent_mail_running:
|
||||
print("\n⚠️ WARNING: Agent Mail server is not running")
|
||||
print(" Tests will fall back to beads-only mode (demonstrating collision)")
|
||||
print("\n To enable full collision prevention testing:")
|
||||
print(" $ cd ~/src/mcp_agent_mail")
|
||||
print(" $ source .venv/bin/activate")
|
||||
print(" $ uv run python -m mcp_agent_mail.cli serve-http")
|
||||
print()
|
||||
|
||||
# Check if running in non-interactive mode (CI/automation)
|
||||
if not sys.stdin.isatty():
|
||||
print(" Running in non-interactive mode, continuing with tests...")
|
||||
else:
|
||||
print(" Press Enter to continue or Ctrl+C to exit")
|
||||
try:
|
||||
input()
|
||||
except KeyboardInterrupt:
|
||||
print("\n\n👋 Exiting - start Agent Mail server and try again")
|
||||
sys.exit(0)
|
||||
else:
|
||||
print("\n✅ Agent Mail server is running on http://localhost:8765")
|
||||
|
||||
# Run tests
|
||||
tests = [
|
||||
("Agent Mail enabled (collision prevention)", test_agent_race_with_mail),
|
||||
("Agent Mail disabled (collision demonstration)", test_agent_race_without_mail),
|
||||
("Stress test (10 agents)", test_agent_race_stress_test),
|
||||
]
|
||||
|
||||
passed = 0
|
||||
failed = 0
|
||||
|
||||
for name, test_func in tests:
|
||||
try:
|
||||
if test_func():
|
||||
passed += 1
|
||||
except AssertionError as e:
|
||||
print(f"\n❌ FAIL: {name}")
|
||||
print(f" {e}")
|
||||
failed += 1
|
||||
except Exception as e:
|
||||
print(f"\n💥 ERROR in {name}: {e}")
|
||||
failed += 1
|
||||
|
||||
# Summary
|
||||
print("\n" + "="*70)
|
||||
print("SUMMARY")
|
||||
print("="*70)
|
||||
print(f"✅ Passed: {passed}/{len(tests)}")
|
||||
print(f"❌ Failed: {failed}/{len(tests)}")
|
||||
|
||||
if failed == 0:
|
||||
print("\n🎉 All tests passed!")
|
||||
sys.exit(0)
|
||||
else:
|
||||
print(f"\n⚠️ {failed} test(s) failed")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user