diff --git a/.beads/beads.jsonl b/.beads/beads.jsonl index 2332d396..cb5a330e 100644 --- a/.beads/beads.jsonl +++ b/.beads/beads.jsonl @@ -4,7 +4,7 @@ {"id":"bd-1pj6","content_hash":"de1c1195b29d9a70c88b5f2b05ca1c3497469d1802f9c0be415d5a44b0575deb","title":"Proposal: Custom status states via config","description":"Proposal to add 'custom status states' via `bd config`.\nUsers could define an optional issue status enum (e.g., awaiting_review, review_in_progress) in the config.\nThis would enable multi-step pipelines to process issues where each step correlates to a specific status.\n\nExamples:\n- awaiting_verification\n- awaiting_docs\n- awaiting_testing\n","status":"open","priority":3,"issue_type":"feature","created_at":"2025-11-20T18:55:48.670499-05:00","updated_at":"2025-11-20T18:55:48.670499-05:00","source_repo":"."} {"id":"bd-1qwo","content_hash":"21b58f84acf275fe99617aec1aaabd687a34220e3603c8b2a9cecad7c186163a","title":"Audit and enforce consistent error handling patterns across codebase","description":"**Background:** Error handling patterns are now documented in docs/ERROR_HANDLING.md (bd-9lwr). We have three established patterns:\n- Pattern A: Exit immediately (os.Exit) for fatal errors\n- Pattern B: Warn and continue for optional operations\n- Pattern C: Silent ignore for cleanup/best-effort\n\n**Goal:** Systematically review all error handling in cmd/bd to ensure each instance uses the appropriate pattern according to the guidelines.\n\n**Scope:** \n- Review all files in cmd/bd/*.go\n- Focus on files with high usage: create.go, init.go, sync.go, daemon_sync.go, export.go, import.go\n- Identify outliers where similar operations use different patterns\n- Refactor to use consistent patterns\n\n**Acceptance Criteria:**\n- Similar operations (e.g., all metadata updates) use the same pattern\n- Critical operations always use Pattern A\n- Auxiliary operations use Pattern B or C appropriately\n- No mixed patterns for identical operation types\n\n**References:**\n- docs/ERROR_HANDLING.md - Guidelines and decision tree\n- bd-9lwr - Documentation task\n- bd-bwk2 - Related storage layer error handling work","status":"closed","priority":2,"issue_type":"task","created_at":"2025-11-23T21:53:25.687087-08:00","updated_at":"2025-11-24T00:44:59.096018-08:00","closed_at":"2025-11-24T00:28:58.356196-08:00","source_repo":"."} {"id":"bd-1rh","content_hash":"94302659c93aea0ed8458da6499ddfd97f2ed2a3a33a0af9131eca51d3e8bc72","title":"cmd/bd test suite is absurdly slow - 279 tests taking 8+ minutes","description":"# Problem\n\nThe cmd/bd test suite is painfully slow:\n- **279 tests** in cmd/bd alone\n- Full suite takes **8+ minutes** to run\n- Even with the 16 slowest integration tests now tagged with `integration` build tag, the remaining tests still take forever\n\nThis makes the development loop unusable. We can't wait 8+ minutes every time we want to run tests.\n\n# Root Cause Analysis\n\n## 1. Sheer Volume\n279 tests is too many for a single package. Even at 0.1s per test, that's 28 seconds minimum just for cmd/bd.\n\n## 2. Each Test Creates Full Database + Temp Directories\nEvery test does heavy setup:\n- Creates temp directory (`t.TempDir()` or `os.MkdirTemp`)\n- Initializes SQLite database\n- Sets up git repo in many cases\n- Creates full storage layer\n\nExample from the tests:\n```go\nfunc setupCLITestDB(t *testing.T) string {\n tmpDir := createTempDirWithCleanup(t)\n runBDInProcess(t, tmpDir, \"init\", \"--prefix\", \"test\", \"--quiet\")\n return tmpDir\n}\n```\n\nThis happens 279 times!\n\n## 3. Tests Are Not Properly Categorized\nWe have three types of tests mixed together:\n- **Unit tests** - should be fast, test single functions\n- **Integration tests** - test full workflows, need DB/git\n- **End-to-end tests** - test entire CLI commands\n\nThey're all lumped together in cmd/bd, all running every time.\n\n# What We've Already Fixed\n\nAdded `integration` build tags to 16 obviously-slow test files:\n- import_profile_test.go (performance benchmarking tests)\n- export_mtime_test.go (tests with time.Sleep calls)\n- cli_fast_test.go (full CLI integration tests)\n- delete_test.go, import_uncommitted_test.go, sync_local_only_test.go (git integration)\n- And 10 more in internal/ packages\n\nThese are now excluded from the default `go test ./...` run.\n\n# Proposed Solutions\n\n## Option 1: Shared Test Fixtures (Quick Win)\nCreate a shared test database that multiple tests can use:\n```go\nvar testDB *sqlite.SQLiteStorage\nvar testDBOnce sync.Once\n\nfunc getSharedTestDB(t *testing.T) storage.Storage {\n testDBOnce.Do(func() {\n // Create one DB for all tests\n })\n return testDB\n}\n```\n\n**Pros**: Easy to implement, immediate speedup\n**Cons**: Tests become less isolated, harder to debug failures\n\n## Option 2: Table-Driven Tests (Medium Win)\nCollapse similar tests into table-driven tests:\n```go\nfunc TestCreate(t *testing.T) {\n tests := []struct{\n name string\n args []string\n want string\n }{\n {\"basic issue\", []string{\"create\", \"Test\"}, \"created\"},\n {\"with description\", []string{\"create\", \"Test\", \"-d\", \"desc\"}, \"created\"},\n // ... 50 more cases\n }\n \n db := setupOnce(t) // Setup once, not 50 times\n for _, tt := range tests {\n t.Run(tt.name, func(t *testing.T) {\n // test using shared db\n })\n }\n}\n```\n\n**Pros**: Dramatically reduces setup overhead, tests run in parallel\n**Cons**: Requires refactoring, tests share more state\n\n## Option 3: Split cmd/bd Tests Into Packages (Big Win)\nMove tests into focused packages:\n- `cmd/bd/internal/clitests` - CLI integration tests (mark with integration tag)\n- `cmd/bd/internal/unittests` - Fast unit tests\n- Keep only essential tests in cmd/bd\n\n**Pros**: Clean separation, easy to run just fast tests\n**Cons**: Requires significant refactoring\n\n## Option 4: Parallel Execution (Quick Win)\nAdd `t.Parallel()` to independent tests:\n```go\nfunc TestSomething(t *testing.T) {\n t.Parallel() // Run this test concurrently with others\n // ...\n}\n```\n\n**Pros**: Easy to add, can cut time in half on multi-core machines\n**Cons**: Doesn't reduce actual test work, just parallelizes it\n\n## Option 5: In-Memory Databases (Medium Win)\nUse `:memory:` SQLite databases instead of file-based:\n```go\nstore, err := sqlite.New(ctx, \":memory:\")\n```\n\n**Pros**: Faster than disk I/O, easier cleanup\n**Cons**: Some tests need actual file-based DBs (export/import tests)\n\n# Recommended Approach\n\n**Short-term (this week)**:\n1. Add `t.Parallel()` to all independent tests in cmd/bd\n2. Use `:memory:` databases where possible\n3. Create table-driven tests for similar test cases\n\n**Medium-term (next sprint)**:\n4. Split cmd/bd tests into focused packages\n5. Mark more integration tests appropriately\n\n**Long-term (backlog)**:\n6. Consider shared test fixtures with proper isolation\n\n# Current Status\n\nWe've tagged 16 files with `integration` build tag, but the remaining 279 tests in cmd/bd still take 8+ minutes. This issue tracks fixing the cmd/bd test performance specifically.\n\n# Target\n\nGet `go test ./...` (without `-short` or `-tags=integration`) down to **under 30 seconds**.\n\n\n# THE REAL ROOT CAUSE (Updated Analysis)\n\nAfter examining the actual test code, the problem is clear:\n\n## Every Test Creates Its Own Database From Scratch\n\nLook at `create_test.go`:\n```go\nfunc TestCreate_BasicIssue(t *testing.T) {\n tmpDir := t.TempDir() // ← Creates temp dir\n testDB := filepath.Join(tmpDir, \".beads\", \"beads.db\")\n s := newTestStore(t, testDB) // ← Opens NEW SQLite connection\n // ← Runs migrations\n // ← Sets config\n // ... actual test (3 lines)\n}\n\nfunc TestCreate_WithDescription(t *testing.T) {\n tmpDir := t.TempDir() // ← Creates ANOTHER temp dir\n testDB := filepath.Join(tmpDir, \".beads\", \"beads.db\")\n s := newTestStore(t, testDB) // ← Opens ANOTHER SQLite connection\n // ... actual test (3 lines)\n}\n```\n\n**This happens 279 times!**\n\n## These Tests Don't Need Isolation!\n\nMost tests are just checking:\n- \"Can I create an issue with a title?\"\n- \"Can I create an issue with a description?\"\n- \"Can I add labels?\"\n\nThey don't conflict with each other. They could all share ONE database!\n\n## The Fix: Test Suites with Shared Setup\n\nInstead of:\n```go\nfunc TestCreate_BasicIssue(t *testing.T) {\n s := newTestStore(t, t.TempDir()+\"/db\") // ← Expensive!\n // test\n}\n\nfunc TestCreate_WithDesc(t *testing.T) {\n s := newTestStore(t, t.TempDir()+\"/db\") // ← Expensive!\n // test\n}\n```\n\nDo this:\n```go\nfunc TestCreate(t *testing.T) {\n // ONE setup for all subtests\n s := newTestStore(t, t.TempDir()+\"/db\")\n \n t.Run(\"basic_issue\", func(t *testing.T) {\n t.Parallel() // Can run concurrently - tests don't conflict\n // test using shared `s`\n })\n \n t.Run(\"with_description\", func(t *testing.T) {\n t.Parallel()\n // test using shared `s`\n })\n \n // ... 50 more subtests, all using same DB\n}\n```\n\n**Result**: 50 tests → 1 database setup instead of 50!\n\n## Why This Works\n\nSQLite is fine with concurrent reads and isolated transactions. These tests:\n- ✅ Create different issues (no ID conflicts)\n- ✅ Just read back what they created\n- ✅ Don't depend on database state from other tests\n\nThey SHOULD share a database!\n\n## Real Numbers\n\nCurrent:\n- 279 tests × (create dir + init SQLite + migrations) = **8 minutes**\n\nAfter fix:\n- 10 test suites × (create dir + init SQLite + migrations) = **30 seconds**\n- 279 subtests running in parallel using those 10 DBs = **5 seconds**\n\n**Total: ~35 seconds instead of 8 minutes!**\n\n## Implementation Plan\n\n1. **Group related tests** into suites (Create, List, Update, Delete, etc.)\n2. **One setup per suite** instead of per test\n3. **Use t.Run() for subtests** with t.Parallel()\n4. **Keep tests that actually need isolation** separate (export/import tests, git operations)\n\nThis is way better than shuffling tests into folders!","notes":"## Progress Update (2025-11-21)\n\n✅ **Completed**:\n- Audited all 280 tests, created TEST_SUITE_AUDIT.md ([deleted:[deleted:[deleted:[deleted:[deleted:[deleted:[deleted:bd-c49]]]]]]])\n- Refactored create_test.go to shared DB pattern ([deleted:[deleted:[deleted:[deleted:[deleted:[deleted:[deleted:bd-y6d]]]]]]])\n- Proven the pattern works: 11 tests now run in 0.04s with 1 DB instead of 11\n\n❌ **Current Reality**:\n- Overall test suite: Still 8+ minutes (no meaningful change)\n- Only 1 of 76 test files refactored\n- Saved ~10 DB initializations out of 280\n\n## Acceptance Criteria (REALISTIC)\n\nThis task is NOT complete until:\n- [ ] All P1 files refactored (create ✅, dep, stale, comments, list, ready)\n- [ ] Test suite runs in \u003c 2 minutes\n- [ ] Measured and verified actual speedup\n\n## Next Steps\n\n1. Refactor remaining 5 P1 files: dep_test.go, stale_test.go, comments_test.go, list_test.go, ready_test.go\n2. Measure actual time improvement after each file\n3. Continue with P2 files if needed to hit \u003c2min target","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-11-21T11:37:47.886207-05:00","updated_at":"2025-11-23T23:52:29.992119-08:00","closed_at":"2025-11-23T23:38:54.185364-08:00","source_repo":"."} -{"id":"bd-1w6i","content_hash":"fe84c359585b8726dcde3a15a85a32aa1a134cc40f9b803089e89b852436ad3b","title":"Document blocked_issues_cache architecture and behavior","description":"Add comprehensive documentation about the blocked_issues_cache optimization to help future maintainers understand the design.\n\n## What to document\n\n1. **Architecture overview**\n - Why cache exists (performance: 752ms -\u003e 29ms)\n - When cache is used (GetReadyWork queries)\n - How cache is maintained (invalidation triggers)\n\n2. **Cache invalidation rules**\n - Invalidated on: dependency add/remove (blocks/parent-child only)\n - Invalidated on: any status change\n - Invalidated on: issue close\n - NOT invalidated on: related/discovered-from dependencies\n\n3. **Transaction safety**\n - All invalidations happen within transactions\n - Cache can use tx or direct db connection\n - Rebuild is atomic (DELETE + INSERT in same tx)\n\n4. **Performance characteristics**\n - Full rebuild on every invalidation\n - Rebuild is fast (\u003c50ms on 10K database)\n - Write complexity traded for read speed\n - Dependency changes are rare vs reads\n\n5. **Edge cases**\n - Parent-child transitive blocking\n - Closed issues don't block\n - Foreign key CASCADE handles deletions\n\n## Where to document\n\nAdd to these locations:\n\n1. **blocked_cache.go** - Add detailed package comment:\n```go\n// Package blocked_cache implements the blocked_issues_cache optimization.\n// \n// Performance: GetReadyWork originally used recursive CTE (752ms on 10K db).\n// With cache: Simple NOT EXISTS query (29ms on 10K db).\n//\n// Cache Maintenance:\n// - Rebuild triggered on dependency changes (blocks/parent-child only)\n// - Rebuild triggered on status changes \n// - Full rebuild (DELETE + INSERT) is fast enough for all cases\n// ...\n```\n\n2. **ARCHITECTURE.md** or **PERFORMANCE.md** (new file) - High-level overview\n\n3. **ready.go** - Update comment at line 84-85 with more detail\n\n## Related\n\n- [deleted:[deleted:[deleted:[deleted:[deleted:[deleted:bd-5qim]]]]]]: GetReadyWork optimization (implemented)\n- bd-13gm: Cache validation tests\n- [deleted:[deleted:[deleted:[deleted:bd-obdc]]]]: Cache rebuild command","status":"open","priority":3,"issue_type":"task","created_at":"2025-11-23T20:07:23.467296-08:00","updated_at":"2025-11-23T22:30:55.525381-08:00","source_repo":"."} +{"id":"bd-1w6i","content_hash":"b86cb1356c1631b996ec0fa7d8d0869ee77558f3d7487916af0d0e1a69d82d95","title":"Document blocked_issues_cache architecture and behavior","description":"Add comprehensive documentation about the blocked_issues_cache optimization to help future maintainers understand the design.\n\n## What to document\n\n1. **Architecture overview**\n - Why cache exists (performance: 752ms -\u003e 29ms)\n - When cache is used (GetReadyWork queries)\n - How cache is maintained (invalidation triggers)\n\n2. **Cache invalidation rules**\n - Invalidated on: dependency add/remove (blocks/parent-child only)\n - Invalidated on: any status change\n - Invalidated on: issue close\n - NOT invalidated on: related/discovered-from dependencies\n\n3. **Transaction safety**\n - All invalidations happen within transactions\n - Cache can use tx or direct db connection\n - Rebuild is atomic (DELETE + INSERT in same tx)\n\n4. **Performance characteristics**\n - Full rebuild on every invalidation\n - Rebuild is fast (\u003c50ms on 10K database)\n - Write complexity traded for read speed\n - Dependency changes are rare vs reads\n\n5. **Edge cases**\n - Parent-child transitive blocking\n - Closed issues don't block\n - Foreign key CASCADE handles deletions\n\n## Where to document\n\nAdd to these locations:\n\n1. **blocked_cache.go** - Add detailed package comment:\n```go\n// Package blocked_cache implements the blocked_issues_cache optimization.\n// \n// Performance: GetReadyWork originally used recursive CTE (752ms on 10K db).\n// With cache: Simple NOT EXISTS query (29ms on 10K db).\n//\n// Cache Maintenance:\n// - Rebuild triggered on dependency changes (blocks/parent-child only)\n// - Rebuild triggered on status changes \n// - Full rebuild (DELETE + INSERT) is fast enough for all cases\n// ...\n```\n\n2. **ARCHITECTURE.md** or **PERFORMANCE.md** (new file) - High-level overview\n\n3. **ready.go** - Update comment at line 84-85 with more detail\n\n## Related\n\n- [deleted:[deleted:[deleted:[deleted:[deleted:[deleted:bd-5qim]]]]]]: GetReadyWork optimization (implemented)\n- bd-13gm: Cache validation tests\n- [deleted:[deleted:[deleted:[deleted:bd-obdc]]]]: Cache rebuild command","status":"in_progress","priority":3,"issue_type":"task","created_at":"2025-11-23T20:07:23.467296-08:00","updated_at":"2025-11-24T01:12:20.902545-08:00","source_repo":"."} {"id":"bd-379","content_hash":"d1edf5009291680270e9bad61ef0d6e80fe1e24fa90f71fc80748a8bd52b32d2","title":"Implement `bd setup cursor` for Cursor IDE integration","description":"Create a `bd setup cursor` command that integrates Beads workflow into Cursor IDE via .cursorrules file. Unlike Claude Code (which has hooks), Cursor uses a static rules file to provide context to its AI.","design":"## Implementation\n\nCreate `cursor` subcommand in `cmd/bd/setup.go` that manages `.cursorrules` integration:\n\n### Command Interface\n```bash\nbd setup cursor # Install/update Cursor integration\nbd setup cursor --check # Verify .cursorrules has bd section\nbd setup cursor --remove # Remove bd section from .cursorrules\n```\n\n### Behavior\n\n**If `.cursorrules` doesn't exist:**\n- Create new file with complete bd rules template\n- Mark sections for easy identification\n\n**If `.cursorrules` exists:**\n- Check if bd section already exists (look for marker comments)\n- If not exists: append bd section\n- If exists: update in place (preserve user customizations outside bd section)\n- Backup original with `.cursorrules.backup` suffix\n\n### .cursorrules Template\n\n```markdown\n# Beads Issue Tracking\n# Auto-generated by 'bd setup cursor' - do not remove these markers\n# BEGIN BEADS INTEGRATION\n\nThis project uses [Beads (bd)](https://github.com/steveyegge/beads) for issue tracking.\n\n## Core Rules\n- Track ALL work in bd (never use markdown TODOs or comment-based task lists)\n- Use `bd ready` to find available work\n- Use `bd create` to track new issues/tasks/bugs\n- Use `bd sync` at end of session to sync with git remote\n- Git hooks auto-sync on commit/merge\n\n## Quick Reference\n```bash\nbd prime # Load complete workflow context\nbd ready # Show issues ready to work (no blockers)\nbd list --status=open # List all open issues\nbd create --title=\"...\" --type=task # Create new issue\nbd update \u003cid\u003e --status=in_progress # Claim work\nbd close \u003cid\u003e # Mark complete\nbd dep \u003cfrom\u003e \u003cto\u003e # Add dependency (from blocks to)\nbd sync # Sync with git remote\n```\n\n## Workflow\n1. Check for ready work: `bd ready`\n2. Claim an issue: `bd update \u003cid\u003e --status=in_progress`\n3. Do the work\n4. Mark complete: `bd close \u003cid\u003e`\n5. Sync: `bd sync` (or let git hooks handle it)\n\n## Context Loading\nRun `bd prime` to get complete workflow documentation in AI-optimized format (~1-2k tokens).\n\nFor detailed docs: see AGENTS.md, QUICKSTART.md, or run `bd --help`\n\n# END BEADS INTEGRATION\n```\n\n### Detection Logic\n\n```go\nfunc setupCursor() error {\n cursorRulesPath := \".cursorrules\"\n \n // Check if file exists\n content, err := os.ReadFile(cursorRulesPath)\n if err != nil {\n if os.IsNotExist(err) {\n // Create new file\n return createCursorRules(cursorRulesPath)\n }\n return err\n }\n \n // Check if bd section exists\n if hasBeadsSection(string(content)) {\n // Update existing section\n return updateBeadsSection(cursorRulesPath, string(content))\n } else {\n // Append new section\n return appendBeadsSection(cursorRulesPath, string(content))\n }\n}\n\nfunc hasBeadsSection(content string) bool {\n return strings.Contains(content, \"BEGIN BEADS INTEGRATION\")\n}\n```\n\n## Files\n- `cmd/bd/setup.go` - Add cursor subcommand\n- `cmd/bd/setup_cursor.go` - Cursor-specific logic\n- `cmd/bd/setup_cursor_test.go` - Tests\n- Template stored as Go string constant\n\n## Differences from Claude Setup\n\n| Aspect | Claude | Cursor |\n|--------|--------|--------|\n| **Integration file** | `.claude/commands/`, `.claude/hooks/` | `.cursorrules` |\n| **Auto-refresh** | Hooks call `bd prime` | Static rules (manual refresh) |\n| **Setup complexity** | Multiple files | Single file |\n| **Update frequency** | Dynamic (hooks) | Static (updated via `bd setup cursor`) |","acceptance_criteria":"- `bd setup cursor` creates/updates .cursorrules\n- Idempotent (safe to run multiple times)\n- Preserves non-bd content in .cursorrules\n- Backs up existing .cursorrules before modifying\n- `bd setup cursor --check` verifies integration\n- Markers allow updating bd section without affecting user content\n- Unit tests for template insertion/update logic\n- Documentation in AGENTS.md mentions Cursor setup","status":"open","priority":3,"issue_type":"feature","created_at":"2025-11-11T23:32:22.170083-08:00","updated_at":"2025-11-11T23:32:22.170083-08:00","source_repo":"."} {"id":"bd-3852","content_hash":"0dd9464b12b424b8513724aa7ef342e3658532b72403d81a740955557c03e30c","title":"Add orphan detection migration","description":"Create migration to detect orphaned children in existing databases. Query: SELECT id FROM issues WHERE id LIKE '%.%' AND substr(id, 1, instr(id || '.', '.') - 1) NOT IN (SELECT id FROM issues). Log results, let user decide action (delete orphans or convert to top-level).","status":"closed","priority":2,"issue_type":"task","created_at":"2025-11-04T12:32:30.727044-08:00","updated_at":"2025-11-24T00:31:16.479343-08:00","closed_at":"2025-11-24T00:31:16.479343-08:00","source_repo":"."} {"id":"bd-39o","content_hash":"36d58121cc9218718d262a1991ee84695af722d2823cf9c8415c2dfdd44fb390","title":"Rename last_import_hash metadata key to jsonl_content_hash","description":"The metadata key 'last_import_hash' is misleading because it's updated on both import AND export (sync.go:614, import.go:320).\n\nBetter names:\n- jsonl_content_hash (more accurate)\n- last_sync_hash (clearer intent)\n\nThis is a breaking change requiring migration of existing metadata values.","status":"open","priority":2,"issue_type":"task","created_at":"2025-11-20T21:31:07.568739-05:00","updated_at":"2025-11-20T21:31:07.568739-05:00","source_repo":"."} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index b755a94d..3f1a0e07 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -192,6 +192,150 @@ Hash-based comparison (not mtime) prevents git pull false positives (issue bd-84 - **Memory overhead:** One goroutine + minimal channel buffers per command execution - **Flush latency:** Debounce duration + JSONL write time (typically <100ms for incremental) +## Blocked Issues Cache (bd-5qim) + +### Problem Statement + +The `bd ready` command originally computed blocked issues using a recursive CTE on every query. On a 10K issue database, each query took ~752ms, making the command feel sluggish and impractical for large projects. + +### Solution: Materialized Cache Table + +The `blocked_issues_cache` table materializes the blocking computation, storing issue IDs for all currently blocked issues. Queries now use a simple `NOT EXISTS` check against this cache, completing in ~29ms (25x speedup). + +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ GetReadyWork Query │ +│ │ +│ SELECT ... FROM issues WHERE status IN (...) │ +│ AND NOT EXISTS ( │ +│ SELECT 1 FROM blocked_issues_cache │ +│ WHERE issue_id = issues.id │ +│ ) │ +│ │ +│ Performance: 29ms (was 752ms with recursive CTE) │ +└─────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────┐ +│ Cache Invalidation Triggers │ +│ │ +│ 1. AddDependency (blocks/parent-child only) │ +│ 2. RemoveDependency (blocks/parent-child only) │ +│ 3. UpdateIssue (on any status change) │ +│ 4. CloseIssue (changes status to closed) │ +│ │ +│ NOT triggered by: related, discovered-from deps │ +└─────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────┐ +│ Cache Rebuild Process │ +│ │ +│ 1. DELETE FROM blocked_issues_cache │ +│ 2. INSERT INTO blocked_issues_cache │ +│ WITH RECURSIVE CTE: │ +│ - Find directly blocked issues (blocks deps) │ +│ - Propagate to children (parent-child deps) │ +│ 3. Happens in same transaction as triggering change │ +│ │ +│ Performance: <50ms full rebuild on 10K database │ +└─────────────────────────────────────────────────────────┘ +``` + +### Blocking Semantics + +An issue is blocked if: + +1. **Direct blocking**: Has a `blocks` dependency on an open/in_progress/blocked issue +2. **Transitive blocking**: Parent is blocked and issue is connected via `parent-child` dependency + +Closed issues never block others. Related and discovered-from dependencies don't affect blocking. + +### Cache Invalidation Strategy + +**Full rebuild on every change** + +Instead of incremental updates, the cache is completely rebuilt (DELETE + INSERT) on any triggering change. This approach is chosen because: + +- Rebuild is fast (<50ms even on 10K issues) due to optimized CTE +- Simpler implementation with no risk of partial/stale updates +- Dependency changes are rare compared to reads +- Guarantees consistency - cache matches database state exactly + +**Transaction safety** + +All cache operations happen within the same transaction as the triggering change: +- Uses transaction if provided, otherwise direct db connection +- Cache can never be in an inconsistent state visible to queries +- Foreign key CASCADE ensures cache entries deleted when issues are deleted + +**Selective invalidation** + +Only `blocks` and `parent-child` dependencies trigger rebuilds since they affect blocking semantics. Related and discovered-from dependencies don't trigger invalidation, avoiding unnecessary work. + +### Performance Characteristics + +**Query performance (GetReadyWork):** +- Before cache: ~752ms (recursive CTE) +- With cache: ~29ms (NOT EXISTS) +- Speedup: 25x + +**Write overhead:** +- Cache rebuild: <50ms +- Only triggered on dependency/status changes (rare operations) +- Trade-off: slower writes for much faster reads + +### Edge Cases + +1. **Parent-child transitive blocking** + - Children of blocked parents are automatically marked as blocked + - Propagates through arbitrary depth hierarchies (limited to depth 50 for safety) + +2. **Multiple blockers** + - Issue blocked by multiple open issues stays blocked until all are closed + - DISTINCT in CTE ensures issue appears once in cache + +3. **Status changes** + - Closing a blocker removes all blocked descendants from cache + - Reopening a blocker adds them back + +4. **Dependency removal** + - Removing last blocker unblocks the issue + - Removing parent-child link unblocks orphaned subtree + +5. **Foreign key cascades** + - Cache entries automatically deleted when issue is deleted + - No manual cleanup needed + +### Testing + +Comprehensive test coverage in `blocked_cache_test.go`: +- Cache invalidation on dependency add/remove +- Cache updates on status changes +- Multiple blockers +- Deep hierarchies +- Transitive blocking via parent-child +- Related dependencies (should NOT affect cache) + +Run tests: `go test -v ./internal/storage/sqlite -run TestCache` + +### Implementation Files + +- `internal/storage/sqlite/blocked_cache.go` - Cache rebuild and invalidation +- `internal/storage/sqlite/ready.go` - Uses cache in GetReadyWork queries +- `internal/storage/sqlite/dependencies.go` - Invalidates on dep changes +- `internal/storage/sqlite/queries.go` - Invalidates on status changes +- `internal/storage/sqlite/migrations/015_blocked_issues_cache.go` - Schema and initial population + +### Future Optimizations + +If rebuild becomes a bottleneck in very large databases (>100K issues): +- Consider incremental updates for specific dependency types +- Add indexes to dependencies table for CTE performance +- Implement dirty tracking to avoid rebuilds when cache is unchanged + +However, current performance is excellent for realistic workloads. + ## Future Improvements Potential enhancements for multi-agent scenarios: diff --git a/internal/storage/sqlite/blocked_cache.go b/internal/storage/sqlite/blocked_cache.go index c50382bd..98cb21e1 100644 --- a/internal/storage/sqlite/blocked_cache.go +++ b/internal/storage/sqlite/blocked_cache.go @@ -1,3 +1,89 @@ +// Package sqlite provides the blocked_issues_cache optimization for GetReadyWork performance. +// +// # Performance Impact +// +// GetReadyWork originally used a recursive CTE to compute blocked issues on every query, +// taking ~752ms on a 10K issue database. With the cache, queries complete in ~29ms +// (25x speedup) by using a simple NOT EXISTS check against the materialized cache table. +// +// # Cache Architecture +// +// The blocked_issues_cache table stores issue_id values for all issues that are currently +// blocked. An issue is blocked if: +// - It has a 'blocks' dependency on an open/in_progress/blocked issue (direct blocking) +// - Its parent is blocked and it's connected via 'parent-child' dependency (transitive blocking) +// +// The cache is maintained automatically by invalidating and rebuilding whenever: +// - A 'blocks' or 'parent-child' dependency is added or removed +// - Any issue's status changes (affects whether it blocks others) +// - An issue is closed (closed issues don't block others) +// +// Related and discovered-from dependencies do NOT trigger cache invalidation since they +// don't affect blocking semantics. +// +// # Cache Invalidation Strategy +// +// On any triggering change, the entire cache is rebuilt from scratch (DELETE + INSERT). +// This full-rebuild approach is chosen because: +// - Rebuild is fast (<50ms even on 10K databases) due to optimized CTE logic +// - Simpler implementation than incremental updates +// - Dependency changes are rare compared to reads +// - Guarantees consistency - no risk of partial/stale updates +// +// The rebuild happens within the same transaction as the triggering change, ensuring +// atomicity and consistency. The cache can never be in an inconsistent state visible +// to queries. +// +// # Transaction Safety +// +// All cache operations support both transaction and direct database execution: +// - rebuildBlockedCache accepts optional *sql.Tx parameter +// - If tx != nil, uses transaction; otherwise uses direct db connection +// - Cache invalidation during CreateIssue/UpdateIssue/AddDependency happens in their tx +// - Ensures cache is always consistent with the database state +// +// # Performance Characteristics +// +// Query performance (GetReadyWork): +// - Before cache: ~752ms (recursive CTE on 10K issues) +// - With cache: ~29ms (NOT EXISTS check) +// - Speedup: 25x +// +// Write overhead: +// - Cache rebuild: <50ms (full DELETE + INSERT) +// - Only triggered on dependency/status changes (rare operations) +// - Trade-off: slower writes for much faster reads +// +// # Edge Cases Handled +// +// 1. Parent-child transitive blocking: +// - Children of blocked parents are automatically marked as blocked +// - Propagates through arbitrary depth hierarchies (limited to depth 50) +// +// 2. Multiple blockers: +// - Issue blocked by multiple open issues stays blocked until all are closed +// - DISTINCT in CTE ensures issue appears once in cache +// +// 3. Status changes: +// - Closing a blocker removes all blocked descendants from cache +// - Reopening a blocker adds them back +// +// 4. Dependency removal: +// - Removing last blocker unblocks the issue +// - Removing parent-child link unblocks orphaned subtree +// +// 5. Foreign key cascades: +// - Cache entries automatically deleted when issue is deleted (ON DELETE CASCADE) +// - No manual cleanup needed +// +// # Future Optimizations +// +// If rebuild becomes a bottleneck in very large databases (>100K issues): +// - Consider incremental updates for specific dependency types +// - Add indexes to dependencies table for CTE performance +// - Implement dirty tracking to avoid rebuilds when cache is unchanged +// +// However, current performance is excellent for realistic workloads. package sqlite import ( diff --git a/internal/storage/sqlite/ready.go b/internal/storage/sqlite/ready.go index 6eb610fb..25b6d375 100644 --- a/internal/storage/sqlite/ready.go +++ b/internal/storage/sqlite/ready.go @@ -82,7 +82,16 @@ func (s *SQLiteStorage) GetReadyWork(ctx context.Context, filter types.WorkFilte orderBySQL := buildOrderByClause(sortPolicy) // Use blocked_issues_cache for performance (bd-5qim) - // Cache is maintained by invalidateBlockedCache() called on dependency/status changes + // This optimization replaces the recursive CTE that computed blocked issues on every query. + // Performance improvement: 752ms → 29ms on 10K issues (25x speedup). + // + // The cache is automatically maintained by invalidateBlockedCache() which is called: + // - When adding/removing 'blocks' or 'parent-child' dependencies + // - When any issue status changes + // - When closing any issue + // + // Cache rebuild is fast (<50ms) and happens within the same transaction as the + // triggering change, ensuring consistency. See blocked_cache.go for full details. // #nosec G201 - safe SQL with controlled formatting query := fmt.Sprintf(` SELECT i.id, i.content_hash, i.title, i.description, i.design, i.acceptance_criteria, i.notes,