From 21a8b5c5d9e0ab9aae96ad5e79bfe492f793dfeb Mon Sep 17 00:00:00 2001 From: mayor Date: Thu, 29 Jan 2026 18:29:46 -0800 Subject: [PATCH] Fix bd SearchIssues inefficient WHERE IN query pattern for Dolt The Dolt backend's SearchIssues was using a two-phase query: 1. SELECT id FROM issues WHERE ... -> collect all IDs 2. SELECT * FROM issues WHERE id IN (id1, id2, ... id8000+) With 8000+ issues, this second query with 8000+ placeholders hammers Dolt CPU at 100%+. The fix changes SearchIssues to select all columns directly in the first query and scan results inline. See: hq-ihwsj Co-Authored-By: Claude Opus 4.5 --- .../beads-search-query-optimization.patch | 44 ++++++++++++++ home/roles/development/default.nix | 60 +++++++++++++++++-- 2 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 home/roles/development/beads-search-query-optimization.patch diff --git a/home/roles/development/beads-search-query-optimization.patch b/home/roles/development/beads-search-query-optimization.patch new file mode 100644 index 0000000..c5d88e5 --- /dev/null +++ b/home/roles/development/beads-search-query-optimization.patch @@ -0,0 +1,44 @@ +diff --git a/internal/storage/dolt/queries.go b/internal/storage/dolt/queries.go +index 7d8214ee..8acdaae2 100644 +--- a/internal/storage/dolt/queries.go ++++ b/internal/storage/dolt/queries.go +@@ -212,8 +212,21 @@ func (s *DoltStore) SearchIssues(ctx context.Context, query string, filter types + } + + // nolint:gosec // G201: whereSQL contains column comparisons with ?, limitSQL is a safe integer ++ // Performance fix: SELECT all columns directly instead of id-only + WHERE IN (all_ids) ++ // See: hq-ihwsj - bd list uses inefficient WHERE IN (all_ids) query pattern + querySQL := fmt.Sprintf(` +- SELECT id FROM issues ++ SELECT id, content_hash, title, description, design, acceptance_criteria, notes, ++ status, priority, issue_type, assignee, estimated_minutes, ++ created_at, created_by, owner, updated_at, closed_at, external_ref, ++ 
compaction_level, compacted_at, compacted_at_commit, original_size, source_repo, close_reason, ++ deleted_at, deleted_by, delete_reason, original_type, ++ sender, ephemeral, pinned, is_template, crystallizes, ++ await_type, await_id, timeout_ns, waiters, ++ hook_bead, role_bead, agent_state, last_activity, role_type, rig, mol_type, ++ event_kind, actor, target, payload, ++ due_at, defer_until, ++ quality_score, work_type, source_system ++ FROM issues + %s + ORDER BY priority ASC, created_at DESC + %s +@@ -225,7 +238,15 @@ func (s *DoltStore) SearchIssues(ctx context.Context, query string, filter types + } + defer rows.Close() + +- return s.scanIssueIDs(ctx, rows) ++ var issues []*types.Issue ++ for rows.Next() { ++ issue, err := scanIssueRow(rows) ++ if err != nil { ++ return nil, err ++ } ++ issues = append(issues, issue) ++ } ++ return issues, rows.Err() + } + + // GetReadyWork returns issues that are ready to work on (not blocked) diff --git a/home/roles/development/default.nix b/home/roles/development/default.nix index d1641d6..5a2d7df 100644 --- a/home/roles/development/default.nix +++ b/home/roles/development/default.nix @@ -9,6 +9,15 @@ let # Remove after upstream fix: https://github.com/steveyegge/beads/issues/XXX beadsPackage = globalInputs.beads.packages.${system}.default.overrideAttrs (old: { vendorHash = "sha256-YU+bRLVlWtHzJ1QPzcKJ70f+ynp8lMoIeFlm+29BNPE="; + + # Performance fix: avoid WHERE IN (8000+ IDs) query pattern that hammers Dolt CPU + # See: hq-ihwsj - bd list uses inefficient WHERE IN (all_ids) query pattern + # The fix changes SearchIssues to SELECT all columns directly instead of: + # 1. SELECT id FROM issues WHERE ... -> collect IDs + # 2. 
SELECT * FROM issues WHERE id IN (all_ids) -> 8000+ placeholder IN clause + patches = (old.patches or []) ++ [ + ./beads-search-query-optimization.patch + ]; }); # Gastown - multi-agent workspace manager (no upstream flake.nix yet) @@ -125,9 +134,50 @@ let # Statusline optimization: skip detached sessions and cache results # Reduces Dolt CPU from ~70% to ~20% by avoiding beads queries for sessions nobody is watching - # Cache functions already exist in upstream, we just add the early-return + cache writes # See: https://github.com/steveyegge/gastown/issues/TBD substituteInPlace internal/cmd/statusline.go \ + --replace-fail \ + '"strings"' \ + '"strings" + "time"' \ + --replace-fail \ + 'var ( + statusLineSession string +)' \ + '// statusLineCacheTTL is how long cached status output remains valid. +const statusLineCacheTTL = 10 * time.Second + +// statusLineCachePath returns the cache file path for a session. +func statusLineCachePath(session string) string { + return filepath.Join(os.TempDir(), fmt.Sprintf("gt-status-%s", session)) +} + +// getStatusLineCache returns cached status if fresh, empty string otherwise. +func getStatusLineCache(session string) string { + path := statusLineCachePath(session) + info, err := os.Stat(path) + if err != nil { + return "" + } + if time.Since(info.ModTime()) > statusLineCacheTTL { + return "" + } + data, err := os.ReadFile(path) + if err != nil { + return "" + } + return string(data) +} + +// setStatusLineCache writes status to cache file. 
+func setStatusLineCache(session, status string) { + path := statusLineCachePath(session) + _ = os.WriteFile(path, []byte(status), 0600) // owner-only: file lives under os.TempDir(); best-effort, write error ignored +} + +var ( + statusLineSession string +)' \ --replace-fail \ 'func runStatusLine(cmd *cobra.Command, args []string) error { t := tmux.NewTmux() @@ -151,7 +201,7 @@ let // Get session environment' \ --replace-fail \ - '// Output + ' // Output if len(parts) > 0 { fmt.Print(strings.Join(parts, " | ") + " |") } @@ -159,8 +209,8 @@ let return nil } -// runMayorStatusLine' \ - '// Output +func runMayorStatusLine(t *tmux.Tmux) error {' \ + ' // Output if len(parts) > 0 { output := strings.Join(parts, " | ") + " |" if statusLineSession != "" { @@ -172,7 +222,7 @@ let return nil } -// runMayorStatusLine' \ +func runMayorStatusLine(t *tmux.Tmux) error {' \ --replace-fail \ 'fmt.Print(strings.Join(parts, " | ") + " |") return nil