Fix bd SearchIssues inefficient WHERE IN query pattern for Dolt
All checks were successful
CI / check (push) Successful in 3m25s

The Dolt backend's SearchIssues was using a two-phase query:
1. SELECT id FROM issues WHERE ... -> collect all IDs
2. SELECT * FROM issues WHERE id IN (id1, id2, ... id8000+)

With 8000+ issues, the second query carries 8000+ placeholders and drives
Dolt CPU usage to 100% or more. The fix changes SearchIssues to select all
columns directly in the first query and scan the results inline.

See: hq-ihwsj

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-29 18:29:46 -08:00
committed by John Ogle
parent 8f8582b0f3
commit 21a8b5c5d9
2 changed files with 99 additions and 5 deletions

View File

@@ -0,0 +1,44 @@
diff --git a/internal/storage/dolt/queries.go b/internal/storage/dolt/queries.go
index 7d8214ee..8acdaae2 100644
--- a/internal/storage/dolt/queries.go
+++ b/internal/storage/dolt/queries.go
@@ -212,8 +212,21 @@ func (s *DoltStore) SearchIssues(ctx context.Context, query string, filter types
}
// nolint:gosec // G201: whereSQL contains column comparisons with ?, limitSQL is a safe integer
+ // Performance fix: SELECT all columns directly instead of id-only + WHERE IN (all_ids)
+ // See: hq-ihwsj - bd list uses inefficient WHERE IN (all_ids) query pattern
querySQL := fmt.Sprintf(`
- SELECT id FROM issues
+ SELECT id, content_hash, title, description, design, acceptance_criteria, notes,
+ status, priority, issue_type, assignee, estimated_minutes,
+ created_at, created_by, owner, updated_at, closed_at, external_ref,
+ compaction_level, compacted_at, compacted_at_commit, original_size, source_repo, close_reason,
+ deleted_at, deleted_by, delete_reason, original_type,
+ sender, ephemeral, pinned, is_template, crystallizes,
+ await_type, await_id, timeout_ns, waiters,
+ hook_bead, role_bead, agent_state, last_activity, role_type, rig, mol_type,
+ event_kind, actor, target, payload,
+ due_at, defer_until,
+ quality_score, work_type, source_system
+ FROM issues
%s
ORDER BY priority ASC, created_at DESC
%s
@@ -225,7 +238,15 @@ func (s *DoltStore) SearchIssues(ctx context.Context, query string, filter types
}
defer rows.Close()
- return s.scanIssueIDs(ctx, rows)
+ var issues []*types.Issue
+ for rows.Next() {
+ issue, err := scanIssueRow(rows)
+ if err != nil {
+ return nil, err
+ }
+ issues = append(issues, issue)
+ }
+ return issues, rows.Err()
}
// GetReadyWork returns issues that are ready to work on (not blocked)

View File

@@ -9,6 +9,15 @@ let
# Remove after upstream fix: https://github.com/steveyegge/beads/issues/XXX
beadsPackage = globalInputs.beads.packages.${system}.default.overrideAttrs (old: {
vendorHash = "sha256-YU+bRLVlWtHzJ1QPzcKJ70f+ynp8lMoIeFlm+29BNPE=";
# Performance fix: avoid WHERE IN (8000+ IDs) query pattern that hammers Dolt CPU
# See: hq-ihwsj - bd list uses inefficient WHERE IN (all_ids) query pattern
# The fix changes SearchIssues to SELECT all columns directly instead of:
# 1. SELECT id FROM issues WHERE ... -> collect IDs
# 2. SELECT * FROM issues WHERE id IN (all_ids) -> 8000+ placeholder IN clause
patches = (old.patches or []) ++ [
./beads-search-query-optimization.patch
];
});
# Gastown - multi-agent workspace manager (no upstream flake.nix yet)
@@ -125,9 +134,50 @@ let
# Statusline optimization: skip detached sessions and cache results
# Reduces Dolt CPU from ~70% to ~20% by avoiding beads queries for sessions nobody is watching
# Cache functions already exist in upstream, we just add the early-return + cache writes
# See: https://github.com/steveyegge/gastown/issues/TBD
substituteInPlace internal/cmd/statusline.go \
--replace-fail \
'"strings"' \
'"strings"
"time"' \
--replace-fail \
'var (
statusLineSession string
)' \
'// statusLineCacheTTL is how long cached status output remains valid.
const statusLineCacheTTL = 10 * time.Second
// statusLineCachePath returns the cache file path for a session.
func statusLineCachePath(session string) string {
return filepath.Join(os.TempDir(), fmt.Sprintf("gt-status-%s", session))
}
// getStatusLineCache returns cached status if fresh, empty string otherwise.
func getStatusLineCache(session string) string {
path := statusLineCachePath(session)
info, err := os.Stat(path)
if err != nil {
return ""
}
if time.Since(info.ModTime()) > statusLineCacheTTL {
return ""
}
data, err := os.ReadFile(path)
if err != nil {
return ""
}
return string(data)
}
// setStatusLineCache writes status to cache file.
func setStatusLineCache(session, status string) {
path := statusLineCachePath(session)
_ = os.WriteFile(path, []byte(status), 0644)
}
var (
statusLineSession string
)' \
--replace-fail \
'func runStatusLine(cmd *cobra.Command, args []string) error {
t := tmux.NewTmux()
@@ -159,7 +209,7 @@ let
return nil
}
// runMayorStatusLine' \
func runMayorStatusLine(t *tmux.Tmux) error {' \
' // Output
if len(parts) > 0 {
output := strings.Join(parts, " | ") + " |"
@@ -172,7 +222,7 @@ let
return nil
}
// runMayorStatusLine' \
func runMayorStatusLine(t *tmux.Tmux) error {' \
--replace-fail \
'fmt.Print(strings.Join(parts, " | ") + " |")
return nil