Cache compiled regexes in ID replacement for 1.9x performance boost

Implements bd-27: Cache compiled regexes in replaceIDReferences for performance

Problem:
replaceIDReferences() was compiling regex patterns on every call. With 100
issues and 10 ID mappings, that resulted in 4,000 regex compilations (100
issues × 4 text fields × 10 ID mappings).

Solution:
- Added buildReplacementCache() to pre-compile all regexes once
- Added replaceIDReferencesWithCache() to reuse compiled regexes
- Updated updateReferences() to build cache once and reuse for all issues
- Kept replaceIDReferences() for backward compatibility (calls cached version)

Performance Results (from benchmarks):
Single text:
- 1.33x faster (26,162 ns → 19,641 ns)
- 68% less memory (25,769 B → 8,241 B)
- 80% fewer allocations (278 → 55)

Real-world (400 texts, 10 mappings):
- 1.89x faster (5.1ms → 2.7ms)
- 90% less memory (7.7 MB → 0.8 MB)
- 86% fewer allocations (104,112 → 14,801)

Tests:
- All existing tests pass
- Added 3 benchmark tests demonstrating improvements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Steve Yegge
2025-10-13 23:50:48 -07:00
parent 33412871eb
commit 25644d9717
3 changed files with 155 additions and 29 deletions

View File

@@ -1027,3 +1027,78 @@ func TestUpdateDependencyReferences(t *testing.T) {
t.Errorf("expected 0 dependencies for bd-2, got %d", len(deps2))
}
}
// BenchmarkReplaceIDReferences benchmarks the old approach (compiling regex every time)
func BenchmarkReplaceIDReferences(b *testing.B) {
	// Build a realistic workload: 10 old-ID → new-ID mappings.
	idMapping := map[string]string{}
	for n := 1; n <= 10; n++ {
		old := fmt.Sprintf("bd-%d", n)
		idMapping[old] = fmt.Sprintf("bd-%d", n+100)
	}
	text := "This mentions bd-1, bd-2, bd-3, bd-4, and bd-5 multiple times. " +
		"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = replaceIDReferences(text, idMapping)
	}
}
// BenchmarkReplaceIDReferencesWithCache benchmarks the new cached approach
func BenchmarkReplaceIDReferencesWithCache(b *testing.B) {
	// Build a realistic workload: 10 old-ID → new-ID mappings.
	idMapping := map[string]string{}
	for n := 1; n <= 10; n++ {
		old := fmt.Sprintf("bd-%d", n)
		idMapping[old] = fmt.Sprintf("bd-%d", n+100)
	}
	text := "This mentions bd-1, bd-2, bd-3, bd-4, and bd-5 multiple times. " +
		"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."

	// Pre-compile the cache (this is done once in real usage)
	cache, err := buildReplacementCache(idMapping)
	if err != nil {
		b.Fatalf("failed to build cache: %v", err)
	}

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = replaceIDReferencesWithCache(text, cache)
	}
}
// BenchmarkReplaceIDReferencesMultipleTexts simulates the real-world scenario:
// processing multiple text fields (4 per issue) across 100 issues
func BenchmarkReplaceIDReferencesMultipleTexts(b *testing.B) {
	// 10 ID mappings (typical collision scenario)
	idMapping := make(map[string]string, 10)
	for i := 1; i <= 10; i++ {
		idMapping[fmt.Sprintf("bd-%d", i)] = fmt.Sprintf("bd-%d", i+100)
	}
	// Simulate 100 issues with 4 text fields each
	texts := make([]string, 400)
	for i := 0; i < 400; i++ {
		texts[i] = fmt.Sprintf("Issue %d mentions bd-1, bd-2, and bd-5", i)
	}
	b.Run("without cache", func(b *testing.B) {
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			for _, text := range texts {
				_ = replaceIDReferences(text, idMapping)
			}
		}
	})
	b.Run("with cache", func(b *testing.B) {
		// Fail loudly if cache construction errors, matching the
		// error handling in BenchmarkReplaceIDReferencesWithCache;
		// previously the error was silently discarded, which would
		// have surfaced as a confusing nil-cache panic instead.
		cache, err := buildReplacementCache(idMapping)
		if err != nil {
			b.Fatalf("failed to build cache: %v", err)
		}
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			for _, text := range texts {
				_ = replaceIDReferencesWithCache(text, cache)
			}
		}
	})
}