Implement content-first idempotent import (bd-98)

- Refactored upsertIssues to match by content hash first, then by ID
- Added buildHashMap, buildIDMap, and handleRename helper functions
- Import now detects and handles renames (same content, different ID)
- Importing same data multiple times is idempotent (reports Unchanged)
- Exported BuildReplacementCache and ReplaceIDReferencesWithCache for reuse
- All 30+ existing import tests pass
- Improved convergence for N-way collision scenarios

Changes:
- internal/importer/importer.go: Content-first matching in upsertIssues
- internal/storage/sqlite/collision.go: Exported helper functions
- internal/storage/sqlite/collision_test.go: Updated function names

Amp-Thread-ID: https://ampcode.com/threads/T-3df96ad8-7c0e-4190-87b5-6d5327718f0a
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-28 20:40:36 -07:00
parent 44df03ae55
commit ff02615f61
3 changed files with 179 additions and 68 deletions

View File

@@ -802,14 +802,14 @@ func BenchmarkReplaceIDReferencesWithCache(b *testing.B) {
"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."
// Pre-compile the cache (this is done once in real usage)
-cache, err := buildReplacementCache(idMapping)
+cache, err := BuildReplacementCache(idMapping)
if err != nil {
b.Fatalf("failed to build cache: %v", err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
-_ = replaceIDReferencesWithCache(text, cache)
+_ = ReplaceIDReferencesWithCache(text, cache)
}
}
@@ -838,11 +838,11 @@ func BenchmarkReplaceIDReferencesMultipleTexts(b *testing.B) {
})
b.Run("with cache", func(b *testing.B) {
-cache, _ := buildReplacementCache(idMapping)
+cache, _ := BuildReplacementCache(idMapping)
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, text := range texts {
-_ = replaceIDReferencesWithCache(text, cache)
+_ = ReplaceIDReferencesWithCache(text, cache)
}
}
})