Implement content-first idempotent import (bd-98)

- Refactored upsertIssues to match by content hash first, then by ID
- Added buildHashMap, buildIDMap, and handleRename helper functions
- Import now detects and handles renames (same content, different ID)
- Importing same data multiple times is idempotent (reports Unchanged)
- Exported BuildReplacementCache and ReplaceIDReferencesWithCache for reuse
- All 30+ existing import tests pass
- Improved convergence for N-way collision scenarios

Changes:
- internal/importer/importer.go: Content-first matching in upsertIssues
- internal/storage/sqlite/collision.go: Exported helper functions
- internal/storage/sqlite/collision_test.go: Updated function names

Amp-Thread-ID: https://ampcode.com/threads/T-3df96ad8-7c0e-4190-87b5-6d5327718f0a
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-28 20:40:36 -07:00
parent 44df03ae55
commit ff02615f61
3 changed files with 179 additions and 68 deletions

View File

@@ -478,7 +478,7 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
// Pre-compile all regexes once for the entire operation
// This avoids recompiling the same patterns for each text field
cache, err := buildReplacementCache(idMapping)
cache, err := BuildReplacementCache(idMapping)
if err != nil {
return fmt.Errorf("failed to build replacement cache: %w", err)
}
@@ -494,25 +494,25 @@ func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[strin
updates := make(map[string]interface{})
// Update description using cached regexes
newDesc := replaceIDReferencesWithCache(issue.Description, cache)
newDesc := ReplaceIDReferencesWithCache(issue.Description, cache)
if newDesc != issue.Description {
updates["description"] = newDesc
}
// Update design using cached regexes
newDesign := replaceIDReferencesWithCache(issue.Design, cache)
newDesign := ReplaceIDReferencesWithCache(issue.Design, cache)
if newDesign != issue.Design {
updates["design"] = newDesign
}
// Update notes using cached regexes
newNotes := replaceIDReferencesWithCache(issue.Notes, cache)
newNotes := ReplaceIDReferencesWithCache(issue.Notes, cache)
if newNotes != issue.Notes {
updates["notes"] = newNotes
}
// Update acceptance criteria using cached regexes
newAC := replaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
newAC := ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
if newAC != issue.AcceptanceCriteria {
updates["acceptance_criteria"] = newAC
}
@@ -542,9 +542,9 @@ type idReplacementCache struct {
regex *regexp.Regexp
}
// buildReplacementCache pre-compiles all regex patterns for an ID mapping
// BuildReplacementCache pre-compiles all regex patterns for an ID mapping
// This cache should be created once per ID mapping and reused for all text replacements
func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
cache := make([]*idReplacementCache, 0, len(idMapping))
i := 0
for oldID, newID := range idMapping {
@@ -566,9 +566,9 @@ func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache,
return cache, nil
}
// replaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
// ReplaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
// Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs
func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
if len(cache) == 0 || text == "" {
return text
}
@@ -593,16 +593,16 @@ func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) stri
// placeholders, then replace placeholders with new IDs
//
// Note: This function compiles regexes on every call. For better performance when
// processing multiple text fields with the same ID mapping, use buildReplacementCache()
// and replaceIDReferencesWithCache() instead.
// processing multiple text fields with the same ID mapping, use BuildReplacementCache()
// and ReplaceIDReferencesWithCache() instead.
func replaceIDReferences(text string, idMapping map[string]string) string {
// Build cache (compiles regexes)
cache, err := buildReplacementCache(idMapping)
cache, err := BuildReplacementCache(idMapping)
if err != nil {
// Fallback to no replacement if regex compilation fails
return text
}
return replaceIDReferencesWithCache(text, cache)
return ReplaceIDReferencesWithCache(text, cache)
}
// updateDependencyReferences updates dependency records to use new IDs