Implement content-first idempotent import (bd-98)

- Refactored upsertIssues to match by content hash first, then by ID
- Added buildHashMap, buildIDMap, and handleRename helper functions
- Import now detects and handles renames (same content, different ID)
- Importing same data multiple times is idempotent (reports Unchanged)
- Exported BuildReplacementCache and ReplaceIDReferencesWithCache for reuse
- All 30+ existing import tests pass
- Improved convergence for N-way collision scenarios

Changes:
- internal/importer/importer.go: Content-first matching in upsertIssues
- internal/storage/sqlite/collision.go: Exported helper functions
- internal/storage/sqlite/collision_test.go: Updated function names

Amp-Thread-ID: https://ampcode.com/threads/T-3df96ad8-7c0e-4190-87b5-6d5327718f0a
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-28 20:40:36 -07:00
parent 44df03ae55
commit ff02615f61
3 changed files with 179 additions and 68 deletions

View File

@@ -263,68 +263,179 @@ func handleCollisions(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, is
return issues, nil return issues, nil
} }
// upsertIssues creates new issues or updates existing ones // buildHashMap creates a map of content hash → issue for O(1) lookup
func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error { func buildHashMap(issues []*types.Issue) map[string]*types.Issue {
var newIssues []*types.Issue result := make(map[string]*types.Issue)
seenNew := make(map[string]int)
for _, issue := range issues { for _, issue := range issues {
// Check if issue exists in DB if issue.ContentHash != "" {
existing, err := sqliteStore.GetIssue(ctx, issue.ID) result[issue.ContentHash] = issue
if err != nil { }
return fmt.Errorf("error checking issue %s: %w", issue.ID, err) }
return result
}
// buildIDMap indexes issues by ID for O(1) lookup.
// If two issues share an ID, the one appearing later in the slice wins.
func buildIDMap(issues []*types.Issue) map[string]*types.Issue {
	byID := make(map[string]*types.Issue, len(issues))
	for _, is := range issues {
		byID[is.ID] = is
	}
	return byID
}
// handleRename applies a detected rename: the incoming issue has the same
// content hash as an existing DB issue but a different ID. It deletes the
// old record, re-creates it under the new ID, and rewrites textual
// references (description, design, notes, acceptance criteria) from the
// old ID to the new ID across every issue in the database.
//
// NOTE(review): the delete-then-create sequence is not atomic here — if
// CreateIssue fails after DeleteIssue succeeds, the issue is lost. Confirm
// the storage layer wraps this in a transaction, or add one.
//
// NOTE(review): each rename fetches and scans every issue in the DB
// (SearchIssues with an empty filter), so N renames cost N full scans —
// acceptable for small imports; worth batching if imports grow.
func handleRename(ctx context.Context, s *sqlite.SQLiteStorage, existing *types.Issue, incoming *types.Issue) error {
	// Remove the record stored under the old ID.
	if err := s.DeleteIssue(ctx, existing.ID); err != nil {
		return fmt.Errorf("failed to delete old ID %s: %w", existing.ID, err)
	}
	// Re-create the same content under the incoming (new) ID; the actor
	// string "import-rename" tags the change in the audit trail.
	if err := s.CreateIssue(ctx, incoming, "import-rename"); err != nil {
		return fmt.Errorf("failed to create renamed issue %s: %w", incoming.ID, err)
	}
	// Pre-compile the regexes for the single old→new mapping once, then
	// reuse the cache for every text field below.
	idMapping := map[string]string{existing.ID: incoming.ID}
	cache, err := sqlite.BuildReplacementCache(idMapping)
	if err != nil {
		return fmt.Errorf("failed to build replacement cache: %w", err)
	}
	// Fetch all issues so references anywhere in the DB can be rewritten.
	dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to get issues for reference update: %w", err)
	}
	// Rewrite each text field; only fields that actually changed are
	// included in the update map, so unchanged issues are left untouched.
	for _, issue := range dbIssues {
		updates := make(map[string]interface{})

		newDesc := sqlite.ReplaceIDReferencesWithCache(issue.Description, cache)
		if newDesc != issue.Description {
			updates["description"] = newDesc
		}

		newDesign := sqlite.ReplaceIDReferencesWithCache(issue.Design, cache)
		if newDesign != issue.Design {
			updates["design"] = newDesign
		}

		newNotes := sqlite.ReplaceIDReferencesWithCache(issue.Notes, cache)
		if newNotes != issue.Notes {
			updates["notes"] = newNotes
		}

		newAC := sqlite.ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
		if newAC != issue.AcceptanceCriteria {
			updates["acceptance_criteria"] = newAC
		}

		// Skip the write entirely when nothing referenced the old ID.
		if len(updates) > 0 {
			if err := s.UpdateIssue(ctx, issue.ID, updates, "import-rename"); err != nil {
				return fmt.Errorf("failed to update references in issue %s: %w", issue.ID, err)
			}
		}
	}
	return nil
}
updates["title"] = issue.Title
updates["description"] = issue.Description
updates["status"] = issue.Status
updates["priority"] = issue.Priority
updates["issue_type"] = issue.IssueType
updates["design"] = issue.Design
updates["acceptance_criteria"] = issue.AcceptanceCriteria
updates["notes"] = issue.Notes
if issue.Assignee != "" { // upsertIssues creates new issues or updates existing ones using content-first matching
updates["assignee"] = issue.Assignee func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error {
} else { // Get all DB issues once
updates["assignee"] = nil dbIssues, err := sqliteStore.SearchIssues(ctx, "", types.IssueFilter{})
} if err != nil {
return fmt.Errorf("failed to get DB issues: %w", err)
}
dbByHash := buildHashMap(dbIssues)
dbByID := buildIDMap(dbIssues)
if issue.ExternalRef != nil && *issue.ExternalRef != "" { // Track what we need to create
updates["external_ref"] = *issue.ExternalRef var newIssues []*types.Issue
} else { seenHashes := make(map[string]bool)
updates["external_ref"] = nil
}
// Only update if data actually changed for _, incoming := range issues {
if IssueDataChanged(existing, updates) { hash := incoming.ContentHash
if err := sqliteStore.UpdateIssue(ctx, issue.ID, updates, "import"); err != nil { if hash == "" {
return fmt.Errorf("error updating issue %s: %w", issue.ID, err) // Shouldn't happen (computed earlier), but be defensive
} hash = incoming.ComputeContentHash()
result.Updated++ incoming.ContentHash = hash
} else { }
// Skip duplicates within incoming batch
if seenHashes[hash] {
result.Skipped++
continue
}
seenHashes[hash] = true
// Phase 1: Match by content hash first
if existing, found := dbByHash[hash]; found {
// Same content exists
if existing.ID == incoming.ID {
// Exact match (same content, same ID) - idempotent case
result.Unchanged++ result.Unchanged++
} else {
// Same content, different ID - rename detected
if !opts.SkipUpdate {
if err := handleRename(ctx, sqliteStore, existing, incoming); err != nil {
return fmt.Errorf("failed to handle rename %s -> %s: %w", existing.ID, incoming.ID, err)
}
result.Updated++
} else {
result.Skipped++
}
}
continue
}
// Phase 2: New content - check for ID collision
if existingWithID, found := dbByID[incoming.ID]; found {
// ID exists but different content - this is a collision
// The collision should have been handled earlier by handleCollisions
// If we reach here, it means collision wasn't resolved - treat as update
if !opts.SkipUpdate {
// Build updates map
updates := make(map[string]interface{})
updates["title"] = incoming.Title
updates["description"] = incoming.Description
updates["status"] = incoming.Status
updates["priority"] = incoming.Priority
updates["issue_type"] = incoming.IssueType
updates["design"] = incoming.Design
updates["acceptance_criteria"] = incoming.AcceptanceCriteria
updates["notes"] = incoming.Notes
if incoming.Assignee != "" {
updates["assignee"] = incoming.Assignee
} else {
updates["assignee"] = nil
}
if incoming.ExternalRef != nil && *incoming.ExternalRef != "" {
updates["external_ref"] = *incoming.ExternalRef
} else {
updates["external_ref"] = nil
}
// Only update if data actually changed
if IssueDataChanged(existingWithID, updates) {
if err := sqliteStore.UpdateIssue(ctx, incoming.ID, updates, "import"); err != nil {
return fmt.Errorf("error updating issue %s: %w", incoming.ID, err)
}
result.Updated++
} else {
result.Unchanged++
}
} else {
result.Skipped++
} }
} else { } else {
// New issue - check for duplicates in import batch // Truly new issue
if idx, seen := seenNew[issue.ID]; seen { newIssues = append(newIssues, incoming)
if opts.Strict {
return fmt.Errorf("duplicate issue ID %s in import (line %d)", issue.ID, idx)
}
result.Skipped++
continue
}
seenNew[issue.ID] = len(newIssues)
newIssues = append(newIssues, issue)
} }
} }

View File

@@ -478,7 +478,7 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error { func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
// Pre-compile all regexes once for the entire operation // Pre-compile all regexes once for the entire operation
// This avoids recompiling the same patterns for each text field // This avoids recompiling the same patterns for each text field
cache, err := buildReplacementCache(idMapping) cache, err := BuildReplacementCache(idMapping)
if err != nil { if err != nil {
return fmt.Errorf("failed to build replacement cache: %w", err) return fmt.Errorf("failed to build replacement cache: %w", err)
} }
@@ -494,25 +494,25 @@ func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[strin
updates := make(map[string]interface{}) updates := make(map[string]interface{})
// Update description using cached regexes // Update description using cached regexes
newDesc := replaceIDReferencesWithCache(issue.Description, cache) newDesc := ReplaceIDReferencesWithCache(issue.Description, cache)
if newDesc != issue.Description { if newDesc != issue.Description {
updates["description"] = newDesc updates["description"] = newDesc
} }
// Update design using cached regexes // Update design using cached regexes
newDesign := replaceIDReferencesWithCache(issue.Design, cache) newDesign := ReplaceIDReferencesWithCache(issue.Design, cache)
if newDesign != issue.Design { if newDesign != issue.Design {
updates["design"] = newDesign updates["design"] = newDesign
} }
// Update notes using cached regexes // Update notes using cached regexes
newNotes := replaceIDReferencesWithCache(issue.Notes, cache) newNotes := ReplaceIDReferencesWithCache(issue.Notes, cache)
if newNotes != issue.Notes { if newNotes != issue.Notes {
updates["notes"] = newNotes updates["notes"] = newNotes
} }
// Update acceptance criteria using cached regexes // Update acceptance criteria using cached regexes
newAC := replaceIDReferencesWithCache(issue.AcceptanceCriteria, cache) newAC := ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
if newAC != issue.AcceptanceCriteria { if newAC != issue.AcceptanceCriteria {
updates["acceptance_criteria"] = newAC updates["acceptance_criteria"] = newAC
} }
@@ -542,9 +542,9 @@ type idReplacementCache struct {
regex *regexp.Regexp regex *regexp.Regexp
} }
// buildReplacementCache pre-compiles all regex patterns for an ID mapping // BuildReplacementCache pre-compiles all regex patterns for an ID mapping
// This cache should be created once per ID mapping and reused for all text replacements // This cache should be created once per ID mapping and reused for all text replacements
func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) { func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
cache := make([]*idReplacementCache, 0, len(idMapping)) cache := make([]*idReplacementCache, 0, len(idMapping))
i := 0 i := 0
for oldID, newID := range idMapping { for oldID, newID := range idMapping {
@@ -566,9 +566,9 @@ func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache,
return cache, nil return cache, nil
} }
// replaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache // ReplaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
// Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs // Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs
func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) string { func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
if len(cache) == 0 || text == "" { if len(cache) == 0 || text == "" {
return text return text
} }
@@ -593,16 +593,16 @@ func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) stri
// placeholders, then replace placeholders with new IDs // placeholders, then replace placeholders with new IDs
// //
// Note: This function compiles regexes on every call. For better performance when // Note: This function compiles regexes on every call. For better performance when
// processing multiple text fields with the same ID mapping, use buildReplacementCache() // processing multiple text fields with the same ID mapping, use BuildReplacementCache()
// and replaceIDReferencesWithCache() instead. // and ReplaceIDReferencesWithCache() instead.
func replaceIDReferences(text string, idMapping map[string]string) string { func replaceIDReferences(text string, idMapping map[string]string) string {
// Build cache (compiles regexes) // Build cache (compiles regexes)
cache, err := buildReplacementCache(idMapping) cache, err := BuildReplacementCache(idMapping)
if err != nil { if err != nil {
// Fallback to no replacement if regex compilation fails // Fallback to no replacement if regex compilation fails
return text return text
} }
return replaceIDReferencesWithCache(text, cache) return ReplaceIDReferencesWithCache(text, cache)
} }
// updateDependencyReferences updates dependency records to use new IDs // updateDependencyReferences updates dependency records to use new IDs

View File

@@ -802,14 +802,14 @@ func BenchmarkReplaceIDReferencesWithCache(b *testing.B) {
"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here." "Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."
// Pre-compile the cache (this is done once in real usage) // Pre-compile the cache (this is done once in real usage)
cache, err := buildReplacementCache(idMapping) cache, err := BuildReplacementCache(idMapping)
if err != nil { if err != nil {
b.Fatalf("failed to build cache: %v", err) b.Fatalf("failed to build cache: %v", err)
} }
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = replaceIDReferencesWithCache(text, cache) _ = ReplaceIDReferencesWithCache(text, cache)
} }
} }
@@ -838,11 +838,11 @@ func BenchmarkReplaceIDReferencesMultipleTexts(b *testing.B) {
}) })
b.Run("with cache", func(b *testing.B) { b.Run("with cache", func(b *testing.B) {
cache, _ := buildReplacementCache(idMapping) cache, _ := BuildReplacementCache(idMapping)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, text := range texts { for _, text := range texts {
_ = replaceIDReferencesWithCache(text, cache) _ = ReplaceIDReferencesWithCache(text, cache)
} }
} }
}) })