From ff02615f61c06366da101c0669c463e0a5612584 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Tue, 28 Oct 2025 20:40:36 -0700 Subject: [PATCH] Implement content-first idempotent import (bd-98) - Refactored upsertIssues to match by content hash first, then by ID - Added buildHashMap, buildIDMap, and handleRename helper functions - Import now detects and handles renames (same content, different ID) - Importing same data multiple times is idempotent (reports Unchanged) - Exported BuildReplacementCache and ReplaceIDReferencesWithCache for reuse - All 30+ existing import tests pass - Improved convergence for N-way collision scenarios Changes: - internal/importer/importer.go: Content-first matching in upsertIssues - internal/storage/sqlite/collision.go: Exported helper functions - internal/storage/sqlite/collision_test.go: Updated function names Amp-Thread-ID: https://ampcode.com/threads/T-3df96ad8-7c0e-4190-87b5-6d5327718f0a Co-authored-by: Amp --- internal/importer/importer.go | 213 ++++++++++++++++------ internal/storage/sqlite/collision.go | 26 +-- internal/storage/sqlite/collision_test.go | 8 +- 3 files changed, 179 insertions(+), 68 deletions(-) diff --git a/internal/importer/importer.go b/internal/importer/importer.go index 444ededf..55768ba8 100644 --- a/internal/importer/importer.go +++ b/internal/importer/importer.go @@ -263,68 +263,179 @@ func handleCollisions(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, is return issues, nil } -// upsertIssues creates new issues or updates existing ones -func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error { - var newIssues []*types.Issue - seenNew := make(map[string]int) - +// buildHashMap creates a map of content hash → issue for O(1) lookup +func buildHashMap(issues []*types.Issue) map[string]*types.Issue { + result := make(map[string]*types.Issue) for _, issue := range issues { - // Check if issue exists in DB - existing, err := sqliteStore.GetIssue(ctx, issue.ID) - if err != nil { - return fmt.Errorf("error checking issue %s: %w", issue.ID, err) + if issue.ContentHash != "" { + result[issue.ContentHash] = issue + } + } + return result +} + +// buildIDMap creates a map of ID → issue for O(1) lookup +func buildIDMap(issues []*types.Issue) map[string]*types.Issue { + result := make(map[string]*types.Issue) + for _, issue := range issues { + result[issue.ID] = issue + } + return result +} + +// handleRename handles content match with different IDs (rename detected) +func handleRename(ctx context.Context, s *sqlite.SQLiteStorage, existing *types.Issue, incoming *types.Issue) error { + // Delete old ID + if err := s.DeleteIssue(ctx, existing.ID); err != nil { + return fmt.Errorf("failed to delete old ID %s: %w", existing.ID, err) + } + + // Create with new ID + if err := s.CreateIssue(ctx, incoming, "import-rename"); err != nil { + return fmt.Errorf("failed to create renamed issue %s: %w", incoming.ID, err) + } + + // Update references from old ID to new ID + idMapping := map[string]string{existing.ID: incoming.ID} + cache, err := sqlite.BuildReplacementCache(idMapping) + if err != nil { + return fmt.Errorf("failed to build replacement cache: %w", err) + } + + // Get all issues to update references + dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + return fmt.Errorf("failed to get issues for reference update: %w", err) + } + + // Update text field references in all issues + for _, issue := range dbIssues { + updates := make(map[string]interface{}) + + newDesc := sqlite.ReplaceIDReferencesWithCache(issue.Description, cache) + if newDesc != issue.Description { + updates["description"] = newDesc } - if existing != nil { - // Issue exists - update it unless SkipUpdate is set - if opts.SkipUpdate { - result.Skipped++ - continue + newDesign := sqlite.ReplaceIDReferencesWithCache(issue.Design, cache) + if newDesign != issue.Design { + updates["design"] = newDesign + } + + newNotes := sqlite.ReplaceIDReferencesWithCache(issue.Notes, cache) + if newNotes != issue.Notes { + updates["notes"] = newNotes + } + + newAC := sqlite.ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache) + if newAC != issue.AcceptanceCriteria { + updates["acceptance_criteria"] = newAC + } + + if len(updates) > 0 { + if err := s.UpdateIssue(ctx, issue.ID, updates, "import-rename"); err != nil { + return fmt.Errorf("failed to update references in issue %s: %w", issue.ID, err) } + } + } - // Build updates map - updates := make(map[string]interface{}) - updates["title"] = issue.Title - updates["description"] = issue.Description - updates["status"] = issue.Status - updates["priority"] = issue.Priority - updates["issue_type"] = issue.IssueType - updates["design"] = issue.Design - updates["acceptance_criteria"] = issue.AcceptanceCriteria - updates["notes"] = issue.Notes + return nil +} - if issue.Assignee != "" { - updates["assignee"] = issue.Assignee - } else { - updates["assignee"] = nil - } +// upsertIssues creates new issues or updates existing ones using content-first matching +func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error { + // Get all DB issues once + dbIssues, err := sqliteStore.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + return fmt.Errorf("failed to get DB issues: %w", err) + } + + dbByHash := buildHashMap(dbIssues) + dbByID := buildIDMap(dbIssues) - if issue.ExternalRef != nil && *issue.ExternalRef != "" { - updates["external_ref"] = *issue.ExternalRef - } else { - updates["external_ref"] = nil - } + // Track what we need to create + var newIssues []*types.Issue + seenHashes := make(map[string]bool) - // Only update if data actually changed - if IssueDataChanged(existing, updates) { - if err := sqliteStore.UpdateIssue(ctx, issue.ID, updates, "import"); err != nil { - return fmt.Errorf("error updating issue %s: %w", issue.ID, err) - } - result.Updated++ - } else { + for _, incoming := range issues { + hash := incoming.ContentHash + if hash == "" { + // Shouldn't happen (computed earlier), but be defensive + hash = incoming.ComputeContentHash() + incoming.ContentHash = hash + } + + // Skip duplicates within incoming batch + if seenHashes[hash] { + result.Skipped++ + continue + } + seenHashes[hash] = true + + // Phase 1: Match by content hash first + if existing, found := dbByHash[hash]; found { + // Same content exists + if existing.ID == incoming.ID { + // Exact match (same content, same ID) - idempotent case result.Unchanged++ + } else { + // Same content, different ID - rename detected + if !opts.SkipUpdate { + if err := handleRename(ctx, sqliteStore, existing, incoming); err != nil { + return fmt.Errorf("failed to handle rename %s -> %s: %w", existing.ID, incoming.ID, err) + } + result.Updated++ + } else { + result.Skipped++ + } + } + continue + } + + // Phase 2: New content - check for ID collision + if existingWithID, found := dbByID[incoming.ID]; found { + // ID exists but different content - this is a collision + // The collision should have been handled earlier by handleCollisions + // If we reach here, it means collision wasn't resolved - treat as update + if !opts.SkipUpdate { + // Build updates map + updates := make(map[string]interface{}) + updates["title"] = incoming.Title + updates["description"] = incoming.Description + updates["status"] = incoming.Status + updates["priority"] = incoming.Priority + updates["issue_type"] = incoming.IssueType + updates["design"] = incoming.Design + updates["acceptance_criteria"] = incoming.AcceptanceCriteria + updates["notes"] = incoming.Notes + + if incoming.Assignee != "" { + updates["assignee"] = incoming.Assignee + } else { + updates["assignee"] = nil + } + + if incoming.ExternalRef != nil && *incoming.ExternalRef != "" { + updates["external_ref"] = *incoming.ExternalRef + } else { + updates["external_ref"] = nil + } + + // Only update if data actually changed + if IssueDataChanged(existingWithID, updates) { + if err := sqliteStore.UpdateIssue(ctx, incoming.ID, updates, "import"); err != nil { + return fmt.Errorf("error updating issue %s: %w", incoming.ID, err) + } + result.Updated++ + } else { + result.Unchanged++ + } + } else { + result.Skipped++ } } else { - // New issue - check for duplicates in import batch - if idx, seen := seenNew[issue.ID]; seen { - if opts.Strict { - return fmt.Errorf("duplicate issue ID %s in import (line %d)", issue.ID, idx) - } - result.Skipped++ - continue - } - seenNew[issue.ID] = len(newIssues) - newIssues = append(newIssues, issue) + // Truly new issue + newIssues = append(newIssues, incoming) } } diff --git a/internal/storage/sqlite/collision.go b/internal/storage/sqlite/collision.go index c9c5be06..62858c47 100644 --- a/internal/storage/sqlite/collision.go +++ b/internal/storage/sqlite/collision.go @@ -478,7 +478,7 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error { // Pre-compile all regexes once for the entire operation // This avoids recompiling the same patterns for each text field - cache, err := buildReplacementCache(idMapping) + cache, err := BuildReplacementCache(idMapping) if err != nil { return fmt.Errorf("failed to build replacement cache: %w", err) } @@ -494,25 +494,25 @@ func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[strin updates := make(map[string]interface{}) // Update description using cached regexes - newDesc := replaceIDReferencesWithCache(issue.Description, cache) + newDesc := ReplaceIDReferencesWithCache(issue.Description, cache) if newDesc != issue.Description { updates["description"] = newDesc } // Update design using cached regexes - newDesign := replaceIDReferencesWithCache(issue.Design, cache) + newDesign := ReplaceIDReferencesWithCache(issue.Design, cache) if newDesign != issue.Design { updates["design"] = newDesign } // Update notes using cached regexes - newNotes := replaceIDReferencesWithCache(issue.Notes, cache) + newNotes := ReplaceIDReferencesWithCache(issue.Notes, cache) if newNotes != issue.Notes { updates["notes"] = newNotes } // Update acceptance criteria using cached regexes - newAC := replaceIDReferencesWithCache(issue.AcceptanceCriteria, cache) + newAC := ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache) if newAC != issue.AcceptanceCriteria { updates["acceptance_criteria"] = newAC } @@ -542,9 +542,9 @@ type idReplacementCache struct { regex *regexp.Regexp } -// buildReplacementCache pre-compiles all regex patterns for an ID mapping +// BuildReplacementCache pre-compiles all regex patterns for an ID mapping // This cache should be created once per ID mapping and reused for all text replacements -func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) { +func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) { cache := make([]*idReplacementCache, 0, len(idMapping)) i := 0 for oldID, newID := range idMapping { @@ -566,9 +566,9 @@ func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, return cache, nil } -// replaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache +// ReplaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache // Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs -func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) string { +func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string { if len(cache) == 0 || text == "" { return text } @@ -593,16 +593,16 @@ func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) stri // placeholders, then replace placeholders with new IDs // // Note: This function compiles regexes on every call. For better performance when -// processing multiple text fields with the same ID mapping, use buildReplacementCache() -// and replaceIDReferencesWithCache() instead. +// processing multiple text fields with the same ID mapping, use BuildReplacementCache() +// and ReplaceIDReferencesWithCache() instead. func replaceIDReferences(text string, idMapping map[string]string) string { // Build cache (compiles regexes) - cache, err := buildReplacementCache(idMapping) + cache, err := BuildReplacementCache(idMapping) if err != nil { // Fallback to no replacement if regex compilation fails return text } - return replaceIDReferencesWithCache(text, cache) + return ReplaceIDReferencesWithCache(text, cache) } // updateDependencyReferences updates dependency records to use new IDs diff --git a/internal/storage/sqlite/collision_test.go b/internal/storage/sqlite/collision_test.go index 1f8378e8..6fa7eef9 100644 --- a/internal/storage/sqlite/collision_test.go +++ b/internal/storage/sqlite/collision_test.go @@ -802,14 +802,14 @@ func BenchmarkReplaceIDReferencesWithCache(b *testing.B) { "Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here." // Pre-compile the cache (this is done once in real usage) - cache, err := buildReplacementCache(idMapping) + cache, err := BuildReplacementCache(idMapping) if err != nil { b.Fatalf("failed to build cache: %v", err) } b.ResetTimer() for i := 0; i < b.N; i++ { - _ = replaceIDReferencesWithCache(text, cache) + _ = ReplaceIDReferencesWithCache(text, cache) } } @@ -838,11 +838,11 @@ func BenchmarkReplaceIDReferencesMultipleTexts(b *testing.B) { }) b.Run("with cache", func(b *testing.B) { - cache, _ := buildReplacementCache(idMapping) + cache, _ := BuildReplacementCache(idMapping) b.ResetTimer() for i := 0; i < b.N; i++ { for _, text := range texts { - _ = replaceIDReferencesWithCache(text, cache) + _ = ReplaceIDReferencesWithCache(text, cache) } } })