Implement content-first idempotent import (bd-98)
- Refactored upsertIssues to match by content hash first, then by ID - Added buildHashMap, buildIDMap, and handleRename helper functions - Import now detects and handles renames (same content, different ID) - Importing same data multiple times is idempotent (reports Unchanged) - Exported BuildReplacementCache and ReplaceIDReferencesWithCache for reuse - All 30+ existing import tests pass - Improved convergence for N-way collision scenarios Changes: - internal/importer/importer.go: Content-first matching in upsertIssues - internal/storage/sqlite/collision.go: Exported helper functions - internal/storage/sqlite/collision_test.go: Updated function names Amp-Thread-ID: https://ampcode.com/threads/T-3df96ad8-7c0e-4190-87b5-6d5327718f0a Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
@@ -263,68 +263,179 @@ func handleCollisions(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, is
|
||||
return issues, nil
|
||||
}
|
||||
|
||||
// upsertIssues creates new issues or updates existing ones
|
||||
func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error {
|
||||
var newIssues []*types.Issue
|
||||
seenNew := make(map[string]int)
|
||||
|
||||
// buildHashMap creates a map of content hash → issue for O(1) lookup
|
||||
func buildHashMap(issues []*types.Issue) map[string]*types.Issue {
|
||||
result := make(map[string]*types.Issue)
|
||||
for _, issue := range issues {
|
||||
// Check if issue exists in DB
|
||||
existing, err := sqliteStore.GetIssue(ctx, issue.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking issue %s: %w", issue.ID, err)
|
||||
if issue.ContentHash != "" {
|
||||
result[issue.ContentHash] = issue
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// buildIDMap creates a map of ID → issue for O(1) lookup
|
||||
func buildIDMap(issues []*types.Issue) map[string]*types.Issue {
|
||||
result := make(map[string]*types.Issue)
|
||||
for _, issue := range issues {
|
||||
result[issue.ID] = issue
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// handleRename handles content match with different IDs (rename detected)
|
||||
func handleRename(ctx context.Context, s *sqlite.SQLiteStorage, existing *types.Issue, incoming *types.Issue) error {
|
||||
// Delete old ID
|
||||
if err := s.DeleteIssue(ctx, existing.ID); err != nil {
|
||||
return fmt.Errorf("failed to delete old ID %s: %w", existing.ID, err)
|
||||
}
|
||||
|
||||
// Create with new ID
|
||||
if err := s.CreateIssue(ctx, incoming, "import-rename"); err != nil {
|
||||
return fmt.Errorf("failed to create renamed issue %s: %w", incoming.ID, err)
|
||||
}
|
||||
|
||||
// Update references from old ID to new ID
|
||||
idMapping := map[string]string{existing.ID: incoming.ID}
|
||||
cache, err := sqlite.BuildReplacementCache(idMapping)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to build replacement cache: %w", err)
|
||||
}
|
||||
|
||||
// Get all issues to update references
|
||||
dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get issues for reference update: %w", err)
|
||||
}
|
||||
|
||||
// Update text field references in all issues
|
||||
for _, issue := range dbIssues {
|
||||
updates := make(map[string]interface{})
|
||||
|
||||
newDesc := sqlite.ReplaceIDReferencesWithCache(issue.Description, cache)
|
||||
if newDesc != issue.Description {
|
||||
updates["description"] = newDesc
|
||||
}
|
||||
|
||||
if existing != nil {
|
||||
// Issue exists - update it unless SkipUpdate is set
|
||||
if opts.SkipUpdate {
|
||||
result.Skipped++
|
||||
continue
|
||||
newDesign := sqlite.ReplaceIDReferencesWithCache(issue.Design, cache)
|
||||
if newDesign != issue.Design {
|
||||
updates["design"] = newDesign
|
||||
}
|
||||
|
||||
newNotes := sqlite.ReplaceIDReferencesWithCache(issue.Notes, cache)
|
||||
if newNotes != issue.Notes {
|
||||
updates["notes"] = newNotes
|
||||
}
|
||||
|
||||
newAC := sqlite.ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
|
||||
if newAC != issue.AcceptanceCriteria {
|
||||
updates["acceptance_criteria"] = newAC
|
||||
}
|
||||
|
||||
if len(updates) > 0 {
|
||||
if err := s.UpdateIssue(ctx, issue.ID, updates, "import-rename"); err != nil {
|
||||
return fmt.Errorf("failed to update references in issue %s: %w", issue.ID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build updates map
|
||||
updates := make(map[string]interface{})
|
||||
updates["title"] = issue.Title
|
||||
updates["description"] = issue.Description
|
||||
updates["status"] = issue.Status
|
||||
updates["priority"] = issue.Priority
|
||||
updates["issue_type"] = issue.IssueType
|
||||
updates["design"] = issue.Design
|
||||
updates["acceptance_criteria"] = issue.AcceptanceCriteria
|
||||
updates["notes"] = issue.Notes
|
||||
return nil
|
||||
}
|
||||
|
||||
if issue.Assignee != "" {
|
||||
updates["assignee"] = issue.Assignee
|
||||
} else {
|
||||
updates["assignee"] = nil
|
||||
}
|
||||
// upsertIssues creates new issues or updates existing ones using content-first matching
|
||||
func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error {
|
||||
// Get all DB issues once
|
||||
dbIssues, err := sqliteStore.SearchIssues(ctx, "", types.IssueFilter{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get DB issues: %w", err)
|
||||
}
|
||||
|
||||
dbByHash := buildHashMap(dbIssues)
|
||||
dbByID := buildIDMap(dbIssues)
|
||||
|
||||
if issue.ExternalRef != nil && *issue.ExternalRef != "" {
|
||||
updates["external_ref"] = *issue.ExternalRef
|
||||
} else {
|
||||
updates["external_ref"] = nil
|
||||
}
|
||||
// Track what we need to create
|
||||
var newIssues []*types.Issue
|
||||
seenHashes := make(map[string]bool)
|
||||
|
||||
// Only update if data actually changed
|
||||
if IssueDataChanged(existing, updates) {
|
||||
if err := sqliteStore.UpdateIssue(ctx, issue.ID, updates, "import"); err != nil {
|
||||
return fmt.Errorf("error updating issue %s: %w", issue.ID, err)
|
||||
}
|
||||
result.Updated++
|
||||
} else {
|
||||
for _, incoming := range issues {
|
||||
hash := incoming.ContentHash
|
||||
if hash == "" {
|
||||
// Shouldn't happen (computed earlier), but be defensive
|
||||
hash = incoming.ComputeContentHash()
|
||||
incoming.ContentHash = hash
|
||||
}
|
||||
|
||||
// Skip duplicates within incoming batch
|
||||
if seenHashes[hash] {
|
||||
result.Skipped++
|
||||
continue
|
||||
}
|
||||
seenHashes[hash] = true
|
||||
|
||||
// Phase 1: Match by content hash first
|
||||
if existing, found := dbByHash[hash]; found {
|
||||
// Same content exists
|
||||
if existing.ID == incoming.ID {
|
||||
// Exact match (same content, same ID) - idempotent case
|
||||
result.Unchanged++
|
||||
} else {
|
||||
// Same content, different ID - rename detected
|
||||
if !opts.SkipUpdate {
|
||||
if err := handleRename(ctx, sqliteStore, existing, incoming); err != nil {
|
||||
return fmt.Errorf("failed to handle rename %s -> %s: %w", existing.ID, incoming.ID, err)
|
||||
}
|
||||
result.Updated++
|
||||
} else {
|
||||
result.Skipped++
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Phase 2: New content - check for ID collision
|
||||
if existingWithID, found := dbByID[incoming.ID]; found {
|
||||
// ID exists but different content - this is a collision
|
||||
// The collision should have been handled earlier by handleCollisions
|
||||
// If we reach here, it means collision wasn't resolved - treat as update
|
||||
if !opts.SkipUpdate {
|
||||
// Build updates map
|
||||
updates := make(map[string]interface{})
|
||||
updates["title"] = incoming.Title
|
||||
updates["description"] = incoming.Description
|
||||
updates["status"] = incoming.Status
|
||||
updates["priority"] = incoming.Priority
|
||||
updates["issue_type"] = incoming.IssueType
|
||||
updates["design"] = incoming.Design
|
||||
updates["acceptance_criteria"] = incoming.AcceptanceCriteria
|
||||
updates["notes"] = incoming.Notes
|
||||
|
||||
if incoming.Assignee != "" {
|
||||
updates["assignee"] = incoming.Assignee
|
||||
} else {
|
||||
updates["assignee"] = nil
|
||||
}
|
||||
|
||||
if incoming.ExternalRef != nil && *incoming.ExternalRef != "" {
|
||||
updates["external_ref"] = *incoming.ExternalRef
|
||||
} else {
|
||||
updates["external_ref"] = nil
|
||||
}
|
||||
|
||||
// Only update if data actually changed
|
||||
if IssueDataChanged(existingWithID, updates) {
|
||||
if err := sqliteStore.UpdateIssue(ctx, incoming.ID, updates, "import"); err != nil {
|
||||
return fmt.Errorf("error updating issue %s: %w", incoming.ID, err)
|
||||
}
|
||||
result.Updated++
|
||||
} else {
|
||||
result.Unchanged++
|
||||
}
|
||||
} else {
|
||||
result.Skipped++
|
||||
}
|
||||
} else {
|
||||
// New issue - check for duplicates in import batch
|
||||
if idx, seen := seenNew[issue.ID]; seen {
|
||||
if opts.Strict {
|
||||
return fmt.Errorf("duplicate issue ID %s in import (line %d)", issue.ID, idx)
|
||||
}
|
||||
result.Skipped++
|
||||
continue
|
||||
}
|
||||
seenNew[issue.ID] = len(newIssues)
|
||||
newIssues = append(newIssues, issue)
|
||||
// Truly new issue
|
||||
newIssues = append(newIssues, incoming)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -478,7 +478,7 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
|
||||
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
|
||||
// Pre-compile all regexes once for the entire operation
|
||||
// This avoids recompiling the same patterns for each text field
|
||||
cache, err := buildReplacementCache(idMapping)
|
||||
cache, err := BuildReplacementCache(idMapping)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to build replacement cache: %w", err)
|
||||
}
|
||||
@@ -494,25 +494,25 @@ func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[strin
|
||||
updates := make(map[string]interface{})
|
||||
|
||||
// Update description using cached regexes
|
||||
newDesc := replaceIDReferencesWithCache(issue.Description, cache)
|
||||
newDesc := ReplaceIDReferencesWithCache(issue.Description, cache)
|
||||
if newDesc != issue.Description {
|
||||
updates["description"] = newDesc
|
||||
}
|
||||
|
||||
// Update design using cached regexes
|
||||
newDesign := replaceIDReferencesWithCache(issue.Design, cache)
|
||||
newDesign := ReplaceIDReferencesWithCache(issue.Design, cache)
|
||||
if newDesign != issue.Design {
|
||||
updates["design"] = newDesign
|
||||
}
|
||||
|
||||
// Update notes using cached regexes
|
||||
newNotes := replaceIDReferencesWithCache(issue.Notes, cache)
|
||||
newNotes := ReplaceIDReferencesWithCache(issue.Notes, cache)
|
||||
if newNotes != issue.Notes {
|
||||
updates["notes"] = newNotes
|
||||
}
|
||||
|
||||
// Update acceptance criteria using cached regexes
|
||||
newAC := replaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
|
||||
newAC := ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
|
||||
if newAC != issue.AcceptanceCriteria {
|
||||
updates["acceptance_criteria"] = newAC
|
||||
}
|
||||
@@ -542,9 +542,9 @@ type idReplacementCache struct {
|
||||
regex *regexp.Regexp
|
||||
}
|
||||
|
||||
// buildReplacementCache pre-compiles all regex patterns for an ID mapping
|
||||
// BuildReplacementCache pre-compiles all regex patterns for an ID mapping
|
||||
// This cache should be created once per ID mapping and reused for all text replacements
|
||||
func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
|
||||
func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
|
||||
cache := make([]*idReplacementCache, 0, len(idMapping))
|
||||
i := 0
|
||||
for oldID, newID := range idMapping {
|
||||
@@ -566,9 +566,9 @@ func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache,
|
||||
return cache, nil
|
||||
}
|
||||
|
||||
// replaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
|
||||
// ReplaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
|
||||
// Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs
|
||||
func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
|
||||
func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
|
||||
if len(cache) == 0 || text == "" {
|
||||
return text
|
||||
}
|
||||
@@ -593,16 +593,16 @@ func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) stri
|
||||
// placeholders, then replace placeholders with new IDs
|
||||
//
|
||||
// Note: This function compiles regexes on every call. For better performance when
|
||||
// processing multiple text fields with the same ID mapping, use buildReplacementCache()
|
||||
// and replaceIDReferencesWithCache() instead.
|
||||
// processing multiple text fields with the same ID mapping, use BuildReplacementCache()
|
||||
// and ReplaceIDReferencesWithCache() instead.
|
||||
func replaceIDReferences(text string, idMapping map[string]string) string {
|
||||
// Build cache (compiles regexes)
|
||||
cache, err := buildReplacementCache(idMapping)
|
||||
cache, err := BuildReplacementCache(idMapping)
|
||||
if err != nil {
|
||||
// Fallback to no replacement if regex compilation fails
|
||||
return text
|
||||
}
|
||||
return replaceIDReferencesWithCache(text, cache)
|
||||
return ReplaceIDReferencesWithCache(text, cache)
|
||||
}
|
||||
|
||||
// updateDependencyReferences updates dependency records to use new IDs
|
||||
|
||||
@@ -802,14 +802,14 @@ func BenchmarkReplaceIDReferencesWithCache(b *testing.B) {
|
||||
"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."
|
||||
|
||||
// Pre-compile the cache (this is done once in real usage)
|
||||
cache, err := buildReplacementCache(idMapping)
|
||||
cache, err := BuildReplacementCache(idMapping)
|
||||
if err != nil {
|
||||
b.Fatalf("failed to build cache: %v", err)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = replaceIDReferencesWithCache(text, cache)
|
||||
_ = ReplaceIDReferencesWithCache(text, cache)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -838,11 +838,11 @@ func BenchmarkReplaceIDReferencesMultipleTexts(b *testing.B) {
|
||||
})
|
||||
|
||||
b.Run("with cache", func(b *testing.B) {
|
||||
cache, _ := buildReplacementCache(idMapping)
|
||||
cache, _ := BuildReplacementCache(idMapping)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, text := range texts {
|
||||
_ = replaceIDReferencesWithCache(text, cache)
|
||||
_ = ReplaceIDReferencesWithCache(text, cache)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user