Implement content-first idempotent import (bd-98)
- Refactored upsertIssues to match by content hash first, then by ID - Added buildHashMap, buildIDMap, and handleRename helper functions - Import now detects and handles renames (same content, different ID) - Importing same data multiple times is idempotent (reports Unchanged) - Exported BuildReplacementCache and ReplaceIDReferencesWithCache for reuse - All 30+ existing import tests pass - Improved convergence for N-way collision scenarios Changes: - internal/importer/importer.go: Content-first matching in upsertIssues - internal/storage/sqlite/collision.go: Exported helper functions - internal/storage/sqlite/collision_test.go: Updated function names Amp-Thread-ID: https://ampcode.com/threads/T-3df96ad8-7c0e-4190-87b5-6d5327718f0a Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
@@ -263,68 +263,179 @@ func handleCollisions(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, is
|
|||||||
return issues, nil
|
return issues, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// upsertIssues creates new issues or updates existing ones
|
// buildHashMap creates a map of content hash → issue for O(1) lookup
|
||||||
func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error {
|
func buildHashMap(issues []*types.Issue) map[string]*types.Issue {
|
||||||
var newIssues []*types.Issue
|
result := make(map[string]*types.Issue)
|
||||||
seenNew := make(map[string]int)
|
|
||||||
|
|
||||||
for _, issue := range issues {
|
for _, issue := range issues {
|
||||||
// Check if issue exists in DB
|
if issue.ContentHash != "" {
|
||||||
existing, err := sqliteStore.GetIssue(ctx, issue.ID)
|
result[issue.ContentHash] = issue
|
||||||
if err != nil {
|
}
|
||||||
return fmt.Errorf("error checking issue %s: %w", issue.ID, err)
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildIDMap creates a map of ID → issue for O(1) lookup
|
||||||
|
func buildIDMap(issues []*types.Issue) map[string]*types.Issue {
|
||||||
|
result := make(map[string]*types.Issue)
|
||||||
|
for _, issue := range issues {
|
||||||
|
result[issue.ID] = issue
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleRename handles content match with different IDs (rename detected)
|
||||||
|
func handleRename(ctx context.Context, s *sqlite.SQLiteStorage, existing *types.Issue, incoming *types.Issue) error {
|
||||||
|
// Delete old ID
|
||||||
|
if err := s.DeleteIssue(ctx, existing.ID); err != nil {
|
||||||
|
return fmt.Errorf("failed to delete old ID %s: %w", existing.ID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create with new ID
|
||||||
|
if err := s.CreateIssue(ctx, incoming, "import-rename"); err != nil {
|
||||||
|
return fmt.Errorf("failed to create renamed issue %s: %w", incoming.ID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update references from old ID to new ID
|
||||||
|
idMapping := map[string]string{existing.ID: incoming.ID}
|
||||||
|
cache, err := sqlite.BuildReplacementCache(idMapping)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to build replacement cache: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all issues to update references
|
||||||
|
dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get issues for reference update: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update text field references in all issues
|
||||||
|
for _, issue := range dbIssues {
|
||||||
|
updates := make(map[string]interface{})
|
||||||
|
|
||||||
|
newDesc := sqlite.ReplaceIDReferencesWithCache(issue.Description, cache)
|
||||||
|
if newDesc != issue.Description {
|
||||||
|
updates["description"] = newDesc
|
||||||
}
|
}
|
||||||
|
|
||||||
if existing != nil {
|
newDesign := sqlite.ReplaceIDReferencesWithCache(issue.Design, cache)
|
||||||
// Issue exists - update it unless SkipUpdate is set
|
if newDesign != issue.Design {
|
||||||
if opts.SkipUpdate {
|
updates["design"] = newDesign
|
||||||
result.Skipped++
|
}
|
||||||
continue
|
|
||||||
|
newNotes := sqlite.ReplaceIDReferencesWithCache(issue.Notes, cache)
|
||||||
|
if newNotes != issue.Notes {
|
||||||
|
updates["notes"] = newNotes
|
||||||
|
}
|
||||||
|
|
||||||
|
newAC := sqlite.ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
|
||||||
|
if newAC != issue.AcceptanceCriteria {
|
||||||
|
updates["acceptance_criteria"] = newAC
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(updates) > 0 {
|
||||||
|
if err := s.UpdateIssue(ctx, issue.ID, updates, "import-rename"); err != nil {
|
||||||
|
return fmt.Errorf("failed to update references in issue %s: %w", issue.ID, err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Build updates map
|
return nil
|
||||||
updates := make(map[string]interface{})
|
}
|
||||||
updates["title"] = issue.Title
|
|
||||||
updates["description"] = issue.Description
|
|
||||||
updates["status"] = issue.Status
|
|
||||||
updates["priority"] = issue.Priority
|
|
||||||
updates["issue_type"] = issue.IssueType
|
|
||||||
updates["design"] = issue.Design
|
|
||||||
updates["acceptance_criteria"] = issue.AcceptanceCriteria
|
|
||||||
updates["notes"] = issue.Notes
|
|
||||||
|
|
||||||
if issue.Assignee != "" {
|
// upsertIssues creates new issues or updates existing ones using content-first matching
|
||||||
updates["assignee"] = issue.Assignee
|
func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues []*types.Issue, opts Options, result *Result) error {
|
||||||
} else {
|
// Get all DB issues once
|
||||||
updates["assignee"] = nil
|
dbIssues, err := sqliteStore.SearchIssues(ctx, "", types.IssueFilter{})
|
||||||
}
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get DB issues: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dbByHash := buildHashMap(dbIssues)
|
||||||
|
dbByID := buildIDMap(dbIssues)
|
||||||
|
|
||||||
if issue.ExternalRef != nil && *issue.ExternalRef != "" {
|
// Track what we need to create
|
||||||
updates["external_ref"] = *issue.ExternalRef
|
var newIssues []*types.Issue
|
||||||
} else {
|
seenHashes := make(map[string]bool)
|
||||||
updates["external_ref"] = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only update if data actually changed
|
for _, incoming := range issues {
|
||||||
if IssueDataChanged(existing, updates) {
|
hash := incoming.ContentHash
|
||||||
if err := sqliteStore.UpdateIssue(ctx, issue.ID, updates, "import"); err != nil {
|
if hash == "" {
|
||||||
return fmt.Errorf("error updating issue %s: %w", issue.ID, err)
|
// Shouldn't happen (computed earlier), but be defensive
|
||||||
}
|
hash = incoming.ComputeContentHash()
|
||||||
result.Updated++
|
incoming.ContentHash = hash
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
// Skip duplicates within incoming batch
|
||||||
|
if seenHashes[hash] {
|
||||||
|
result.Skipped++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seenHashes[hash] = true
|
||||||
|
|
||||||
|
// Phase 1: Match by content hash first
|
||||||
|
if existing, found := dbByHash[hash]; found {
|
||||||
|
// Same content exists
|
||||||
|
if existing.ID == incoming.ID {
|
||||||
|
// Exact match (same content, same ID) - idempotent case
|
||||||
result.Unchanged++
|
result.Unchanged++
|
||||||
|
} else {
|
||||||
|
// Same content, different ID - rename detected
|
||||||
|
if !opts.SkipUpdate {
|
||||||
|
if err := handleRename(ctx, sqliteStore, existing, incoming); err != nil {
|
||||||
|
return fmt.Errorf("failed to handle rename %s -> %s: %w", existing.ID, incoming.ID, err)
|
||||||
|
}
|
||||||
|
result.Updated++
|
||||||
|
} else {
|
||||||
|
result.Skipped++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 2: New content - check for ID collision
|
||||||
|
if existingWithID, found := dbByID[incoming.ID]; found {
|
||||||
|
// ID exists but different content - this is a collision
|
||||||
|
// The collision should have been handled earlier by handleCollisions
|
||||||
|
// If we reach here, it means collision wasn't resolved - treat as update
|
||||||
|
if !opts.SkipUpdate {
|
||||||
|
// Build updates map
|
||||||
|
updates := make(map[string]interface{})
|
||||||
|
updates["title"] = incoming.Title
|
||||||
|
updates["description"] = incoming.Description
|
||||||
|
updates["status"] = incoming.Status
|
||||||
|
updates["priority"] = incoming.Priority
|
||||||
|
updates["issue_type"] = incoming.IssueType
|
||||||
|
updates["design"] = incoming.Design
|
||||||
|
updates["acceptance_criteria"] = incoming.AcceptanceCriteria
|
||||||
|
updates["notes"] = incoming.Notes
|
||||||
|
|
||||||
|
if incoming.Assignee != "" {
|
||||||
|
updates["assignee"] = incoming.Assignee
|
||||||
|
} else {
|
||||||
|
updates["assignee"] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if incoming.ExternalRef != nil && *incoming.ExternalRef != "" {
|
||||||
|
updates["external_ref"] = *incoming.ExternalRef
|
||||||
|
} else {
|
||||||
|
updates["external_ref"] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only update if data actually changed
|
||||||
|
if IssueDataChanged(existingWithID, updates) {
|
||||||
|
if err := sqliteStore.UpdateIssue(ctx, incoming.ID, updates, "import"); err != nil {
|
||||||
|
return fmt.Errorf("error updating issue %s: %w", incoming.ID, err)
|
||||||
|
}
|
||||||
|
result.Updated++
|
||||||
|
} else {
|
||||||
|
result.Unchanged++
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result.Skipped++
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// New issue - check for duplicates in import batch
|
// Truly new issue
|
||||||
if idx, seen := seenNew[issue.ID]; seen {
|
newIssues = append(newIssues, incoming)
|
||||||
if opts.Strict {
|
|
||||||
return fmt.Errorf("duplicate issue ID %s in import (line %d)", issue.ID, idx)
|
|
||||||
}
|
|
||||||
result.Skipped++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seenNew[issue.ID] = len(newIssues)
|
|
||||||
newIssues = append(newIssues, issue)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -478,7 +478,7 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
|
|||||||
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
|
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
|
||||||
// Pre-compile all regexes once for the entire operation
|
// Pre-compile all regexes once for the entire operation
|
||||||
// This avoids recompiling the same patterns for each text field
|
// This avoids recompiling the same patterns for each text field
|
||||||
cache, err := buildReplacementCache(idMapping)
|
cache, err := BuildReplacementCache(idMapping)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to build replacement cache: %w", err)
|
return fmt.Errorf("failed to build replacement cache: %w", err)
|
||||||
}
|
}
|
||||||
@@ -494,25 +494,25 @@ func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[strin
|
|||||||
updates := make(map[string]interface{})
|
updates := make(map[string]interface{})
|
||||||
|
|
||||||
// Update description using cached regexes
|
// Update description using cached regexes
|
||||||
newDesc := replaceIDReferencesWithCache(issue.Description, cache)
|
newDesc := ReplaceIDReferencesWithCache(issue.Description, cache)
|
||||||
if newDesc != issue.Description {
|
if newDesc != issue.Description {
|
||||||
updates["description"] = newDesc
|
updates["description"] = newDesc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update design using cached regexes
|
// Update design using cached regexes
|
||||||
newDesign := replaceIDReferencesWithCache(issue.Design, cache)
|
newDesign := ReplaceIDReferencesWithCache(issue.Design, cache)
|
||||||
if newDesign != issue.Design {
|
if newDesign != issue.Design {
|
||||||
updates["design"] = newDesign
|
updates["design"] = newDesign
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update notes using cached regexes
|
// Update notes using cached regexes
|
||||||
newNotes := replaceIDReferencesWithCache(issue.Notes, cache)
|
newNotes := ReplaceIDReferencesWithCache(issue.Notes, cache)
|
||||||
if newNotes != issue.Notes {
|
if newNotes != issue.Notes {
|
||||||
updates["notes"] = newNotes
|
updates["notes"] = newNotes
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update acceptance criteria using cached regexes
|
// Update acceptance criteria using cached regexes
|
||||||
newAC := replaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
|
newAC := ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
|
||||||
if newAC != issue.AcceptanceCriteria {
|
if newAC != issue.AcceptanceCriteria {
|
||||||
updates["acceptance_criteria"] = newAC
|
updates["acceptance_criteria"] = newAC
|
||||||
}
|
}
|
||||||
@@ -542,9 +542,9 @@ type idReplacementCache struct {
|
|||||||
regex *regexp.Regexp
|
regex *regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildReplacementCache pre-compiles all regex patterns for an ID mapping
|
// BuildReplacementCache pre-compiles all regex patterns for an ID mapping
|
||||||
// This cache should be created once per ID mapping and reused for all text replacements
|
// This cache should be created once per ID mapping and reused for all text replacements
|
||||||
func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
|
func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
|
||||||
cache := make([]*idReplacementCache, 0, len(idMapping))
|
cache := make([]*idReplacementCache, 0, len(idMapping))
|
||||||
i := 0
|
i := 0
|
||||||
for oldID, newID := range idMapping {
|
for oldID, newID := range idMapping {
|
||||||
@@ -566,9 +566,9 @@ func buildReplacementCache(idMapping map[string]string) ([]*idReplacementCache,
|
|||||||
return cache, nil
|
return cache, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// replaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
|
// ReplaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs using a pre-compiled cache
|
||||||
// Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs
|
// Uses a two-phase approach to avoid replacement conflicts: first replace with placeholders, then replace with new IDs
|
||||||
func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
|
func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
|
||||||
if len(cache) == 0 || text == "" {
|
if len(cache) == 0 || text == "" {
|
||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
@@ -593,16 +593,16 @@ func replaceIDReferencesWithCache(text string, cache []*idReplacementCache) stri
|
|||||||
// placeholders, then replace placeholders with new IDs
|
// placeholders, then replace placeholders with new IDs
|
||||||
//
|
//
|
||||||
// Note: This function compiles regexes on every call. For better performance when
|
// Note: This function compiles regexes on every call. For better performance when
|
||||||
// processing multiple text fields with the same ID mapping, use buildReplacementCache()
|
// processing multiple text fields with the same ID mapping, use BuildReplacementCache()
|
||||||
// and replaceIDReferencesWithCache() instead.
|
// and ReplaceIDReferencesWithCache() instead.
|
||||||
func replaceIDReferences(text string, idMapping map[string]string) string {
|
func replaceIDReferences(text string, idMapping map[string]string) string {
|
||||||
// Build cache (compiles regexes)
|
// Build cache (compiles regexes)
|
||||||
cache, err := buildReplacementCache(idMapping)
|
cache, err := BuildReplacementCache(idMapping)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Fallback to no replacement if regex compilation fails
|
// Fallback to no replacement if regex compilation fails
|
||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
return replaceIDReferencesWithCache(text, cache)
|
return ReplaceIDReferencesWithCache(text, cache)
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateDependencyReferences updates dependency records to use new IDs
|
// updateDependencyReferences updates dependency records to use new IDs
|
||||||
|
|||||||
@@ -802,14 +802,14 @@ func BenchmarkReplaceIDReferencesWithCache(b *testing.B) {
|
|||||||
"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."
|
"Also bd-6, bd-7, bd-8, bd-9, and bd-10 are referenced here."
|
||||||
|
|
||||||
// Pre-compile the cache (this is done once in real usage)
|
// Pre-compile the cache (this is done once in real usage)
|
||||||
cache, err := buildReplacementCache(idMapping)
|
cache, err := BuildReplacementCache(idMapping)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.Fatalf("failed to build cache: %v", err)
|
b.Fatalf("failed to build cache: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
_ = replaceIDReferencesWithCache(text, cache)
|
_ = ReplaceIDReferencesWithCache(text, cache)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -838,11 +838,11 @@ func BenchmarkReplaceIDReferencesMultipleTexts(b *testing.B) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
b.Run("with cache", func(b *testing.B) {
|
b.Run("with cache", func(b *testing.B) {
|
||||||
cache, _ := buildReplacementCache(idMapping)
|
cache, _ := BuildReplacementCache(idMapping)
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
for _, text := range texts {
|
for _, text := range texts {
|
||||||
_ = replaceIDReferencesWithCache(text, cache)
|
_ = ReplaceIDReferencesWithCache(text, cache)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user