package sqlite

import (
	"context"
	"crypto/sha256"
	"fmt"
	"regexp"
	"strings"

	"github.com/steveyegge/beads/internal/types"
)

// CollisionResult categorizes incoming issues by their relationship to existing DB state
type CollisionResult struct {
	ExactMatches []string           // IDs that match exactly (idempotent import)
	Collisions   []*CollisionDetail // Issues with same ID but different content
	NewIssues    []string           // IDs that don't exist in DB yet
	Renames      []*RenameDetail    // Issues with same content but different ID (renames)
}

// RenameDetail captures a rename/remap detected during collision detection
type RenameDetail struct {
	OldID string       // ID in database (to be deleted)
	NewID string       // ID in incoming (to be created)
	Issue *types.Issue // The issue with new ID
}

// CollisionDetail provides detailed information about a collision
type CollisionDetail struct {
	ID                string       // The issue ID that collided
	IncomingIssue     *types.Issue // The issue from the import file
	ExistingIssue     *types.Issue // The issue currently in the database
	ConflictingFields []string     // List of field names that differ
	RemapIncoming     bool         // If true, remap incoming; if false, remap existing
}

// DetectCollisions compares incoming JSONL issues against DB state.
// It distinguishes between:
// 1. Exact match (idempotent) - ID and content are identical
// 2. ID match but different content (collision) - same ID, different fields
// 3. New issue - ID doesn't exist in DB
// 4. Rename detected - different ID but same content (from a prior remap)
//
// Returns a CollisionResult categorizing all incoming issues.
func DetectCollisions(ctx context.Context, s *SQLiteStorage, incomingIssues []*types.Issue) (*CollisionResult, error) {
	result := &CollisionResult{
		ExactMatches: make([]string, 0),
		Collisions:   make([]*CollisionDetail, 0),
		NewIssues:    make([]string, 0),
		Renames:      make([]*RenameDetail, 0),
	}

	// Phase 1: Deduplicate within the incoming batch.
	// Group by content hash to find duplicates with different IDs.
	deduped := deduplicateIncomingIssues(incomingIssues)

	// Phase 2: Build a content hash map of all DB issues.
	// This allows us to detect renames (different ID, same content).
	dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return nil, fmt.Errorf("failed to get all DB issues: %w", err)
	}
	contentToDBIssue := make(map[string]*types.Issue)
	for _, dbIssue := range dbIssues {
		hash := hashIssueContent(dbIssue)
		contentToDBIssue[hash] = dbIssue
	}

	// Phase 3: Process each incoming issue
	for _, incoming := range deduped {
		incomingHash := hashIssueContent(incoming)

		// Check if the issue exists in the database by ID
		existing, err := s.GetIssue(ctx, incoming.ID)
		if err != nil {
			return nil, fmt.Errorf("failed to check issue %s: %w", incoming.ID, err)
		}

		if existing == nil {
			// Issue doesn't exist by ID - check for a rename by content
			if dbMatch, found := contentToDBIssue[incomingHash]; found {
				// Same content, different ID - this is a rename/remap.
				// The incoming ID is the NEW canonical ID; the existing DB ID is OLD.
				// Record this as a rename to be handled later (read-only detection).
				result.Renames = append(result.Renames, &RenameDetail{
					OldID: dbMatch.ID,
					NewID: incoming.ID,
					Issue: incoming,
				})
				// Don't add to NewIssues - renames are handled by ApplyCollisionResolution
			} else {
				// Truly new issue
				result.NewIssues = append(result.NewIssues, incoming.ID)
			}
			continue
		}

		// Issue exists by ID - compare content
		conflicts := compareIssues(existing, incoming)
		if len(conflicts) == 0 {
			// No differences - exact match (idempotent)
			result.ExactMatches = append(result.ExactMatches, incoming.ID)
		} else {
			// Same ID but different content - collision
			result.Collisions = append(result.Collisions, &CollisionDetail{
				ID:                incoming.ID,
				IncomingIssue:     incoming,
				ExistingIssue:     existing,
				ConflictingFields: conflicts,
			})
		}
	}

	return result, nil
}

// compareIssues compares two issues and returns a list of field names that differ.
// Timestamps (CreatedAt, UpdatedAt, ClosedAt) are intentionally not compared.
// Dependencies are also not compared (they are handled separately during import).
func compareIssues(existing, incoming *types.Issue) []string {
	conflicts := make([]string, 0)

	// Compare all relevant fields
	if existing.Title != incoming.Title {
		conflicts = append(conflicts, "title")
	}
	if existing.Description != incoming.Description {
		conflicts = append(conflicts, "description")
	}
	if existing.Design != incoming.Design {
		conflicts = append(conflicts, "design")
	}
	if existing.AcceptanceCriteria != incoming.AcceptanceCriteria {
		conflicts = append(conflicts, "acceptance_criteria")
	}
	if existing.Notes != incoming.Notes {
		conflicts = append(conflicts, "notes")
	}
	if existing.Status != incoming.Status {
		conflicts = append(conflicts, "status")
	}
	if existing.Priority != incoming.Priority {
		conflicts = append(conflicts, "priority")
	}
	if existing.IssueType != incoming.IssueType {
		conflicts = append(conflicts, "issue_type")
	}
	if existing.Assignee != incoming.Assignee {
		conflicts = append(conflicts, "assignee")
	}

	// Compare EstimatedMinutes (handle nil cases)
	if !equalIntPtr(existing.EstimatedMinutes, incoming.EstimatedMinutes) {
		conflicts = append(conflicts, "estimated_minutes")
	}

	// Compare ExternalRef (handle nil cases)
	if !equalStringPtr(existing.ExternalRef, incoming.ExternalRef) {
		conflicts = append(conflicts, "external_ref")
	}

	return conflicts
}

// equalIntPtr compares two *int pointers for equality
func equalIntPtr(a, b *int) bool {
	if a == nil && b == nil {
		return true
	}
	if a == nil || b == nil {
		return false
	}
	return *a == *b
}

// equalStringPtr compares two *string pointers for equality
func equalStringPtr(a, b *string) bool {
	if a == nil && b == nil {
		return true
	}
	if a == nil || b == nil {
		return false
	}
	return *a == *b
}

// hashIssueContent creates a deterministic hash of an issue's content.
// Uses all substantive fields (excluding timestamps and ID) to ensure
// that identical content produces identical hashes across all clones.
func hashIssueContent(issue *types.Issue) string {
	h := sha256.New()

	// Hash all substantive fields in a stable order
	h.Write([]byte(issue.Title))
	h.Write([]byte{0}) // separator
	h.Write([]byte(issue.Description))
	h.Write([]byte{0})
	h.Write([]byte(issue.Design))
	h.Write([]byte{0})
	h.Write([]byte(issue.AcceptanceCriteria))
	h.Write([]byte{0})
	h.Write([]byte(issue.Notes))
	h.Write([]byte{0})
	h.Write([]byte(issue.Status))
	h.Write([]byte{0})
	h.Write([]byte(fmt.Sprintf("%d", issue.Priority)))
	h.Write([]byte{0})
	h.Write([]byte(issue.IssueType))
	h.Write([]byte{0})
	h.Write([]byte(issue.Assignee))
	h.Write([]byte{0})
	if issue.ExternalRef != nil {
		h.Write([]byte(*issue.ExternalRef))
	}

	return fmt.Sprintf("%x", h.Sum(nil))
}
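// As a quick illustration (hypothetical data, not part of the package API):
// two issues that differ only in ID and timestamps hash identically, which is
// exactly what lets DetectCollisions recognize a rename.
//
//	a := &types.Issue{ID: "bd-1", Title: "Fix login", Priority: 1}
//	b := &types.Issue{ID: "bd-2", Title: "Fix login", Priority: 1}
//	_ = hashIssueContent(a) == hashIssueContent(b) // true: the ID is excluded from the hash
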
// ApplyCollisionResolution applies the modifications detected during collision detection.
// This function handles:
// 1. Rename deletions (delete old IDs for renamed issues)
// 2. Creating remapped issues (based on mapping)
// 3. Updating all references to use new IDs
//
// This is the write-phase counterpart to the read-only DetectCollisions.
func ApplyCollisionResolution(ctx context.Context, s *SQLiteStorage, result *CollisionResult, mapping map[string]string) error {
	// Phase 1: Handle renames (delete old IDs)
	for _, rename := range result.Renames {
		if err := s.DeleteIssue(ctx, rename.OldID); err != nil {
			return fmt.Errorf("failed to delete renamed issue %s (renamed to %s): %w", rename.OldID, rename.NewID, err)
		}
	}

	// Phase 2: Update references using the mapping
	if len(mapping) > 0 {
		if err := updateReferences(ctx, s, mapping); err != nil {
			return fmt.Errorf("failed to update references: %w", err)
		}
	}

	return nil
}

// ScoreCollisions determines which version of each colliding issue to keep vs. remap.
// Uses deterministic content-based hashing to ensure all clones make the same decision.
//
// Decision process:
// 1. Hash both versions (existing and incoming) based on content
// 2. Keep the version with the lexicographically LOWER hash
// 3. Mark the other version for remapping
//
// This ensures:
// - Deterministic: the same collision always produces the same result
// - Symmetric: works regardless of which clone syncs first
// - Idempotent: converges to the same state across all clones
func ScoreCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, allIssues []*types.Issue) error {
	// Determine which version to keep for each collision
	for _, collision := range collisions {
		existingHash := hashIssueContent(collision.ExistingIssue)
		incomingHash := hashIssueContent(collision.IncomingIssue)

		// Keep the version with the lower hash (deterministic winner).
		// If incoming has the lower hash, we remap existing and keep incoming.
		// If existing has the lower hash, we remap incoming and keep existing.
		collision.RemapIncoming = existingHash < incomingHash
	}

	return nil
}

// deduplicateIncomingIssues removes content-duplicate issues within the incoming batch.
// Returns the deduplicated slice, keeping the issue with the lexicographically
// smallest ID for each unique content.
func deduplicateIncomingIssues(issues []*types.Issue) []*types.Issue {
	// Group issues by content key (ignoring ID and timestamps)
	type contentKey struct {
		title              string
		description        string
		design             string
		acceptanceCriteria string
		notes              string
		status             string
		priority           int
		issueType          string
		assignee           string
	}

	seen := make(map[contentKey]*types.Issue)
	result := make([]*types.Issue, 0, len(issues))

	for _, issue := range issues {
		key := contentKey{
			title:              issue.Title,
			description:        issue.Description,
			design:             issue.Design,
			acceptanceCriteria: issue.AcceptanceCriteria,
			notes:              issue.Notes,
			status:             string(issue.Status),
			priority:           issue.Priority,
			issueType:          string(issue.IssueType),
			assignee:           issue.Assignee,
		}

		if existing, found := seen[key]; found {
			// Duplicate found - keep the one with the lexicographically smaller ID
			if issue.ID < existing.ID {
				// Replace the previously kept issue with this one (smaller ID)
				for i, r := range result {
					if r.ID == existing.ID {
						result[i] = issue
						break
					}
				}
				seen[key] = issue
			}
			// Otherwise skip this duplicate
		} else {
			// First time seeing this content
			seen[key] = issue
			result = append(result, issue)
		}
	}

	return result
}
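// Note that the comparison above is lexicographic on the ID string, not numeric:
// given duplicate content under the (hypothetical) IDs "bd-7" and "bd-42",
// deduplication keeps "bd-42", since "bd-42" < "bd-7" as strings.
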
// RemapCollisions handles ID remapping for colliding issues based on content hash.
// For each collision, either the incoming or the existing issue is remapped
// (determined by ScoreCollisions). Returns a map of old ID -> new ID for reporting.
//
// Process:
// 1. If RemapIncoming=true: remap the incoming issue, keep the existing one
// 2. If RemapIncoming=false: remap the existing issue, replace it with the incoming one
//
// This ensures deterministic, symmetric collision resolution across all clones.
//
// The function retries up to 3 times on UNIQUE constraint failures.
func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, incomingIssues []*types.Issue) (map[string]string, error) {
	const maxRetries = 3
	var lastErr error

	for attempt := 0; attempt < maxRetries; attempt++ {
		idMapping, err := remapCollisionsOnce(ctx, s, collisions, incomingIssues)
		if err == nil {
			return idMapping, nil
		}
		lastErr = err
		if !isUniqueConstraintError(err) {
			return nil, err
		}
		// REMOVED (bd-c7af): Counter sync on retry - no longer needed with hash IDs
	}

	return nil, fmt.Errorf("failed after %d retries due to UNIQUE constraint violations: %w", maxRetries, lastErr)
}

// remapCollisionsOnce performs a single attempt at collision resolution.
// This is the actual implementation that RemapCollisions wraps with retry logic.
// REMOVED (bd-8e05): With hash-based IDs, collision remapping is no longer needed.
func remapCollisionsOnce(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, _ []*types.Issue) (map[string]string, error) {
	// With hash-based IDs, collisions should not occur. If they do, it's a bug.
	if len(collisions) > 0 {
		return nil, fmt.Errorf("collision remapping no longer supported with hash IDs - %d collisions detected", len(collisions))
	}
	return nil, nil
}
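// Hypothetical call-site sketch (ctx, store, and incoming are assumed to exist):
// with hash-based IDs the expected path is the empty-collision fast path,
// which returns a nil mapping and a nil error.
//
//	mapping, err := RemapCollisions(ctx, store, nil, incoming)
//	// mapping == nil, err == nil when no collisions are present
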
// OLD IMPLEMENTATION REMOVED (bd-8e05) - retained for reference during migration.
// The original 250+ line function implemented sequential ID-based collision
// remapping, which is obsolete with hash-based IDs.

// _OLD_remapCollisionsOnce_REMOVED stubs out the old implementation to avoid
// compile errors; the actual 250+ line body has been removed (bd-8e05).
func _OLD_remapCollisionsOnce_REMOVED(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, _ []*types.Issue) (map[string]string, error) {
	// Original implementation removed - see git history before bd-8e05
	return nil, nil
}

/* OLD CODE REMOVED (bd-8e05) - kept for git history reference

	if collision.RemapIncoming {
		// Incoming has higher hash -> remap incoming, keep existing

		// Record mapping
		idMapping[oldID] = newID

		// Update incoming issue ID
		collision.IncomingIssue.ID = newID

		// Create incoming issue with new ID
		if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-remap"); err != nil {
			return nil, fmt.Errorf("failed to create remapped incoming issue %s -> %s: %w", oldID, newID, err)
		}
	} else {
		// Existing has higher hash -> remap existing, replace with incoming

		// Record mapping FIRST before any operations
		idMapping[oldID] = newID

		// Create a copy of the existing issue with the new ID
		existingCopy := *collision.ExistingIssue
		existingCopy.ID = newID
		if err := s.CreateIssue(ctx, &existingCopy, "import-remap"); err != nil {
			return nil, fmt.Errorf("failed to create remapped existing issue %s -> %s: %w", oldID, newID, err)
		}

		// Create incoming issue with original ID (this will REPLACE when we delete old ID).
		// We would do this BEFORE deleting so both issues exist temporarily.
		// Note: This will fail if the incoming ID already exists, which is expected in
		// this flow, so we skip this step and do it after deletion.

		// Note: We do NOT copy dependencies here - DeleteIssue will cascade delete them.
		// But we've already recorded the mapping, so updateReferences will fix everything
		// after all collisions are processed.

		// Delete the existing issue with old ID (this will cascade delete old dependencies)
		if err := s.DeleteIssue(ctx, oldID); err != nil {
			return nil, fmt.Errorf("failed to delete old existing issue %s: %w", oldID, err)
		}

		// NOW create the incoming issue with the original ID (replaces the deleted one)
		if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-replace"); err != nil {
			return nil, fmt.Errorf("failed to create incoming issue %s: %w", oldID, err)
		}
	}
}

// Step 3: Recreate dependencies with updated IDs.
// For each dependency that involved a remapped issue, recreate it with new IDs.
for issueID, deps := range allDepsBeforeRemap {
	for _, dep := range deps {
		// Determine new IDs (use mapping if available, otherwise keep original)
		newIssueID := issueID
		if mappedID, ok := idMapping[issueID]; ok {
			newIssueID = mappedID
		}
		newDependsOnID := dep.DependsOnID
		if mappedID, ok := idMapping[dep.DependsOnID]; ok {
			newDependsOnID = mappedID
		}

		// Only recreate if at least one ID was remapped
		if newIssueID != issueID || newDependsOnID != dep.DependsOnID {
			// Check if both issues still exist (the source might have been replaced)
			sourceExists, err := s.GetIssue(ctx, newIssueID)
			if err != nil || sourceExists == nil {
				continue // Skip if source was deleted/replaced
			}
			targetExists, err := s.GetIssue(ctx, newDependsOnID)
			if err != nil || targetExists == nil {
				continue // Skip if target doesn't exist
			}

			// Create the dependency with new IDs
			newDep := &types.Dependency{
				IssueID:     newIssueID,
				DependsOnID: newDependsOnID,
				Type:        dep.Type,
			}
			if err := s.addDependencyUnchecked(ctx, newDep, "import-remap"); err != nil {
				// Ignore duplicate dependency errors
				continue
			}
		}
	}
}

// Step 4: Update all text field references
if err := updateReferences(ctx, s, idMapping); err != nil {
	return nil, fmt.Errorf("failed to update references: %w", err)
}

return idMapping, nil
}

END OF REMOVED CODE */
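// Illustration of the word-boundary matching used below (hypothetical mapping):
// with idMapping = {"bd-10": "bd-200"}, the text "blocked by bd-10" becomes
// "blocked by bd-200", while "bd-100" is left untouched because the \b anchors
// require an exact token match.
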
// updateReferences updates all text field references and dependency records
// to point to new IDs based on the idMapping.
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
	// Pre-compile all regexes once for the entire operation.
	// This avoids recompiling the same patterns for each text field.
	cache, err := BuildReplacementCache(idMapping)
	if err != nil {
		return fmt.Errorf("failed to build replacement cache: %w", err)
	}

	// Update text fields in all issues stored in the database
	dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to get all issues from DB: %w", err)
	}

	for _, issue := range dbIssues {
		updates := make(map[string]interface{})

		// Update description using cached regexes
		newDesc := ReplaceIDReferencesWithCache(issue.Description, cache)
		if newDesc != issue.Description {
			updates["description"] = newDesc
		}

		// Update design using cached regexes
		newDesign := ReplaceIDReferencesWithCache(issue.Design, cache)
		if newDesign != issue.Design {
			updates["design"] = newDesign
		}

		// Update notes using cached regexes
		newNotes := ReplaceIDReferencesWithCache(issue.Notes, cache)
		if newNotes != issue.Notes {
			updates["notes"] = newNotes
		}

		// Update acceptance criteria using cached regexes
		newAC := ReplaceIDReferencesWithCache(issue.AcceptanceCriteria, cache)
		if newAC != issue.AcceptanceCriteria {
			updates["acceptance_criteria"] = newAC
		}

		// If there are updates, apply them
		if len(updates) > 0 {
			if err := s.UpdateIssue(ctx, issue.ID, updates, "import-remap"); err != nil {
				return fmt.Errorf("failed to update references in issue %s: %w", issue.ID, err)
			}
		}
	}

	// Update dependency records
	if err := updateDependencyReferences(ctx, s, idMapping); err != nil {
		return fmt.Errorf("failed to update dependency references: %w", err)
	}

	return nil
}

// idReplacementCache stores a pre-compiled regex for one ID replacement.
// This avoids recompiling the same regex patterns for each text field.
type idReplacementCache struct {
	oldID       string
	newID       string
	placeholder string
	regex       *regexp.Regexp
}

// BuildReplacementCache pre-compiles all regex patterns for an ID mapping.
// The cache should be created once per ID mapping and reused for all text replacements.
func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
	cache := make([]*idReplacementCache, 0, len(idMapping))
	i := 0
	for oldID, newID := range idMapping {
		// Use a word-boundary regex for exact matching
		pattern := fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(oldID))
		re, err := regexp.Compile(pattern)
		if err != nil {
			return nil, fmt.Errorf("failed to compile regex for %s: %w", oldID, err)
		}
		cache = append(cache, &idReplacementCache{
			oldID:       oldID,
			newID:       newID,
			placeholder: fmt.Sprintf("\x00REMAP\x00_%d_\x00", i),
			regex:       re,
		})
		i++
	}
	return cache, nil
}

// ReplaceIDReferencesWithCache replaces all occurrences of old IDs with new IDs
// using a pre-compiled cache. Uses a two-phase approach to avoid replacement
// conflicts: first replace old IDs with placeholders, then placeholders with new IDs.
func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
	if len(cache) == 0 || text == "" {
		return text
	}

	// Phase 1: Replace all old IDs with unique placeholders
	result := text
	for _, entry := range cache {
		result = entry.regex.ReplaceAllString(result, entry.placeholder)
	}

	// Phase 2: Replace all placeholders with new IDs
	for _, entry := range cache {
		result = strings.ReplaceAll(result, entry.placeholder, entry.newID)
	}

	return result
}

// replaceIDReferences replaces all occurrences of old IDs with new IDs in text.
// Uses word-boundary regexes to ensure exact matches (bd-10 but not bd-100) and
// the same two-phase placeholder approach as ReplaceIDReferencesWithCache.
//
// Note: This function compiles regexes on every call. For better performance when
// processing multiple text fields with the same ID mapping, use BuildReplacementCache()
// and ReplaceIDReferencesWithCache() instead.
func replaceIDReferences(text string, idMapping map[string]string) string {
	// Build the cache (compiles regexes)
	cache, err := BuildReplacementCache(idMapping)
	if err != nil {
		// Fall back to no replacement if regex compilation fails
		return text
	}
	return ReplaceIDReferencesWithCache(text, cache)
}
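// Why the two-phase placeholder swap matters (hypothetical mapping): with
// idMapping = {"bd-1": "bd-2", "bd-2": "bd-1"}, replacing IDs directly in a
// single pass would rewrite "bd-1 blocks bd-2" to "bd-1 blocks bd-1", because
// the first substitution's output gets re-matched by the second. Routing each
// old ID through a unique placeholder first keeps the swap lossless.
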
func updateDependencyReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
	// Build the set of NEW remapped IDs (idMapping values).
	// Only dependencies with these IDs as IssueID should be updated.
	newRemappedIDs := make(map[string]bool)
	for _, newID := range idMapping {
		newRemappedIDs[newID] = true
	}

	// Get all dependency records
	allDeps, err := s.GetAllDependencyRecords(ctx)
	if err != nil {
		return fmt.Errorf("failed to get all dependencies: %w", err)
	}

	// Phase 1: Collect all changes first so we never mutate dependency
	// records while still iterating over them.
	type depUpdate struct {
		oldIssueID     string
		oldDependsOnID string
		newDep         *types.Dependency
	}
	var updates []depUpdate

	for _, deps := range allDeps {
		for _, dep := range deps {
			// CRITICAL FIX: Only update dependencies that belong to REMAPPED issues.
			// A dependency belongs to a remapped issue if its IssueID is a NEW remapped
			// ID (one of the VALUES in idMapping, not the keys).
			//
			// We must NOT check against idMapping keys (old IDs), because those are the
			// same as existing issue IDs in the database, and we would incorrectly
			// modify their dependencies.
			if !newRemappedIDs[dep.IssueID] {
				// This dependency does not belong to a remapped issue - skip it
				continue
			}

			needsUpdate := false
			newIssueID := dep.IssueID
			newDependsOnID := dep.DependsOnID

			// Check whether either ID was remapped
			if mappedID, ok := idMapping[dep.IssueID]; ok {
				newIssueID = mappedID
				needsUpdate = true
			}
			if mappedID, ok := idMapping[dep.DependsOnID]; ok {
				newDependsOnID = mappedID
				needsUpdate = true
			}

			if needsUpdate {
				updates = append(updates, depUpdate{
					oldIssueID:     dep.IssueID,
					oldDependsOnID: dep.DependsOnID,
					newDep: &types.Dependency{
						IssueID:     newIssueID,
						DependsOnID: newDependsOnID,
						Type:        dep.Type,
					},
				})
			}
		}
	}

	// Phase 2: Apply all collected changes
	for _, update := range updates {
		// Remove the old dependency - removeDependencyIfExists doesn't error on missing deps
		if err := s.removeDependencyIfExists(ctx, update.oldIssueID, update.oldDependsOnID, "import-remap"); err != nil {
			return fmt.Errorf("failed to remove old dependency %s -> %s: %w", update.oldIssueID, update.oldDependsOnID, err)
		}

		// Add the new dependency with updated IDs.
		// addDependencyUnchecked skips semantic validation (like parent-child direction),
		// since we are only remapping existing dependencies that were already validated.
		if err := s.addDependencyUnchecked(ctx, update.newDep, "import-remap"); err != nil {
			return fmt.Errorf("failed to add updated dependency %s -> %s: %w", update.newDep.IssueID, update.newDep.DependsOnID, err)
		}
	}

	return nil
}
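// Worked example for updateDependencyReferences (hypothetical mapping): with
// idMapping = {"bd-3": "bd-9", "bd-5": "bd-11"}, a dependency record
// (IssueID: "bd-9", DependsOnID: "bd-5") belongs to a remapped issue ("bd-9"
// is a mapping value), so it is rewritten to (IssueID: "bd-9",
// DependsOnID: "bd-11"). A record with IssueID "bd-3" is skipped entirely,
// since "bd-3" is an old ID rather than a remapped one.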