Files
beads/internal/storage/sqlite/collision.go
Steve Yegge 5d137ffeeb Remove sequential ID generation and SyncAllCounters (bd-c7af, bd-8e05, bd-4c74)
- Removed SyncAllCounters() and all call sites (already no-op with hash IDs)
- Removed AllocateNextID() and getNextIDForPrefix() - sequential ID generation
- Removed collision remapping logic in internal/storage/sqlite/collision.go
- Removed rename collision handling in internal/importer/importer.go
- Removed branch-merge example (collision resolution no longer needed)
- Updated EXTENDING.md to remove counter sync examples

These were all deprecated code paths for sequential IDs that are obsolete
with hash-based IDs. Hash ID collisions are handled by extending the hash,
not by remapping to new sequential IDs.
2025-10-30 22:24:42 -07:00

705 lines
24 KiB
Go

package sqlite
import (
"context"
"crypto/sha256"
"fmt"
"regexp"
"strings"
"github.com/steveyegge/beads/internal/types"
)
// CollisionResult categorizes incoming issues by their relationship to
// existing DB state. Produced by DetectCollisions; consumed by
// ApplyCollisionResolution.
type CollisionResult struct {
	ExactMatches []string           // IDs that match exactly (idempotent import)
	Collisions   []*CollisionDetail // Issues with same ID but different content
	NewIssues    []string           // IDs that don't exist in DB yet
	Renames      []*RenameDetail    // Issues with same content but different ID (renames)
}
// RenameDetail captures a rename/remap detected during collision detection:
// a DB issue whose content matches an incoming issue carrying a different ID.
type RenameDetail struct {
	OldID string       // ID in database (deleted later by ApplyCollisionResolution)
	NewID string       // ID in incoming (the new canonical ID)
	Issue *types.Issue // The incoming issue carrying the new ID
}
// CollisionDetail provides detailed information about a collision: the same
// ID present in both the import batch and the database with different content.
type CollisionDetail struct {
	ID                string       // The issue ID that collided
	IncomingIssue     *types.Issue // The issue from the import file
	ExistingIssue     *types.Issue // The issue currently in the database
	ConflictingFields []string     // Field names that differ (as reported by compareIssues)
	RemapIncoming     bool         // Set by ScoreCollisions: true -> remap incoming, false -> remap existing
}
// DetectCollisions compares incoming JSONL issues against DB state.
// It distinguishes between:
//  1. Exact match (idempotent) - ID and content are identical
//  2. ID match but different content (collision) - same ID, different fields
//  3. New issue - ID doesn't exist in DB
//  4. Rename detected - different ID but same content (from prior remap)
//
// This function is read-only; the write phase is ApplyCollisionResolution.
// Returns a CollisionResult categorizing all incoming issues.
func DetectCollisions(ctx context.Context, s *SQLiteStorage, incomingIssues []*types.Issue) (*CollisionResult, error) {
	result := &CollisionResult{
		ExactMatches: make([]string, 0),
		Collisions:   make([]*CollisionDetail, 0),
		NewIssues:    make([]string, 0),
		// Fix: Renames is now initialized like the other slices instead of
		// being left nil.
		Renames: make([]*RenameDetail, 0),
	}

	// Phase 1: Deduplicate within the incoming batch so that two incoming
	// issues with identical content (but different IDs) are not both imported.
	deduped := deduplicateIncomingIssues(incomingIssues)

	// Phase 2: Build a content-hash index of all DB issues. This is what lets
	// us detect renames (different ID, same content).
	dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return nil, fmt.Errorf("failed to get all DB issues: %w", err)
	}
	contentToDBIssue := make(map[string]*types.Issue, len(dbIssues))
	for _, dbIssue := range dbIssues {
		// NOTE(review): if multiple DB issues share identical content, the last
		// one indexed wins and rename detection pairs against it — confirm
		// acceptable (likely rare since identical content should be deduped).
		contentToDBIssue[hashIssueContent(dbIssue)] = dbIssue
	}

	// Phase 3: Classify each incoming issue.
	for _, incoming := range deduped {
		incomingHash := hashIssueContent(incoming)

		// Check if an issue with this ID already exists in the database.
		existing, err := s.GetIssue(ctx, incoming.ID)
		if err != nil {
			return nil, fmt.Errorf("failed to check issue %s: %w", incoming.ID, err)
		}
		if existing == nil {
			// No issue with this ID — check for a rename by content.
			if dbMatch, found := contentToDBIssue[incomingHash]; found {
				// Same content, different ID: the incoming ID is the NEW
				// canonical ID, the DB ID is the OLD one. Record the rename;
				// deletion happens later in ApplyCollisionResolution.
				result.Renames = append(result.Renames, &RenameDetail{
					OldID: dbMatch.ID,
					NewID: incoming.ID,
					Issue: incoming,
				})
				// Intentionally NOT added to NewIssues.
			} else {
				// Truly new issue.
				result.NewIssues = append(result.NewIssues, incoming.ID)
			}
			continue
		}

		// Issue exists by ID — compare content field by field.
		conflicts := compareIssues(existing, incoming)
		if len(conflicts) == 0 {
			// No differences — exact match (idempotent import).
			result.ExactMatches = append(result.ExactMatches, incoming.ID)
		} else {
			// Same ID but different content — collision.
			result.Collisions = append(result.Collisions, &CollisionDetail{
				ID:                incoming.ID,
				IncomingIssue:     incoming,
				ExistingIssue:     existing,
				ConflictingFields: conflicts,
			})
		}
	}
	return result, nil
}
// compareIssues returns the names of the fields whose values differ between
// the two issues. Timestamps (CreatedAt, UpdatedAt, ClosedAt) are intentionally
// not compared, and neither are dependencies (handled separately in import).
func compareIssues(existing, incoming *types.Issue) []string {
	// Table-driven comparison keeps the field list and its reporting order in
	// one place. All comparisons are side-effect free, so evaluating them all
	// up front is equivalent to the original cascade of ifs.
	checks := []struct {
		name    string
		differs bool
	}{
		{"title", existing.Title != incoming.Title},
		{"description", existing.Description != incoming.Description},
		{"design", existing.Design != incoming.Design},
		{"acceptance_criteria", existing.AcceptanceCriteria != incoming.AcceptanceCriteria},
		{"notes", existing.Notes != incoming.Notes},
		{"status", existing.Status != incoming.Status},
		{"priority", existing.Priority != incoming.Priority},
		{"issue_type", existing.IssueType != incoming.IssueType},
		{"assignee", existing.Assignee != incoming.Assignee},
		// Pointer-valued fields need nil-aware comparison.
		{"estimated_minutes", !equalIntPtr(existing.EstimatedMinutes, incoming.EstimatedMinutes)},
		{"external_ref", !equalStringPtr(existing.ExternalRef, incoming.ExternalRef)},
	}

	conflicts := make([]string, 0)
	for _, check := range checks {
		if check.differs {
			conflicts = append(conflicts, check.name)
		}
	}
	return conflicts
}
// equalIntPtr reports whether two *int values are equal: two nil pointers are
// equal, a nil/non-nil pair is not, and two non-nil pointers compare by value.
func equalIntPtr(a, b *int) bool {
	if a == nil || b == nil {
		// Equal only when both are nil.
		return a == b
	}
	return *a == *b
}
// equalStringPtr reports whether two *string values are equal: two nil
// pointers are equal, a nil/non-nil pair is not, and two non-nil pointers
// compare by value.
func equalStringPtr(a, b *string) bool {
	if a == nil || b == nil {
		// Equal only when both are nil.
		return a == b
	}
	return *a == *b
}
// hashIssueContent creates a deterministic hash of an issue's content.
// Uses substantive fields (excluding timestamps and ID) to ensure that
// identical content produces identical hashes across all clones. The hex
// digest is compared lexicographically by ScoreCollisions, so any change to
// the field set or ordering changes collision-resolution outcomes.
//
// NOTE(review): EstimatedMinutes is compared by compareIssues but is NOT part
// of this hash, so two issues differing only in their estimate hash
// identically and can be classified as renames — confirm this is intentional.
// NOTE(review): a nil ExternalRef and an empty-string ExternalRef produce the
// same hash (the field is written without a trailing separator) — confirm
// acceptable.
func hashIssueContent(issue *types.Issue) string {
	h := sha256.New()
	// Hash all substantive fields in a stable order, NUL-separated so that
	// adjacent fields cannot be confused (e.g. "ab"+"c" vs "a"+"bc").
	h.Write([]byte(issue.Title))
	h.Write([]byte{0}) // separator
	h.Write([]byte(issue.Description))
	h.Write([]byte{0})
	h.Write([]byte(issue.Design))
	h.Write([]byte{0})
	h.Write([]byte(issue.AcceptanceCriteria))
	h.Write([]byte{0})
	h.Write([]byte(issue.Notes))
	h.Write([]byte{0})
	h.Write([]byte(issue.Status))
	h.Write([]byte{0})
	h.Write([]byte(fmt.Sprintf("%d", issue.Priority)))
	h.Write([]byte{0})
	h.Write([]byte(issue.IssueType))
	h.Write([]byte{0})
	h.Write([]byte(issue.Assignee))
	h.Write([]byte{0})
	if issue.ExternalRef != nil {
		h.Write([]byte(*issue.ExternalRef))
	}
	return fmt.Sprintf("%x", h.Sum(nil))
}
// ApplyCollisionResolution applies the modifications detected during collision
// detection. It is the write-phase counterpart to the read-only
// DetectCollisions:
//  1. Deletes the old IDs of renamed issues.
//  2. Rewrites references (text fields and dependencies) via the ID mapping.
func ApplyCollisionResolution(ctx context.Context, s *SQLiteStorage, result *CollisionResult, mapping map[string]string) error {
	// Phase 1: drop the stale DB rows left behind by renames.
	for _, r := range result.Renames {
		if err := s.DeleteIssue(ctx, r.OldID); err != nil {
			return fmt.Errorf("failed to delete renamed issue %s (renamed to %s): %w",
				r.OldID, r.NewID, err)
		}
	}

	// Phase 2: nothing more to do unless some IDs were remapped.
	if len(mapping) == 0 {
		return nil
	}
	if err := updateReferences(ctx, s, mapping); err != nil {
		return fmt.Errorf("failed to update references: %w", err)
	}
	return nil
}
// ScoreCollisions determines which version of each colliding issue to keep vs.
// remap, using deterministic content-based hashing so every clone makes the
// same decision.
//
// For each collision both versions are hashed; the lexicographically LOWER
// hash wins and keeps its ID, and the other version is marked for remapping.
// This makes resolution deterministic (same collision, same result), symmetric
// (independent of which clone syncs first), and idempotent (all clones
// converge to the same state).
func ScoreCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, allIssues []*types.Issue) error {
	for _, c := range collisions {
		// RemapIncoming is true exactly when the existing version has the
		// lower hash, i.e. the existing version wins and incoming is remapped.
		c.RemapIncoming = hashIssueContent(c.ExistingIssue) < hashIssueContent(c.IncomingIssue)
	}
	return nil
}
// deduplicateIncomingIssues removes content-duplicate issues within the
// incoming batch, keeping the lexicographically smallest ID for each unique
// content. The result preserves first-seen order (with the surviving smaller
// ID swapped into the original slot).
//
// Content identity is defined by hashIssueContent — the same definition used
// for rename detection in DetectCollisions. Fix: the previous implementation
// keyed on an ad-hoc struct that omitted ExternalRef, so two incoming issues
// differing only in ExternalRef were wrongly merged and one silently dropped;
// it also disagreed with hashIssueContent's notion of "same content".
// (EstimatedMinutes remains excluded, matching hashIssueContent.)
func deduplicateIncomingIssues(issues []*types.Issue) []*types.Issue {
	seen := make(map[string]*types.Issue, len(issues))
	result := make([]*types.Issue, 0, len(issues))

	for _, issue := range issues {
		key := hashIssueContent(issue)
		existing, found := seen[key]
		if !found {
			// First time seeing this content.
			seen[key] = issue
			result = append(result, issue)
			continue
		}
		if issue.ID < existing.ID {
			// Duplicate with a smaller ID wins — replace the earlier survivor
			// in place so ordering is stable.
			for i, r := range result {
				if r.ID == existing.ID {
					result[i] = issue
					break
				}
			}
			seen[key] = issue
		}
		// Otherwise skip this duplicate.
	}
	return result
}
// RemapCollisions handles ID remapping for colliding issues based on content
// hash. For each collision, either the incoming or existing issue is remapped
// (as decided by ScoreCollisions). Returns a map of old ID -> new ID for
// reporting.
//
// The function retries up to 3 times, but only on UNIQUE constraint failures;
// any other error aborts immediately. (Counter syncing between retries was
// removed in bd-c7af — unnecessary with hash-based IDs.)
func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, incomingIssues []*types.Issue) (map[string]string, error) {
	const maxRetries = 3

	var lastErr error
	for attempt := 0; attempt < maxRetries; attempt++ {
		mapping, err := remapCollisionsOnce(ctx, s, collisions, incomingIssues)
		if err == nil {
			return mapping, nil
		}
		if !isUniqueConstraintError(err) {
			// Non-retryable failure.
			return nil, err
		}
		lastErr = err
	}
	return nil, fmt.Errorf("failed after %d retries due to UNIQUE constraint violations: %w", maxRetries, lastErr)
}
// remapCollisionsOnce performs a single attempt at collision resolution; it is
// the implementation that RemapCollisions wraps with retry logic.
//
// With hash-based IDs (bd-8e05) collisions should never occur, so any
// non-empty collision list is reported as an error rather than remapped.
func remapCollisionsOnce(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, _ []*types.Issue) (map[string]string, error) {
	if n := len(collisions); n > 0 {
		return nil, fmt.Errorf("collision remapping no longer supported with hash IDs - %d collisions detected", n)
	}
	return nil, nil
}
// _OLD_remapCollisionsOnce_REMOVED is a stub left in place of the original
// 250+ line sequential-ID collision remapper, which was removed in bd-8e05
// (obsolete with hash-based IDs). It always returns (nil, nil) and exists only
// so remaining references compile during the migration.
//
// Deprecated: use remapCollisionsOnce instead; delete this stub once nothing
// references it.
func _OLD_remapCollisionsOnce_REMOVED(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, _ []*types.Issue) (map[string]string, error) {
	// Original implementation removed - see git history before bd-8e05
	return nil, nil
}
/* OLD CODE REMOVED (bd-8e05) - kept for git history reference
if collision.RemapIncoming {
// Incoming has higher hash -> remap incoming, keep existing
// Record mapping
idMapping[oldID] = newID
// Update incoming issue ID
collision.IncomingIssue.ID = newID
// Create incoming issue with new ID
if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-remap"); err != nil {
return nil, fmt.Errorf("failed to create remapped incoming issue %s -> %s: %w", oldID, newID, err)
}
} else {
// Existing has higher hash -> remap existing, replace with incoming
// Record mapping FIRST before any operations
idMapping[oldID] = newID
// Create a copy of existing issue with new ID
existingCopy := *collision.ExistingIssue
existingCopy.ID = newID
if err := s.CreateIssue(ctx, &existingCopy, "import-remap"); err != nil {
return nil, fmt.Errorf("failed to create remapped existing issue %s -> %s: %w", oldID, newID, err)
}
// Create incoming issue with original ID (this will REPLACE when we delete old ID)
// We do this BEFORE deleting so both issues exist temporarily
// Note: This will fail if incoming ID already exists, which is expected in this flow
// So we skip this step and do it after deletion
// Note: We do NOT copy dependencies here - DeleteIssue will cascade delete them
// But we've already recorded the mapping, so updateReferences will fix everything
// after all collisions are processed
// Delete the existing issue with old ID (this will cascade delete old dependencies)
if err := s.DeleteIssue(ctx, oldID); err != nil {
return nil, fmt.Errorf("failed to delete old existing issue %s: %w", oldID, err)
}
// NOW create incoming issue with original ID (replaces the deleted one)
if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-replace"); err != nil {
return nil, fmt.Errorf("failed to create incoming issue %s: %w", oldID, err)
}
}
}
// Step 3: Recreate dependencies with updated IDs
// For each dependency that involved a remapped issue, recreate it with new IDs
for issueID, deps := range allDepsBeforeRemap {
for _, dep := range deps {
// Determine new IDs (use mapping if available, otherwise keep original)
newIssueID := issueID
if mappedID, ok := idMapping[issueID]; ok {
newIssueID = mappedID
}
newDependsOnID := dep.DependsOnID
if mappedID, ok := idMapping[dep.DependsOnID]; ok {
newDependsOnID = mappedID
}
// Only recreate if at least one ID was remapped
if newIssueID != issueID || newDependsOnID != dep.DependsOnID {
// Check if both issues still exist (the source might have been replaced)
sourceExists, err := s.GetIssue(ctx, newIssueID)
if err != nil || sourceExists == nil {
continue // Skip if source was deleted/replaced
}
targetExists, err := s.GetIssue(ctx, newDependsOnID)
if err != nil || targetExists == nil {
continue // Skip if target doesn't exist
}
// Create the dependency with new IDs
newDep := &types.Dependency{
IssueID: newIssueID,
DependsOnID: newDependsOnID,
Type: dep.Type,
}
if err := s.addDependencyUnchecked(ctx, newDep, "import-remap"); err != nil {
// Ignore duplicate dependency errors
continue
}
}
}
}
// Step 4: Update all text field references
if err := updateReferences(ctx, s, idMapping); err != nil {
return nil, fmt.Errorf("failed to update references: %w", err)
}
return idMapping, nil
}
END OF REMOVED CODE */
// updateReferences rewrites all text-field references and dependency records
// in the database to point at the new IDs given by idMapping.
func updateReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
	// Compile every replacement regex once up front; the same cache is reused
	// for every text field of every issue.
	cache, err := BuildReplacementCache(idMapping)
	if err != nil {
		return fmt.Errorf("failed to build replacement cache: %w", err)
	}

	// Fetch every issue currently in the database; each one may reference a
	// remapped ID in its free-text fields.
	dbIssues, err := s.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to get all issues from DB: %w", err)
	}

	for _, issue := range dbIssues {
		updates := make(map[string]interface{})

		// Run the replacement over each text column; record only the columns
		// whose content actually changed.
		textFields := []struct {
			column  string
			current string
		}{
			{"description", issue.Description},
			{"design", issue.Design},
			{"notes", issue.Notes},
			{"acceptance_criteria", issue.AcceptanceCriteria},
		}
		for _, f := range textFields {
			if replaced := ReplaceIDReferencesWithCache(f.current, cache); replaced != f.current {
				updates[f.column] = replaced
			}
		}

		if len(updates) == 0 {
			continue
		}
		if err := s.UpdateIssue(ctx, issue.ID, updates, "import-remap"); err != nil {
			return fmt.Errorf("failed to update references in issue %s: %w", issue.ID, err)
		}
	}

	// Dependency rows reference issue IDs directly; rewrite those as well.
	if err := updateDependencyReferences(ctx, s, idMapping); err != nil {
		return fmt.Errorf("failed to update dependency references: %w", err)
	}
	return nil
}
// idReplacementCache holds one pre-compiled ID replacement: the old ID, its
// replacement, a unique placeholder used during two-phase substitution, and a
// word-boundary regex matching the old ID exactly. Pre-compiling avoids
// rebuilding identical patterns for every text field.
type idReplacementCache struct {
	oldID       string
	newID       string
	placeholder string
	regex       *regexp.Regexp
}

// BuildReplacementCache pre-compiles a word-boundary regex for every entry in
// idMapping. Build the cache once per mapping and reuse it for all text
// replacements via ReplaceIDReferencesWithCache.
func BuildReplacementCache(idMapping map[string]string) ([]*idReplacementCache, error) {
	cache := make([]*idReplacementCache, 0, len(idMapping))
	next := 0
	for from, to := range idMapping {
		// \b...\b guarantees exact-token matches (bd-10 never matches inside
		// bd-100); QuoteMeta protects regex metacharacters in the ID.
		re, err := regexp.Compile(fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(from)))
		if err != nil {
			return nil, fmt.Errorf("failed to compile regex for %s: %w", from, err)
		}
		cache = append(cache, &idReplacementCache{
			oldID: from,
			newID: to,
			// NUL bytes make the placeholder impossible to collide with
			// ordinary issue text.
			placeholder: fmt.Sprintf("\x00REMAP\x00_%d_\x00", next),
			regex:       re,
		})
		next++
	}
	return cache, nil
}

// ReplaceIDReferencesWithCache replaces every old-ID occurrence in text with
// its new ID using a pre-compiled cache. Replacement is two-phase — old IDs to
// unique placeholders, then placeholders to new IDs — so cyclic mappings such
// as {a->b, b->a} cannot clobber each other's output.
func ReplaceIDReferencesWithCache(text string, cache []*idReplacementCache) string {
	if text == "" || len(cache) == 0 {
		return text
	}
	out := text
	// Phase 1: mask every old ID behind its placeholder.
	for _, entry := range cache {
		out = entry.regex.ReplaceAllString(out, entry.placeholder)
	}
	// Phase 2: expand placeholders into the new IDs.
	for _, entry := range cache {
		out = strings.ReplaceAll(out, entry.placeholder, entry.newID)
	}
	return out
}
// replaceIDReferences replaces all occurrences of old IDs with new IDs in
// text, using word-boundary matching (bd-10 but not bd-100) and two-phase
// substitution to avoid conflicts between mappings.
//
// It compiles the regexes on every call; when processing many text fields
// with the same mapping, prefer BuildReplacementCache plus
// ReplaceIDReferencesWithCache. If regex compilation fails the text is
// returned unchanged (best-effort fallback).
func replaceIDReferences(text string, idMapping map[string]string) string {
	if cache, err := BuildReplacementCache(idMapping); err == nil {
		return ReplaceIDReferencesWithCache(text, cache)
	}
	return text
}
// updateDependencyReferences updates dependency records to use new IDs,
// handling both the IssueID and DependsOnID fields.
//
// IMPORTANT: only dependencies belonging to REMAPPED issues (whose IssueID is
// one of the NEW IDs, i.e. the VALUES of idMapping) are touched; dependencies
// of existing issues are left alone.
//
// NOTE: during the normal import flow this is effectively a no-op, because
// imported dependencies have not yet been written when RemapCollisions runs
// (they land in Phase 5 of import_shared.go). It remains as a safety guard
// for edge cases where dependencies already exist under remapped IDs.
func updateDependencyReferences(ctx context.Context, s *SQLiteStorage, idMapping map[string]string) error {
	// Build the set of NEW remapped IDs (the mapping's values). Only
	// dependencies whose IssueID is in this set are eligible for update.
	newRemappedIDs := make(map[string]bool)
	for _, newID := range idMapping {
		newRemappedIDs[newID] = true
	}

	// Snapshot every dependency record before making changes.
	allDeps, err := s.GetAllDependencyRecords(ctx)
	if err != nil {
		return fmt.Errorf("failed to get all dependencies: %w", err)
	}

	// Phase 1: collect all changes first so we never mutate while iterating.
	type depUpdate struct {
		oldIssueID     string
		oldDependsOnID string
		newDep         *types.Dependency
	}
	var updates []depUpdate
	for _, deps := range allDeps {
		for _, dep := range deps {
			// CRITICAL FIX: only update dependencies that belong to REMAPPED
			// issues — those whose IssueID is a NEW remapped ID (a VALUE of
			// idMapping, not a key). Checking against the keys (old IDs) would
			// match existing issues' dependencies and corrupt them.
			if !newRemappedIDs[dep.IssueID] {
				// Not a remapped issue's dependency — leave it untouched.
				continue
			}
			needsUpdate := false
			newIssueID := dep.IssueID
			newDependsOnID := dep.DependsOnID
			// NOTE(review): since dep.IssueID passed the VALUES filter above,
			// this KEY lookup only fires when a new ID is itself also an old
			// ID (a chained remap) — confirm that is the intended behavior.
			if mappedID, ok := idMapping[dep.IssueID]; ok {
				newIssueID = mappedID
				needsUpdate = true
			}
			if mappedID, ok := idMapping[dep.DependsOnID]; ok {
				newDependsOnID = mappedID
				needsUpdate = true
			}
			if needsUpdate {
				updates = append(updates, depUpdate{
					oldIssueID:     dep.IssueID,
					oldDependsOnID: dep.DependsOnID,
					newDep: &types.Dependency{
						IssueID:     newIssueID,
						DependsOnID: newDependsOnID,
						Type:        dep.Type,
					},
				})
			}
		}
	}

	// Phase 2: apply the collected changes — remove the old row, add the new.
	for _, update := range updates {
		// removeDependencyIfExists tolerates already-missing rows.
		if err := s.removeDependencyIfExists(ctx, update.oldIssueID, update.oldDependsOnID, "import-remap"); err != nil {
			return fmt.Errorf("failed to remove old dependency %s -> %s: %w",
				update.oldIssueID, update.oldDependsOnID, err)
		}
		// addDependencyUnchecked skips semantic validation (e.g. parent-child
		// direction): these rows were already validated before the remap.
		if err := s.addDependencyUnchecked(ctx, update.newDep, "import-remap"); err != nil {
			return fmt.Errorf("failed to add updated dependency %s -> %s: %w",
				update.newDep.IssueID, update.newDep.DependsOnID, err)
		}
	}
	return nil
}