WIP: Implement content-hash based collision resolution (bd-89)

- Replace timestamp-based collision scoring with deterministic content hashing
- Add hashIssueContent() using SHA-256 of all substantive fields
- Modify ScoreCollisions to compare hashes and set RemapIncoming flag
- Update RemapCollisions to handle both directions (remap incoming OR existing)
- Add CollisionDetail.RemapIncoming field to control which version gets remapped
- Add unit tests for hash function and deterministic collision resolution

Status: Hash-based resolution works correctly, but TestTwoCloneCollision still
fails due to missing rename detection. After Clone B resolves collision,
Clone A needs to recognize its issue was remapped to a different ID.

Next: Add content-based rename detection during import to prevent
re-resolving already-resolved collisions.

Progress on bd-86.

Amp-Thread-ID: https://ampcode.com/threads/T-b19b49e8-b52a-463d-b052-8a526a500260
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-28 17:11:40 -07:00
parent c5e7ad8d71
commit 2e87329cf8
3 changed files with 192 additions and 37 deletions

View File

@@ -2,9 +2,9 @@ package sqlite
import (
"context"
"crypto/sha256"
"fmt"
"regexp"
"sort"
"strings"
"github.com/steveyegge/beads/internal/types"
@@ -23,7 +23,8 @@ type CollisionDetail struct {
IncomingIssue *types.Issue // The issue from the import file
ExistingIssue *types.Issue // The issue currently in the database
ConflictingFields []string // List of field names that differ
ReferenceScore int // Number of references to this issue (for scoring)
ReferenceScore int // Number of references to this issue (for scoring) - DEPRECATED
RemapIncoming bool // If true, remap incoming; if false, remap existing
}
// DetectCollisions compares incoming JSONL issues against DB state
@@ -146,32 +147,63 @@ func equalStringPtr(a, b *string) bool {
return *a == *b
}
// ScoreCollisions calculates reference scores for all colliding issues and sorts them
// by score ascending (fewest references first). This minimizes the total number of
// updates needed during renumbering - issues with fewer references are renumbered first.
// hashIssueContent creates a deterministic hash of an issue's content.
// Uses all substantive fields (excluding timestamps and ID) to ensure
// that identical content produces identical hashes across all clones.
func hashIssueContent(issue *types.Issue) string {
h := sha256.New()
// Hash all substantive fields in a stable order
h.Write([]byte(issue.Title))
h.Write([]byte{0}) // separator
h.Write([]byte(issue.Description))
h.Write([]byte{0})
h.Write([]byte(issue.Design))
h.Write([]byte{0})
h.Write([]byte(issue.AcceptanceCriteria))
h.Write([]byte{0})
h.Write([]byte(issue.Notes))
h.Write([]byte{0})
h.Write([]byte(issue.Status))
h.Write([]byte{0})
h.Write([]byte(fmt.Sprintf("%d", issue.Priority)))
h.Write([]byte{0})
h.Write([]byte(issue.IssueType))
h.Write([]byte{0})
h.Write([]byte(issue.Assignee))
h.Write([]byte{0})
if issue.ExternalRef != nil {
h.Write([]byte(*issue.ExternalRef))
}
return fmt.Sprintf("%x", h.Sum(nil))
}
// ScoreCollisions determines which version of each colliding issue to keep vs. remap.
// Uses deterministic content-based hashing to ensure all clones make the same decision.
//
// Reference score = text mentions + dependency references
// Decision process:
// 1. Hash both versions (existing and incoming) based on content
// 2. Keep the version with the lexicographically LOWER hash
// 3. Mark the other version for remapping
//
// This ensures:
// - Deterministic: same collision always produces same result
// - Symmetric: works regardless of which clone syncs first
// - Idempotent: converges to same state across all clones
func ScoreCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, allIssues []*types.Issue) error {
// Get all dependency records for efficient lookup
allDeps, err := s.GetAllDependencyRecords(ctx)
if err != nil {
return fmt.Errorf("failed to get dependency records: %w", err)
}
// Calculate reference score for each collision
// Determine which version to keep for each collision
for _, collision := range collisions {
score, err := countReferences(collision.ID, allIssues, allDeps)
if err != nil {
return fmt.Errorf("failed to count references for %s: %w", collision.ID, err)
}
collision.ReferenceScore = score
existingHash := hashIssueContent(collision.ExistingIssue)
incomingHash := hashIssueContent(collision.IncomingIssue)
// Keep the version with lower hash (deterministic winner)
// If incoming has lower hash, we need to remap existing and keep incoming
// If existing has lower hash, we need to remap incoming and keep existing
collision.RemapIncoming = existingHash < incomingHash
}
// Sort collisions by reference score ascending (fewest first)
sort.Slice(collisions, func(i, j int) bool {
return collisions[i].ReferenceScore < collisions[j].ReferenceScore
})
return nil
}
@@ -286,9 +318,15 @@ func deduplicateIncomingIssues(issues []*types.Issue) []*types.Issue {
return result
}
// RemapCollisions handles ID remapping for colliding issues
// Takes sorted collisions (fewest references first) and remaps them to new IDs
// Returns a map of old ID -> new ID for reporting
// RemapCollisions handles ID remapping for colliding issues based on content hash.
// For each collision, either the incoming or existing issue is remapped (determined by ScoreCollisions).
// Returns a map of old ID -> new ID for reporting.
//
// Process:
// 1. If RemapIncoming=true: remap incoming issue, keep existing
// 2. If RemapIncoming=false: remap existing issue, replace with incoming
//
// This ensures deterministic, symmetric collision resolution across all clones.
//
// NOTE: This function is not atomic - it performs multiple separate database operations.
// If an error occurs partway through, some issues may be created without their references
@@ -302,7 +340,7 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
return nil, fmt.Errorf("failed to sync ID counters: %w", err)
}
// For each collision (in order of ascending reference score)
// Process each collision based on which version should be remapped
for _, collision := range collisions {
oldID := collision.ID
@@ -317,16 +355,40 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
}
newID := fmt.Sprintf("%s-%d", prefix, nextID)
// Record mapping
idMapping[oldID] = newID
// Update the issue ID in the incoming issue
collision.IncomingIssue.ID = newID
// Create the issue with new ID
// Note: CreateIssue will use the ID we set
if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-remap"); err != nil {
return nil, fmt.Errorf("failed to create remapped issue %s -> %s: %w", oldID, newID, err)
if collision.RemapIncoming {
// Incoming has higher hash -> remap incoming, keep existing
// Record mapping
idMapping[oldID] = newID
// Update incoming issue ID
collision.IncomingIssue.ID = newID
// Create incoming issue with new ID
if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-remap"); err != nil {
return nil, fmt.Errorf("failed to create remapped incoming issue %s -> %s: %w", oldID, newID, err)
}
} else {
// Existing has higher hash -> remap existing, replace with incoming
// First, remap the existing issue to new ID
idMapping[oldID] = newID
// Create a copy of existing issue with new ID
existingCopy := *collision.ExistingIssue
existingCopy.ID = newID
if err := s.CreateIssue(ctx, &existingCopy, "import-remap"); err != nil {
return nil, fmt.Errorf("failed to create remapped existing issue %s -> %s: %w", oldID, newID, err)
}
// Delete the existing issue with old ID
if err := s.DeleteIssue(ctx, oldID); err != nil {
return nil, fmt.Errorf("failed to delete old existing issue %s: %w", oldID, err)
}
// Create incoming issue with original ID (replaces the deleted one)
if err := s.CreateIssue(ctx, collision.IncomingIssue, "import-replace"); err != nil {
return nil, fmt.Errorf("failed to create incoming issue %s: %w", oldID, err)
}
}
}