Implement reference scoring algorithm (bd-13)

Add reference scoring to prioritize which colliding issues should be
renumbered during collision resolution. Issues with fewer references
are renumbered first to minimize total update work.

Changes to collision.go:
- Add ReferenceScore field to CollisionDetail
- scoreCollisions() calculates scores and sorts collisions ascending
- countReferences() counts text mentions + dependency references
- Uses word-boundary regex (\b) to match exact IDs (bd-10 not bd-100)

New tests in collision_test.go:
- TestCountReferences: validates reference counting logic
- TestScoreCollisions: verifies scoring and sorting behavior
- TestCountReferencesWordBoundary: ensures exact ID matching

Reference score = text mentions (desc/design/notes/criteria) + deps
Sort order: fewest references first (minimizes renumbering impact)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Steve Yegge
2025-10-12 16:27:05 -07:00
parent b065b32a51
commit 42e3bb315d
3 changed files with 411 additions and 24 deletions

View File

@@ -3,6 +3,8 @@ package sqlite
import (
"context"
"fmt"
"regexp"
"sort"
"github.com/steveyegge/beads/internal/types"
)
@@ -20,6 +22,7 @@ type CollisionDetail struct {
IncomingIssue *types.Issue // The issue from the import file
ExistingIssue *types.Issue // The issue currently in the database
ConflictingFields []string // List of field names that differ
ReferenceScore int // Number of references to this issue (for scoring)
}
// detectCollisions compares incoming JSONL issues against DB state
@@ -121,3 +124,94 @@ func equalIntPtr(a, b *int) bool {
}
return *a == *b
}
// scoreCollisions calculates reference scores for all colliding issues and sorts them
// by score ascending (fewest references first). This minimizes the total number of
// updates needed during renumbering - issues with fewer references are renumbered first.
//
// Reference score = text mentions + dependency references
func scoreCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, allIssues []*types.Issue) error {
// Build a map of all issues for quick lookup
issueMap := make(map[string]*types.Issue)
for _, issue := range allIssues {
issueMap[issue.ID] = issue
}
// Get all dependency records for efficient lookup
allDeps, err := s.GetAllDependencyRecords(ctx)
if err != nil {
return fmt.Errorf("failed to get dependency records: %w", err)
}
// Calculate reference score for each collision
for _, collision := range collisions {
score, err := countReferences(collision.ID, allIssues, allDeps)
if err != nil {
return fmt.Errorf("failed to count references for %s: %w", collision.ID, err)
}
collision.ReferenceScore = score
}
// Sort collisions by reference score ascending (fewest first)
sort.Slice(collisions, func(i, j int) bool {
return collisions[i].ReferenceScore < collisions[j].ReferenceScore
})
return nil
}
// countReferences counts how many times an issue ID is referenced
// Returns: text mentions + dependency references
func countReferences(issueID string, allIssues []*types.Issue, allDeps map[string][]*types.Dependency) (int, error) {
count := 0
// Count text mentions in all issues' text fields
// Use word boundary regex to match exact IDs (e.g., "bd-10" but not "bd-100")
pattern := fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(issueID))
re, err := regexp.Compile(pattern)
if err != nil {
return 0, fmt.Errorf("failed to compile regex for %s: %w", issueID, err)
}
for _, issue := range allIssues {
// Skip counting references in the issue itself
if issue.ID == issueID {
continue
}
// Count mentions in description
count += len(re.FindAllString(issue.Description, -1))
// Count mentions in design
count += len(re.FindAllString(issue.Design, -1))
// Count mentions in notes
count += len(re.FindAllString(issue.Notes, -1))
// Count mentions in acceptance criteria
count += len(re.FindAllString(issue.AcceptanceCriteria, -1))
}
// Count dependency references
// An issue can be referenced as either IssueID or DependsOnID
for _, deps := range allDeps {
for _, dep := range deps {
// Skip self-references
if dep.IssueID == issueID && dep.DependsOnID == issueID {
continue
}
// Count if this issue is the source (IssueID)
if dep.IssueID == issueID {
count++
}
// Count if this issue is the target (DependsOnID)
if dep.DependsOnID == issueID {
count++
}
}
}
return count, nil
}