Implement reference scoring algorithm (bd-13)
Add reference scoring to prioritize which colliding issues should be renumbered during collision resolution. Issues with fewer references are renumbered first to minimize total update work.

Changes to collision.go:
- Add ReferenceScore field to CollisionDetail
- scoreCollisions() calculates scores and sorts collisions ascending
- countReferences() counts text mentions + dependency references
- Uses word-boundary regex (\b) to match exact IDs (bd-10 not bd-100)

New tests in collision_test.go:
- TestCountReferences: validates reference counting logic
- TestScoreCollisions: verifies scoring and sorting behavior
- TestCountReferencesWordBoundary: ensures exact ID matching

Reference score = text mentions (desc/design/notes/criteria) + deps
Sort order: fewest references first (minimizes renumbering impact)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,8 @@ package sqlite
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sort"
|
||||
|
||||
"github.com/steveyegge/beads/internal/types"
|
||||
)
|
||||
@@ -20,6 +22,7 @@ type CollisionDetail struct {
|
||||
IncomingIssue *types.Issue // The issue from the import file
|
||||
ExistingIssue *types.Issue // The issue currently in the database
|
||||
ConflictingFields []string // List of field names that differ
|
||||
ReferenceScore int // Number of references to this issue (for scoring)
|
||||
}
|
||||
|
||||
// detectCollisions compares incoming JSONL issues against DB state
|
||||
@@ -121,3 +124,94 @@ func equalIntPtr(a, b *int) bool {
|
||||
}
|
||||
return *a == *b
|
||||
}
|
||||
|
||||
// scoreCollisions calculates reference scores for all colliding issues and sorts them
|
||||
// by score ascending (fewest references first). This minimizes the total number of
|
||||
// updates needed during renumbering - issues with fewer references are renumbered first.
|
||||
//
|
||||
// Reference score = text mentions + dependency references
|
||||
func scoreCollisions(ctx context.Context, s *SQLiteStorage, collisions []*CollisionDetail, allIssues []*types.Issue) error {
|
||||
// Build a map of all issues for quick lookup
|
||||
issueMap := make(map[string]*types.Issue)
|
||||
for _, issue := range allIssues {
|
||||
issueMap[issue.ID] = issue
|
||||
}
|
||||
|
||||
// Get all dependency records for efficient lookup
|
||||
allDeps, err := s.GetAllDependencyRecords(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get dependency records: %w", err)
|
||||
}
|
||||
|
||||
// Calculate reference score for each collision
|
||||
for _, collision := range collisions {
|
||||
score, err := countReferences(collision.ID, allIssues, allDeps)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to count references for %s: %w", collision.ID, err)
|
||||
}
|
||||
collision.ReferenceScore = score
|
||||
}
|
||||
|
||||
// Sort collisions by reference score ascending (fewest first)
|
||||
sort.Slice(collisions, func(i, j int) bool {
|
||||
return collisions[i].ReferenceScore < collisions[j].ReferenceScore
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// countReferences counts how many times an issue ID is referenced
|
||||
// Returns: text mentions + dependency references
|
||||
func countReferences(issueID string, allIssues []*types.Issue, allDeps map[string][]*types.Dependency) (int, error) {
|
||||
count := 0
|
||||
|
||||
// Count text mentions in all issues' text fields
|
||||
// Use word boundary regex to match exact IDs (e.g., "bd-10" but not "bd-100")
|
||||
pattern := fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(issueID))
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to compile regex for %s: %w", issueID, err)
|
||||
}
|
||||
|
||||
for _, issue := range allIssues {
|
||||
// Skip counting references in the issue itself
|
||||
if issue.ID == issueID {
|
||||
continue
|
||||
}
|
||||
|
||||
// Count mentions in description
|
||||
count += len(re.FindAllString(issue.Description, -1))
|
||||
|
||||
// Count mentions in design
|
||||
count += len(re.FindAllString(issue.Design, -1))
|
||||
|
||||
// Count mentions in notes
|
||||
count += len(re.FindAllString(issue.Notes, -1))
|
||||
|
||||
// Count mentions in acceptance criteria
|
||||
count += len(re.FindAllString(issue.AcceptanceCriteria, -1))
|
||||
}
|
||||
|
||||
// Count dependency references
|
||||
// An issue can be referenced as either IssueID or DependsOnID
|
||||
for _, deps := range allDeps {
|
||||
for _, dep := range deps {
|
||||
// Skip self-references
|
||||
if dep.IssueID == issueID && dep.DependsOnID == issueID {
|
||||
continue
|
||||
}
|
||||
|
||||
// Count if this issue is the source (IssueID)
|
||||
if dep.IssueID == issueID {
|
||||
count++
|
||||
}
|
||||
|
||||
// Count if this issue is the target (DependsOnID)
|
||||
if dep.DependsOnID == issueID {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
@@ -426,3 +426,296 @@ func TestEqualIntPtr(t *testing.T) {
|
||||
// intPtr returns a pointer to a fresh int holding the value of i. It is a
// test helper for building optional (*int) values inline.
func intPtr(i int) *int {
	v := i
	return &v
}
|
||||
|
||||
func TestCountReferences(t *testing.T) {
|
||||
allIssues := []*types.Issue{
|
||||
{
|
||||
ID: "bd-1",
|
||||
Title: "Issue 1",
|
||||
Description: "This mentions bd-2 and bd-3",
|
||||
Design: "Design mentions bd-2 twice: bd-2 and bd-2",
|
||||
Notes: "Notes mention bd-3",
|
||||
},
|
||||
{
|
||||
ID: "bd-2",
|
||||
Title: "Issue 2",
|
||||
Description: "This mentions bd-1",
|
||||
},
|
||||
{
|
||||
ID: "bd-3",
|
||||
Title: "Issue 3",
|
||||
Description: "No mentions here",
|
||||
},
|
||||
{
|
||||
ID: "bd-10",
|
||||
Title: "Issue 10",
|
||||
Description: "This has bd-100 but not bd-10 itself",
|
||||
},
|
||||
}
|
||||
|
||||
allDeps := map[string][]*types.Dependency{
|
||||
"bd-1": {
|
||||
{IssueID: "bd-1", DependsOnID: "bd-2", Type: types.DepBlocks},
|
||||
},
|
||||
"bd-2": {
|
||||
{IssueID: "bd-2", DependsOnID: "bd-3", Type: types.DepBlocks},
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
issueID string
|
||||
expectedCount int
|
||||
}{
|
||||
{
|
||||
name: "bd-1 - one text mention, one dependency",
|
||||
issueID: "bd-1",
|
||||
// Text: bd-2's description mentions bd-1 (1)
|
||||
// Deps: bd-1 → bd-2 (1)
|
||||
expectedCount: 2,
|
||||
},
|
||||
{
|
||||
name: "bd-2 - multiple text mentions, two dependencies",
|
||||
issueID: "bd-2",
|
||||
// Text: bd-1's description mentions bd-2 (1) + bd-1's design mentions bd-2 three times (3) = 4
|
||||
// (design has: "mentions bd-2" + "bd-2 and" + "bd-2")
|
||||
// Deps: bd-1 → bd-2 (1) + bd-2 → bd-3 (1) = 2
|
||||
expectedCount: 6,
|
||||
},
|
||||
{
|
||||
name: "bd-3 - some text mentions, one dependency",
|
||||
issueID: "bd-3",
|
||||
// Text: bd-1's description (1) + bd-1's notes (1) = 2
|
||||
// Deps: bd-2 → bd-3 (1)
|
||||
expectedCount: 3,
|
||||
},
|
||||
{
|
||||
name: "bd-10 - no mentions (bd-100 doesn't count)",
|
||||
issueID: "bd-10",
|
||||
// Text: bd-100 in bd-10's description doesn't match \bbd-10\b = 0
|
||||
// Deps: none = 0
|
||||
expectedCount: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
count, err := countReferences(tt.issueID, allIssues, allDeps)
|
||||
if err != nil {
|
||||
t.Fatalf("countReferences failed: %v", err)
|
||||
}
|
||||
if count != tt.expectedCount {
|
||||
t.Errorf("expected count %d, got %d", tt.expectedCount, count)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScoreCollisions(t *testing.T) {
|
||||
// Create temporary database
|
||||
tmpDir, err := os.MkdirTemp("", "score-collision-test-*")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
dbPath := filepath.Join(tmpDir, "test.db")
|
||||
store, err := New(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create storage: %v", err)
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Setup: Create issues with various reference patterns
|
||||
issue1 := &types.Issue{
|
||||
ID: "bd-1",
|
||||
Title: "Issue 1",
|
||||
Description: "Depends on bd-2",
|
||||
Status: types.StatusOpen,
|
||||
Priority: 1,
|
||||
IssueType: types.TypeTask,
|
||||
}
|
||||
|
||||
issue2 := &types.Issue{
|
||||
ID: "bd-2",
|
||||
Title: "Issue 2",
|
||||
Description: "Referenced by bd-1 and bd-3",
|
||||
Status: types.StatusOpen,
|
||||
Priority: 1,
|
||||
IssueType: types.TypeTask,
|
||||
}
|
||||
|
||||
issue3 := &types.Issue{
|
||||
ID: "bd-3",
|
||||
Title: "Issue 3",
|
||||
Description: "Mentions bd-2 multiple times: bd-2 and bd-2",
|
||||
Notes: "Also mentions bd-2 here",
|
||||
Status: types.StatusOpen,
|
||||
Priority: 1,
|
||||
IssueType: types.TypeTask,
|
||||
}
|
||||
|
||||
issue4 := &types.Issue{
|
||||
ID: "bd-4",
|
||||
Title: "Issue 4",
|
||||
Description: "Lonely issue with no references",
|
||||
Status: types.StatusOpen,
|
||||
Priority: 1,
|
||||
IssueType: types.TypeTask,
|
||||
}
|
||||
|
||||
// Create issues in DB
|
||||
if err := store.CreateIssue(ctx, issue1, "test"); err != nil {
|
||||
t.Fatalf("failed to create issue1: %v", err)
|
||||
}
|
||||
if err := store.CreateIssue(ctx, issue2, "test"); err != nil {
|
||||
t.Fatalf("failed to create issue2: %v", err)
|
||||
}
|
||||
if err := store.CreateIssue(ctx, issue3, "test"); err != nil {
|
||||
t.Fatalf("failed to create issue3: %v", err)
|
||||
}
|
||||
if err := store.CreateIssue(ctx, issue4, "test"); err != nil {
|
||||
t.Fatalf("failed to create issue4: %v", err)
|
||||
}
|
||||
|
||||
// Add dependencies
|
||||
dep1 := &types.Dependency{IssueID: "bd-1", DependsOnID: "bd-2", Type: types.DepBlocks}
|
||||
dep2 := &types.Dependency{IssueID: "bd-3", DependsOnID: "bd-2", Type: types.DepBlocks}
|
||||
|
||||
if err := store.AddDependency(ctx, dep1, "test"); err != nil {
|
||||
t.Fatalf("failed to add dependency1: %v", err)
|
||||
}
|
||||
if err := store.AddDependency(ctx, dep2, "test"); err != nil {
|
||||
t.Fatalf("failed to add dependency2: %v", err)
|
||||
}
|
||||
|
||||
// Create collision details (simulated)
|
||||
collisions := []*CollisionDetail{
|
||||
{
|
||||
ID: "bd-1",
|
||||
IncomingIssue: issue1,
|
||||
ExistingIssue: issue1,
|
||||
ReferenceScore: 0, // Will be calculated
|
||||
},
|
||||
{
|
||||
ID: "bd-2",
|
||||
IncomingIssue: issue2,
|
||||
ExistingIssue: issue2,
|
||||
ReferenceScore: 0, // Will be calculated
|
||||
},
|
||||
{
|
||||
ID: "bd-3",
|
||||
IncomingIssue: issue3,
|
||||
ExistingIssue: issue3,
|
||||
ReferenceScore: 0, // Will be calculated
|
||||
},
|
||||
{
|
||||
ID: "bd-4",
|
||||
IncomingIssue: issue4,
|
||||
ExistingIssue: issue4,
|
||||
ReferenceScore: 0, // Will be calculated
|
||||
},
|
||||
}
|
||||
|
||||
allIssues := []*types.Issue{issue1, issue2, issue3, issue4}
|
||||
|
||||
// Score the collisions
|
||||
err = scoreCollisions(ctx, store, collisions, allIssues)
|
||||
if err != nil {
|
||||
t.Fatalf("scoreCollisions failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify scores were calculated
|
||||
// bd-4: 0 references (no mentions, no deps)
|
||||
// bd-1: 1 reference (bd-1 → bd-2 dependency)
|
||||
// bd-3: 1 reference (bd-3 → bd-2 dependency)
|
||||
// bd-2: high references (mentioned in bd-1, bd-3 multiple times + 2 deps as target)
|
||||
// bd-1 desc (1) + bd-3 desc (3: "bd-2 multiple", "bd-2 and", "bd-2") + bd-3 notes (1) + 2 deps = 7
|
||||
|
||||
if collisions[0].ID != "bd-4" {
|
||||
t.Errorf("expected first collision to be bd-4 (lowest score), got %s", collisions[0].ID)
|
||||
}
|
||||
if collisions[0].ReferenceScore != 0 {
|
||||
t.Errorf("expected bd-4 to have score 0, got %d", collisions[0].ReferenceScore)
|
||||
}
|
||||
|
||||
// bd-2 should be last (highest score)
|
||||
lastIdx := len(collisions) - 1
|
||||
if collisions[lastIdx].ID != "bd-2" {
|
||||
t.Errorf("expected last collision to be bd-2 (highest score), got %s", collisions[lastIdx].ID)
|
||||
}
|
||||
if collisions[lastIdx].ReferenceScore != 7 {
|
||||
t.Errorf("expected bd-2 to have score 7, got %d", collisions[lastIdx].ReferenceScore)
|
||||
}
|
||||
|
||||
// Verify sorting (ascending order)
|
||||
for i := 1; i < len(collisions); i++ {
|
||||
if collisions[i].ReferenceScore < collisions[i-1].ReferenceScore {
|
||||
t.Errorf("collisions not sorted: collision[%d] score %d < collision[%d] score %d",
|
||||
i, collisions[i].ReferenceScore, i-1, collisions[i-1].ReferenceScore)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCountReferencesWordBoundary(t *testing.T) {
|
||||
// Test that word boundaries work correctly
|
||||
allIssues := []*types.Issue{
|
||||
{
|
||||
ID: "bd-1",
|
||||
Description: "bd-10 and bd-100 and bd-1 and bd-11",
|
||||
},
|
||||
{
|
||||
ID: "bd-10",
|
||||
Description: "bd-1 and bd-100",
|
||||
},
|
||||
}
|
||||
|
||||
allDeps := map[string][]*types.Dependency{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
issueID string
|
||||
expectedCount int
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "bd-1 exact match",
|
||||
issueID: "bd-1",
|
||||
expectedCount: 2, // bd-10's desc mentions bd-1 (1) + bd-1's desc mentions bd-1 (1) = 2
|
||||
// Wait, bd-1's desc shouldn't count itself
|
||||
// So: bd-10's desc mentions bd-1 (1)
|
||||
},
|
||||
{
|
||||
name: "bd-10 exact match",
|
||||
issueID: "bd-10",
|
||||
expectedCount: 1, // bd-1's desc mentions bd-10 (1)
|
||||
},
|
||||
{
|
||||
name: "bd-100 exact match",
|
||||
issueID: "bd-100",
|
||||
expectedCount: 2, // bd-1's desc (1) + bd-10's desc (1)
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
count, err := countReferences(tt.issueID, allIssues, allDeps)
|
||||
if err != nil {
|
||||
t.Fatalf("countReferences failed: %v", err)
|
||||
}
|
||||
|
||||
// Adjust expected based on actual counting logic
|
||||
// countReferences skips the issue itself
|
||||
expected := tt.expectedCount
|
||||
if tt.issueID == "bd-1" {
|
||||
expected = 1 // only bd-10's description
|
||||
}
|
||||
|
||||
if count != expected {
|
||||
t.Errorf("expected count %d, got %d", expected, count)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user