fix(duplicates): prefer issues with children/deps when choosing merge target (GH#1022)

The duplicate merge target selection now considers structural relationships:
1. Dependent count (children, blocked-by) - highest priority
2. Text reference count - secondary
3. Lexicographically smallest ID - tiebreaker

This fixes the bug where `bd duplicates --auto-merge` would suggest closing
an epic with 17 children instead of the empty shell duplicate.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
emma
2026-01-12 00:45:25 -08:00
committed by Steve Yegge
parent 8395105493
commit 1c1dabcfdd
3 changed files with 153 additions and 25 deletions

View File

@@ -75,11 +75,13 @@ Example:
} }
// Count references for each issue // Count references for each issue
refCounts := countReferences(allIssues) refCounts := countReferences(allIssues)
// Count structural relationships (children, dependencies) for duplicate groups
structuralScores := countStructuralRelationships(duplicateGroups)
// Prepare output // Prepare output
var mergeCommands []string var mergeCommands []string
var mergeResults []map[string]interface{} var mergeResults []map[string]interface{}
for _, group := range duplicateGroups { for _, group := range duplicateGroups {
target := chooseMergeTarget(group, refCounts) target := chooseMergeTarget(group, refCounts, structuralScores)
sources := make([]string, 0, len(group)-1) sources := make([]string, 0, len(group)-1)
for _, issue := range group { for _, issue := range group {
if issue.ID != target.ID { if issue.ID != target.ID {
@@ -110,7 +112,7 @@ Example:
if jsonOutput { if jsonOutput {
output := map[string]interface{}{ output := map[string]interface{}{
"duplicate_groups": len(duplicateGroups), "duplicate_groups": len(duplicateGroups),
"groups": formatDuplicateGroupsJSON(duplicateGroups, refCounts), "groups": formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores),
} }
if autoMerge || dryRun { if autoMerge || dryRun {
output["merge_commands"] = mergeCommands output["merge_commands"] = mergeCommands
@@ -122,16 +124,20 @@ Example:
} else { } else {
fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups)) fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups))
for i, group := range duplicateGroups { for i, group := range duplicateGroups {
target := chooseMergeTarget(group, refCounts) target := chooseMergeTarget(group, refCounts, structuralScores)
fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title) fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title)
for _, issue := range group { for _, issue := range group {
refs := refCounts[issue.ID] refs := refCounts[issue.ID]
depCount := 0
if score, ok := structuralScores[issue.ID]; ok {
depCount = score.dependentCount
}
marker := " " marker := " "
if issue.ID == target.ID { if issue.ID == target.ID {
marker = ui.RenderPass("→ ") marker = ui.RenderPass("→ ")
} }
fmt.Printf("%s%s (%s, P%d, %d references)\n", fmt.Printf("%s%s (%s, P%d, %d dependents, %d refs)\n",
marker, issue.ID, issue.Status, issue.Priority, refs) marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
} }
sources := make([]string, 0, len(group)-1) sources := make([]string, 0, len(group)-1)
for _, issue := range group { for _, issue := range group {
@@ -190,6 +196,13 @@ func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue {
} }
return duplicates return duplicates
} }
// issueScore captures all factors used to choose which duplicate to keep
type issueScore struct {
dependentCount int // Issues that depend on this one (children, blocked-by) - highest priority
dependsOnCount int // Issues this one depends on
textRefs int // Text mentions in other issues' descriptions/notes
}
// countReferences counts how many times each issue is referenced in text fields // countReferences counts how many times each issue is referenced in text fields
func countReferences(issues []*types.Issue) map[string]int { func countReferences(issues []*types.Issue) map[string]int {
counts := make(map[string]int) counts := make(map[string]int)
@@ -211,36 +224,110 @@ func countReferences(issues []*types.Issue) map[string]int {
} }
return counts return counts
} }
// countStructuralRelationships counts dependency relationships for issues in duplicate groups.
// Uses the efficient GetDependencyCounts batch query.
func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore {
scores := make(map[string]*issueScore)
ctx := rootCtx
// Collect all issue IDs from all groups
var issueIDs []string
for _, group := range groups {
for _, issue := range group {
issueIDs = append(issueIDs, issue.ID)
scores[issue.ID] = &issueScore{}
}
}
// Batch query for dependency counts
depCounts, err := store.GetDependencyCounts(ctx, issueIDs)
if err != nil {
// On error, return empty scores - fallback to text refs only
return scores
}
// Populate scores from dependency counts
for id, counts := range depCounts {
if score, ok := scores[id]; ok {
score.dependentCount = counts.DependentCount // Issues that depend on this one (children, etc)
score.dependsOnCount = counts.DependencyCount
}
}
return scores
}
// chooseMergeTarget selects the best issue to merge into // chooseMergeTarget selects the best issue to merge into
// Priority: highest reference count, then lexicographically smallest ID // Priority order:
func chooseMergeTarget(group []*types.Issue, refCounts map[string]int) *types.Issue { // 1. Highest dependent count (children, blocked-by relationships) - most connected issue wins
// 2. Highest text reference count (mentions in descriptions/notes)
// 3. Lexicographically smallest ID (stable tiebreaker)
func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue {
if len(group) == 0 { if len(group) == 0 {
return nil return nil
} }
getScore := func(id string) (int, int) {
depCount := 0
if score, ok := structuralScores[id]; ok {
depCount = score.dependentCount
}
textRefs := refCounts[id]
return depCount, textRefs
}
target := group[0] target := group[0]
targetRefs := refCounts[target.ID] targetDeps, targetRefs := getScore(target.ID)
for _, issue := range group[1:] { for _, issue := range group[1:] {
issueRefs := refCounts[issue.ID] issueDeps, issueRefs := getScore(issue.ID)
if issueRefs > targetRefs || (issueRefs == targetRefs && issue.ID < target.ID) {
// Compare by dependent count first (children/blocked-by)
if issueDeps > targetDeps {
target = issue target = issue
targetRefs = issueRefs targetDeps, targetRefs = issueDeps, issueRefs
continue
}
if issueDeps < targetDeps {
continue
}
// Equal dependent count - compare by text references
if issueRefs > targetRefs {
target = issue
targetDeps, targetRefs = issueDeps, issueRefs
continue
}
if issueRefs < targetRefs {
continue
}
// Equal on both - use lexicographically smallest ID as tiebreaker
if issue.ID < target.ID {
target = issue
targetDeps, targetRefs = issueDeps, issueRefs
} }
} }
return target return target
} }
// formatDuplicateGroupsJSON formats duplicate groups for JSON output // formatDuplicateGroupsJSON formats duplicate groups for JSON output
func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int) []map[string]interface{} { func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} {
var result []map[string]interface{} var result []map[string]interface{}
for _, group := range groups { for _, group := range groups {
target := chooseMergeTarget(group, refCounts) target := chooseMergeTarget(group, refCounts, structuralScores)
issues := make([]map[string]interface{}, len(group)) issues := make([]map[string]interface{}, len(group))
for i, issue := range group { for i, issue := range group {
depCount := 0
if score, ok := structuralScores[issue.ID]; ok {
depCount = score.dependentCount
}
issues[i] = map[string]interface{}{ issues[i] = map[string]interface{}{
"id": issue.ID, "id": issue.ID,
"title": issue.Title, "title": issue.Title,
"status": issue.Status, "status": issue.Status,
"priority": issue.Priority, "priority": issue.Priority,
"references": refCounts[issue.ID], "references": refCounts[issue.ID],
"dependents": depCount,
"is_merge_target": issue.ID == target.ID, "is_merge_target": issue.ID == target.ID,
} }
} }

View File

@@ -86,13 +86,14 @@ func TestFindDuplicateGroups(t *testing.T) {
func TestChooseMergeTarget(t *testing.T) { func TestChooseMergeTarget(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
group []*types.Issue group []*types.Issue
refCounts map[string]int refCounts map[string]int
wantID string structuralScores map[string]*issueScore
wantID string
}{ }{
{ {
name: "choose by reference count", name: "choose by reference count when no structural data",
group: []*types.Issue{ group: []*types.Issue{
{ID: "bd-2", Title: "Task"}, {ID: "bd-2", Title: "Task"},
{ID: "bd-1", Title: "Task"}, {ID: "bd-1", Title: "Task"},
@@ -101,7 +102,8 @@ func TestChooseMergeTarget(t *testing.T) {
"bd-1": 5, "bd-1": 5,
"bd-2": 0, "bd-2": 0,
}, },
wantID: "bd-1", structuralScores: map[string]*issueScore{},
wantID: "bd-1",
}, },
{ {
name: "choose by lexicographic order if same references", name: "choose by lexicographic order if same references",
@@ -113,7 +115,8 @@ func TestChooseMergeTarget(t *testing.T) {
"bd-1": 0, "bd-1": 0,
"bd-2": 0, "bd-2": 0,
}, },
wantID: "bd-1", structuralScores: map[string]*issueScore{},
wantID: "bd-1",
}, },
{ {
name: "prefer higher references even with larger ID", name: "prefer higher references even with larger ID",
@@ -125,13 +128,46 @@ func TestChooseMergeTarget(t *testing.T) {
"bd-1": 1, "bd-1": 1,
"bd-100": 10, "bd-100": 10,
}, },
wantID: "bd-100", structuralScores: map[string]*issueScore{},
wantID: "bd-100",
},
{
name: "prefer dependents over text references (GH#1022)",
group: []*types.Issue{
{ID: "HONEY-s2g1", Title: "P1 / Foundations"}, // Has 17 children
{ID: "HONEY-d0mw", Title: "P1 / Foundations"}, // Empty shell
},
refCounts: map[string]int{
"HONEY-s2g1": 0,
"HONEY-d0mw": 0,
},
structuralScores: map[string]*issueScore{
"HONEY-s2g1": {dependentCount: 17, dependsOnCount: 2, textRefs: 0},
"HONEY-d0mw": {dependentCount: 0, dependsOnCount: 0, textRefs: 0},
},
wantID: "HONEY-s2g1", // Should keep the one with children
},
{
name: "dependents beat text references",
group: []*types.Issue{
{ID: "bd-1", Title: "Task"}, // Has text refs but no deps
{ID: "bd-2", Title: "Task"}, // Has deps but no text refs
},
refCounts: map[string]int{
"bd-1": 100, // Lots of text references
"bd-2": 0,
},
structuralScores: map[string]*issueScore{
"bd-1": {dependentCount: 0, dependsOnCount: 0, textRefs: 100},
"bd-2": {dependentCount: 5, dependsOnCount: 0, textRefs: 0}, // 5 children/dependents
},
wantID: "bd-2", // Dependents take priority
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
target := chooseMergeTarget(tt.group, tt.refCounts) target := chooseMergeTarget(tt.group, tt.refCounts, tt.structuralScores)
if target.ID != tt.wantID { if target.ID != tt.wantID {
t.Errorf("chooseMergeTarget() = %v, want %v", target.ID, tt.wantID) t.Errorf("chooseMergeTarget() = %v, want %v", target.ID, tt.wantID)
} }

View File

@@ -466,21 +466,26 @@ NOTE: Import requires direct database access and does not work with daemon mode.
} }
refCounts := countReferences(allIssues) refCounts := countReferences(allIssues)
structuralScores := countStructuralRelationships(duplicateGroups)
fmt.Fprintf(os.Stderr, "Found %d duplicate group(s)\n\n", len(duplicateGroups)) fmt.Fprintf(os.Stderr, "Found %d duplicate group(s)\n\n", len(duplicateGroups))
for i, group := range duplicateGroups { for i, group := range duplicateGroups {
target := chooseMergeTarget(group, refCounts) target := chooseMergeTarget(group, refCounts, structuralScores)
fmt.Fprintf(os.Stderr, "Group %d: %s\n", i+1, group[0].Title) fmt.Fprintf(os.Stderr, "Group %d: %s\n", i+1, group[0].Title)
for _, issue := range group { for _, issue := range group {
refs := refCounts[issue.ID] refs := refCounts[issue.ID]
depCount := 0
if score, ok := structuralScores[issue.ID]; ok {
depCount = score.dependentCount
}
marker := " " marker := " "
if issue.ID == target.ID { if issue.ID == target.ID {
marker = "→ " marker = "→ "
} }
fmt.Fprintf(os.Stderr, " %s%s (%s, P%d, %d refs)\n", fmt.Fprintf(os.Stderr, " %s%s (%s, P%d, %d dependents, %d refs)\n",
marker, issue.ID, issue.Status, issue.Priority, refs) marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
} }
sources := make([]string, 0, len(group)-1) sources := make([]string, 0, len(group)-1)