fix(duplicates): prefer issues with children/deps when choosing merge target (GH#1022)

The duplicate merge target selection now considers structural relationships:
1. Dependent count (children, blocked-by) - highest priority
2. Text reference count - secondary
3. Lexicographically smallest ID - tiebreaker

This fixes the bug where `bd duplicates --auto-merge` would suggest closing
an epic with 17 children instead of the empty shell duplicate.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
emma
2026-01-12 00:45:25 -08:00
committed by Steve Yegge
parent 8395105493
commit 1c1dabcfdd
3 changed files with 153 additions and 25 deletions

View File

@@ -75,11 +75,13 @@ Example:
}
// Count references for each issue
refCounts := countReferences(allIssues)
// Count structural relationships (children, dependencies) for duplicate groups
structuralScores := countStructuralRelationships(duplicateGroups)
// Prepare output
var mergeCommands []string
var mergeResults []map[string]interface{}
for _, group := range duplicateGroups {
target := chooseMergeTarget(group, refCounts)
target := chooseMergeTarget(group, refCounts, structuralScores)
sources := make([]string, 0, len(group)-1)
for _, issue := range group {
if issue.ID != target.ID {
@@ -110,7 +112,7 @@ Example:
if jsonOutput {
output := map[string]interface{}{
"duplicate_groups": len(duplicateGroups),
"groups": formatDuplicateGroupsJSON(duplicateGroups, refCounts),
"groups": formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores),
}
if autoMerge || dryRun {
output["merge_commands"] = mergeCommands
@@ -122,16 +124,20 @@ Example:
} else {
fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups))
for i, group := range duplicateGroups {
target := chooseMergeTarget(group, refCounts)
target := chooseMergeTarget(group, refCounts, structuralScores)
fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title)
for _, issue := range group {
refs := refCounts[issue.ID]
depCount := 0
if score, ok := structuralScores[issue.ID]; ok {
depCount = score.dependentCount
}
marker := " "
if issue.ID == target.ID {
marker = ui.RenderPass("→ ")
}
fmt.Printf("%s%s (%s, P%d, %d references)\n",
marker, issue.ID, issue.Status, issue.Priority, refs)
fmt.Printf("%s%s (%s, P%d, %d dependents, %d refs)\n",
marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
}
sources := make([]string, 0, len(group)-1)
for _, issue := range group {
@@ -190,6 +196,13 @@ func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue {
}
return duplicates
}
// issueScore holds the per-issue signals that can be weighed when deciding
// which member of a duplicate group is kept as the merge target.
type issueScore struct {
	// dependentCount is the number of issues that depend on this one
	// (children, blocked-by). It is the highest-priority signal.
	dependentCount int

	// dependsOnCount is the number of issues this one depends on.
	dependsOnCount int

	// textRefs is the number of text mentions of this issue in other issues'
	// descriptions/notes.
	// NOTE(review): merge-target selection appears to take text reference
	// counts from a separate refCounts map rather than from this field -
	// confirm whether it is still needed.
	textRefs int
}
// countReferences counts how many times each issue is referenced in text fields
func countReferences(issues []*types.Issue) map[string]int {
counts := make(map[string]int)
@@ -211,36 +224,110 @@ func countReferences(issues []*types.Issue) map[string]int {
}
return counts
}
// countStructuralRelationships looks up dependency counts for every issue that
// appears in the given duplicate groups.
//
// The returned map has one zero-initialised *issueScore per distinct issue ID
// found in groups. dependentCount and dependsOnCount are then filled in from a
// single store.GetDependencyCounts batch call; textRefs is left untouched.
//
// The lookup is best-effort: if the batch call returns an error, the error is
// deliberately dropped and the zeroed scores are returned, so callers fall
// back to text references when choosing a merge target.
func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore {
	total := 0
	for _, group := range groups {
		total += len(group)
	}

	scores := make(map[string]*issueScore, total)
	if total == 0 {
		// No issues to score: skip the store round trip instead of issuing a
		// batch query with an empty ID list.
		return scores
	}

	// Collect each distinct issue ID once and give it a zeroed score.
	issueIDs := make([]string, 0, total)
	for _, group := range groups {
		for _, issue := range group {
			if _, seen := scores[issue.ID]; seen {
				// Request each ID only once, even if it is listed twice.
				continue
			}
			issueIDs = append(issueIDs, issue.ID)
			scores[issue.ID] = &issueScore{}
		}
	}

	// Batch query for dependency counts.
	depCounts, err := store.GetDependencyCounts(rootCtx, issueIDs)
	if err != nil {
		// Best-effort: keep the zeroed scores so selection falls back to
		// text references only.
		return scores
	}

	// Copy counts over for the IDs we asked about; ignore anything else.
	for id, counts := range depCounts {
		score, ok := scores[id]
		if !ok {
			continue
		}
		score.dependentCount = counts.DependentCount // issues that depend on this one (children, etc.)
		score.dependsOnCount = counts.DependencyCount
	}
	return scores
}
// chooseMergeTarget selects the best issue to merge into
// Priority: highest reference count, then lexicographically smallest ID
func chooseMergeTarget(group []*types.Issue, refCounts map[string]int) *types.Issue {
// Priority order:
// 1. Highest dependent count (children, blocked-by relationships) - most connected issue wins
// 2. Highest text reference count (mentions in descriptions/notes)
// 3. Lexicographically smallest ID (stable tiebreaker)
//
// Returns nil when group is empty. An ID that is missing from structuralScores
// or refCounts is scored as zero for that factor.
func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue {
if len(group) == 0 {
return nil
}
// getScore returns (dependent count, text reference count) for an issue ID.
getScore := func(id string) (int, int) {
depCount := 0
if score, ok := structuralScores[id]; ok {
depCount = score.dependentCount
}
textRefs := refCounts[id]
return depCount, textRefs
}
// Start with the first issue as the candidate; a later issue replaces it only
// when it ranks strictly better on the first factor where the two differ.
target := group[0]
targetRefs := refCounts[target.ID]
targetDeps, targetRefs := getScore(target.ID)
for _, issue := range group[1:] {
issueRefs := refCounts[issue.ID]
if issueRefs > targetRefs || (issueRefs == targetRefs && issue.ID < target.ID) {
issueDeps, issueRefs := getScore(issue.ID)
// Compare by dependent count first (children/blocked-by)
if issueDeps > targetDeps {
target = issue
targetRefs = issueRefs
targetDeps, targetRefs = issueDeps, issueRefs
continue
}
if issueDeps < targetDeps {
continue
}
// Equal dependent count - compare by text references
if issueRefs > targetRefs {
target = issue
targetDeps, targetRefs = issueDeps, issueRefs
continue
}
if issueRefs < targetRefs {
continue
}
// Equal on both - use lexicographically smallest ID as tiebreaker
if issue.ID < target.ID {
target = issue
targetDeps, targetRefs = issueDeps, issueRefs
}
}
return target
}
// formatDuplicateGroupsJSON formats duplicate groups for JSON output
func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int) []map[string]interface{} {
func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} {
var result []map[string]interface{}
for _, group := range groups {
target := chooseMergeTarget(group, refCounts)
target := chooseMergeTarget(group, refCounts, structuralScores)
issues := make([]map[string]interface{}, len(group))
for i, issue := range group {
depCount := 0
if score, ok := structuralScores[issue.ID]; ok {
depCount = score.dependentCount
}
issues[i] = map[string]interface{}{
"id": issue.ID,
"title": issue.Title,
"status": issue.Status,
"priority": issue.Priority,
"references": refCounts[issue.ID],
"dependents": depCount,
"is_merge_target": issue.ID == target.ID,
}
}

View File

@@ -86,13 +86,14 @@ func TestFindDuplicateGroups(t *testing.T) {
func TestChooseMergeTarget(t *testing.T) {
tests := []struct {
name string
group []*types.Issue
refCounts map[string]int
wantID string
name string
group []*types.Issue
refCounts map[string]int
structuralScores map[string]*issueScore
wantID string
}{
{
name: "choose by reference count",
name: "choose by reference count when no structural data",
group: []*types.Issue{
{ID: "bd-2", Title: "Task"},
{ID: "bd-1", Title: "Task"},
@@ -101,7 +102,8 @@ func TestChooseMergeTarget(t *testing.T) {
"bd-1": 5,
"bd-2": 0,
},
wantID: "bd-1",
structuralScores: map[string]*issueScore{},
wantID: "bd-1",
},
{
name: "choose by lexicographic order if same references",
@@ -113,7 +115,8 @@ func TestChooseMergeTarget(t *testing.T) {
"bd-1": 0,
"bd-2": 0,
},
wantID: "bd-1",
structuralScores: map[string]*issueScore{},
wantID: "bd-1",
},
{
name: "prefer higher references even with larger ID",
@@ -125,13 +128,46 @@ func TestChooseMergeTarget(t *testing.T) {
"bd-1": 1,
"bd-100": 10,
},
wantID: "bd-100",
structuralScores: map[string]*issueScore{},
wantID: "bd-100",
},
{
name: "prefer dependents over text references (GH#1022)",
group: []*types.Issue{
{ID: "HONEY-s2g1", Title: "P1 / Foundations"}, // Has 17 children
{ID: "HONEY-d0mw", Title: "P1 / Foundations"}, // Empty shell
},
refCounts: map[string]int{
"HONEY-s2g1": 0,
"HONEY-d0mw": 0,
},
structuralScores: map[string]*issueScore{
"HONEY-s2g1": {dependentCount: 17, dependsOnCount: 2, textRefs: 0},
"HONEY-d0mw": {dependentCount: 0, dependsOnCount: 0, textRefs: 0},
},
wantID: "HONEY-s2g1", // Should keep the one with children
},
{
name: "dependents beat text references",
group: []*types.Issue{
{ID: "bd-1", Title: "Task"}, // Has text refs but no deps
{ID: "bd-2", Title: "Task"}, // Has deps but no text refs
},
refCounts: map[string]int{
"bd-1": 100, // Lots of text references
"bd-2": 0,
},
structuralScores: map[string]*issueScore{
"bd-1": {dependentCount: 0, dependsOnCount: 0, textRefs: 100},
"bd-2": {dependentCount: 5, dependsOnCount: 0, textRefs: 0}, // 5 children/dependents
},
wantID: "bd-2", // Dependents take priority
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
target := chooseMergeTarget(tt.group, tt.refCounts)
target := chooseMergeTarget(tt.group, tt.refCounts, tt.structuralScores)
if target.ID != tt.wantID {
t.Errorf("chooseMergeTarget() = %v, want %v", target.ID, tt.wantID)
}

View File

@@ -466,21 +466,26 @@ NOTE: Import requires direct database access and does not work with daemon mode.
}
refCounts := countReferences(allIssues)
structuralScores := countStructuralRelationships(duplicateGroups)
fmt.Fprintf(os.Stderr, "Found %d duplicate group(s)\n\n", len(duplicateGroups))
for i, group := range duplicateGroups {
target := chooseMergeTarget(group, refCounts)
target := chooseMergeTarget(group, refCounts, structuralScores)
fmt.Fprintf(os.Stderr, "Group %d: %s\n", i+1, group[0].Title)
for _, issue := range group {
refs := refCounts[issue.ID]
depCount := 0
if score, ok := structuralScores[issue.ID]; ok {
depCount = score.dependentCount
}
marker := " "
if issue.ID == target.ID {
marker = "→ "
}
fmt.Fprintf(os.Stderr, " %s%s (%s, P%d, %d refs)\n",
marker, issue.ID, issue.Status, issue.Priority, refs)
fmt.Fprintf(os.Stderr, " %s%s (%s, P%d, %d dependents, %d refs)\n",
marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
}
sources := make([]string, 0, len(group)-1)