fix(duplicates): prefer issues with children/deps when choosing merge target (GH#1022)

The duplicate merge target selection now considers structural relationships:

1. Dependent count (children, blocked-by) - highest priority
2. Text reference count - secondary
3. Lexicographically smallest ID - tiebreaker

This fixes the bug where `bd duplicates --auto-merge` would suggest closing an epic with 17 children instead of the empty shell duplicate.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 00:45:25 -08:00
parent 8395105493
commit 1c1dabcfdd
3 changed files with 153 additions and 25 deletions
--- a/cmd/bd/duplicates.go
+++ b/cmd/bd/duplicates.go
@@ -75,11 +75,13 @@ Example:
 		}
 		// Count references for each issue
 		refCounts := countReferences(allIssues)
 		// Count structural relationships (children, dependencies) for duplicate groups
 		structuralScores := countStructuralRelationships(duplicateGroups)
 		// Prepare output
 		var mergeCommands []string
 		var mergeResults []map[string]interface{}
 		for _, group := range duplicateGroups {
-			target := chooseMergeTarget(group, refCounts)
+			target := chooseMergeTarget(group, refCounts, structuralScores)
 			sources := make([]string, 0, len(group)-1)
 			for _, issue := range group {
 				if issue.ID != target.ID {
@@ -110,7 +112,7 @@ Example:
 		if jsonOutput {
 			output := map[string]interface{}{
 				"duplicate_groups": len(duplicateGroups),
-				"groups":           formatDuplicateGroupsJSON(duplicateGroups, refCounts),
+				"groups":           formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores),
 			}
 			if autoMerge || dryRun {
 				output["merge_commands"] = mergeCommands
@@ -122,16 +124,20 @@ Example:
 		} else {
 			fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups))
 			for i, group := range duplicateGroups {
-				target := chooseMergeTarget(group, refCounts)
+				target := chooseMergeTarget(group, refCounts, structuralScores)
 				fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title)
 				for _, issue := range group {
 					refs := refCounts[issue.ID]
 					depCount := 0
 					if score, ok := structuralScores[issue.ID]; ok {
 						depCount = score.dependentCount
 					}
 					marker := "  "
 					if issue.ID == target.ID {
 						marker = ui.RenderPass("→ ")
 					}
-					fmt.Printf("%s%s (%s, P%d, %d references)\n",
+					fmt.Printf("%s%s (%s, P%d, %d dependents, %d refs)\n",
-						marker, issue.ID, issue.Status, issue.Priority, refs)
+						marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
 				}
 				sources := make([]string, 0, len(group)-1)
 				for _, issue := range group {
@@ -190,6 +196,13 @@ func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue {
 	}
 	return duplicates
 }
 // issueScore captures all factors used to choose which duplicate to keep
 //
 // NOTE(review): in the code visible alongside this type, only dependentCount
 // is read (for ranking in chooseMergeTarget and for display/JSON output).
 // dependsOnCount is filled in by countStructuralRelationships but is not
 // consulted when ranking, and textRefs is never populated outside test
 // fixtures because text references are passed separately as a refCounts map.
 // Confirm whether the unused fields are intentional before relying on them.
 type issueScore struct {
 	dependentCount int // Issues that depend on this one (children, blocked-by) - highest priority
 	dependsOnCount int // Issues this one depends on
 	textRefs       int // Text mentions in other issues' descriptions/notes
 }
 // countReferences counts how many times each issue is referenced in text fields
 func countReferences(issues []*types.Issue) map[string]int {
 	counts := make(map[string]int)
@@ -211,36 +224,110 @@ func countReferences(issues []*types.Issue) map[string]int {
 	}
 	return counts
 }
 // countStructuralRelationships builds an issueScore entry for every issue that
 // appears in the given duplicate groups and fills in its dependency counts.
 //
 // All issue IDs are gathered first and resolved with a single
 // store.GetDependencyCounts call. Every issue in groups always gets an entry
 // in the returned map; if the lookup fails, the entries are returned with
 // zero counts so callers can still fall back to text references.
 func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore {
 	result := map[string]*issueScore{}
 	// Gather the IDs for the batch lookup while seeding a zeroed score per issue.
 	var ids []string
 	for gi := range groups {
 		for _, member := range groups[gi] {
 			result[member.ID] = new(issueScore)
 			ids = append(ids, member.ID)
 		}
 	}
 	// One batch query for all collected IDs.
 	found, err := store.GetDependencyCounts(rootCtx, ids)
 	if err != nil {
 		// Best-effort: keep the zeroed scores rather than failing the command.
 		return result
 	}
 	// Copy counts only for IDs that were seeded above; anything else is ignored.
 	for issueID, c := range found {
 		entry, tracked := result[issueID]
 		if !tracked {
 			continue
 		}
 		entry.dependentCount = c.DependentCount // issues that depend on this one (children, etc)
 		entry.dependsOnCount = c.DependencyCount
 	}
 	return result
 }
 // chooseMergeTarget selects the best issue to merge into
-// Priority: highest reference count, then lexicographically smallest ID
+// Priority order:
-func chooseMergeTarget(group []*types.Issue, refCounts map[string]int) *types.Issue {
+// 1. Highest dependent count (children, blocked-by relationships) - most connected issue wins
 // 2. Highest text reference count (mentions in descriptions/notes)
 // 3. Lexicographically smallest ID (stable tiebreaker)
 //
 // Returns nil when group is empty. An ID that is missing from
 // structuralScores or refCounts is scored as zero for that criterion, so
 // passing an empty structuralScores map ranks purely by text references
 // and then by ID.
 func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue {
 	if len(group) == 0 {
 		return nil
 	}
 	// getScore returns (dependent count, text reference count) for an issue ID.
 	// Only dependentCount is taken from structuralScores; text references come
 	// from refCounts, where a missing key yields zero.
 	getScore := func(id string) (int, int) {
 		depCount := 0
 		if score, ok := structuralScores[id]; ok {
 			depCount = score.dependentCount
 		}
 		textRefs := refCounts[id]
 		return depCount, textRefs
 	}
 	// Start with the first issue and replace it whenever a later one ranks higher.
 	target := group[0]
-	targetRefs := refCounts[target.ID]
+	targetDeps, targetRefs := getScore(target.ID)
 	for _, issue := range group[1:] {
-		issueRefs := refCounts[issue.ID]
+		issueDeps, issueRefs := getScore(issue.ID)
-		if issueRefs > targetRefs || (issueRefs == targetRefs && issue.ID < target.ID) {
+
 		// Compare by dependent count first (children/blocked-by)
 		if issueDeps > targetDeps {
 			target = issue
-			targetRefs = issueRefs
+			targetDeps, targetRefs = issueDeps, issueRefs
 			continue
 		}
 		if issueDeps < targetDeps {
 			continue
 		}
 		// Equal dependent count - compare by text references
 		if issueRefs > targetRefs {
 			target = issue
 			targetDeps, targetRefs = issueDeps, issueRefs
 			continue
 		}
 		if issueRefs < targetRefs {
 			continue
 		}
 		// Equal on both - use lexicographically smallest ID as tiebreaker
 		if issue.ID < target.ID {
 			target = issue
 			targetDeps, targetRefs = issueDeps, issueRefs
 		}
 	}
 	return target
 }
 // formatDuplicateGroupsJSON formats duplicate groups for JSON output
-func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int) []map[string]interface{} {
+func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} {
 	var result []map[string]interface{}
 	for _, group := range groups {
-		target := chooseMergeTarget(group, refCounts)
+		target := chooseMergeTarget(group, refCounts, structuralScores)
 		issues := make([]map[string]interface{}, len(group))
 		for i, issue := range group {
 			depCount := 0
 			if score, ok := structuralScores[issue.ID]; ok {
 				depCount = score.dependentCount
 			}
 			issues[i] = map[string]interface{}{
 				"id":              issue.ID,
 				"title":           issue.Title,
 				"status":          issue.Status,
 				"priority":        issue.Priority,
 				"references":      refCounts[issue.ID],
 				"dependents":      depCount,
 				"is_merge_target": issue.ID == target.ID,
 			}
 		}
--- a/cmd/bd/duplicates_test.go
+++ b/cmd/bd/duplicates_test.go
@@ -86,13 +86,14 @@ func TestFindDuplicateGroups(t *testing.T) {
 func TestChooseMergeTarget(t *testing.T) {
 	tests := []struct {
-		name      string
+		name             string
-		group     []*types.Issue
+		group            []*types.Issue
-		refCounts map[string]int
+		refCounts        map[string]int
-		wantID    string
+		structuralScores map[string]*issueScore
 		wantID           string
 	}{
 		{
-			name: "choose by reference count",
+			name: "choose by reference count when no structural data",
 			group: []*types.Issue{
 				{ID: "bd-2", Title: "Task"},
 				{ID: "bd-1", Title: "Task"},
@@ -101,7 +102,8 @@ func TestChooseMergeTarget(t *testing.T) {
 				"bd-1": 5,
 				"bd-2": 0,
 			},
-			wantID: "bd-1",
+			structuralScores: map[string]*issueScore{},
 			wantID:           "bd-1",
 		},
 		{
 			name: "choose by lexicographic order if same references",
@@ -113,7 +115,8 @@ func TestChooseMergeTarget(t *testing.T) {
 				"bd-1": 0,
 				"bd-2": 0,
 			},
-			wantID: "bd-1",
+			structuralScores: map[string]*issueScore{},
 			wantID:           "bd-1",
 		},
 		{
 			name: "prefer higher references even with larger ID",
@@ -125,13 +128,46 @@ func TestChooseMergeTarget(t *testing.T) {
 				"bd-1":   1,
 				"bd-100": 10,
 			},
-			wantID: "bd-100",
+			structuralScores: map[string]*issueScore{},
 			wantID:           "bd-100",
 		},
 		{
 			name: "prefer dependents over text references (GH#1022)",
 			group: []*types.Issue{
 				{ID: "HONEY-s2g1", Title: "P1 / Foundations"}, // Has 17 children
 				{ID: "HONEY-d0mw", Title: "P1 / Foundations"}, // Empty shell
 			},
 			refCounts: map[string]int{
 				"HONEY-s2g1": 0,
 				"HONEY-d0mw": 0,
 			},
 			structuralScores: map[string]*issueScore{
 				"HONEY-s2g1": {dependentCount: 17, dependsOnCount: 2, textRefs: 0},
 				"HONEY-d0mw": {dependentCount: 0, dependsOnCount: 0, textRefs: 0},
 			},
 			wantID: "HONEY-s2g1", // Should keep the one with children
 		},
 		{
 			name: "dependents beat text references",
 			group: []*types.Issue{
 				{ID: "bd-1", Title: "Task"}, // Has text refs but no deps
 				{ID: "bd-2", Title: "Task"}, // Has deps but no text refs
 			},
 			refCounts: map[string]int{
 				"bd-1": 100, // Lots of text references
 				"bd-2": 0,
 			},
 			structuralScores: map[string]*issueScore{
 				"bd-1": {dependentCount: 0, dependsOnCount: 0, textRefs: 100},
 				"bd-2": {dependentCount: 5, dependsOnCount: 0, textRefs: 0}, // 5 children/dependents
 			},
 			wantID: "bd-2", // Dependents take priority
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			target := chooseMergeTarget(tt.group, tt.refCounts)
+			target := chooseMergeTarget(tt.group, tt.refCounts, tt.structuralScores)
 			if target.ID != tt.wantID {
 				t.Errorf("chooseMergeTarget() = %v, want %v", target.ID, tt.wantID)
 			}
--- a/cmd/bd/import.go
+++ b/cmd/bd/import.go
@@ -466,21 +466,26 @@ NOTE: Import requires direct database access and does not work with daemon mode.
 			}
 			refCounts := countReferences(allIssues)
 			structuralScores := countStructuralRelationships(duplicateGroups)
 			fmt.Fprintf(os.Stderr, "Found %d duplicate group(s)\n\n", len(duplicateGroups))
 			for i, group := range duplicateGroups {
-				target := chooseMergeTarget(group, refCounts)
+				target := chooseMergeTarget(group, refCounts, structuralScores)
 				fmt.Fprintf(os.Stderr, "Group %d: %s\n", i+1, group[0].Title)
 				for _, issue := range group {
 					refs := refCounts[issue.ID]
 					depCount := 0
 					if score, ok := structuralScores[issue.ID]; ok {
 						depCount = score.dependentCount
 					}
 					marker := "  "
 					if issue.ID == target.ID {
 						marker = "→ "
 					}
-					fmt.Fprintf(os.Stderr, "  %s%s (%s, P%d, %d refs)\n",
+					fmt.Fprintf(os.Stderr, "  %s%s (%s, P%d, %d dependents, %d refs)\n",
-						marker, issue.ID, issue.Status, issue.Priority, refs)
+						marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
 				}
 				sources := make([]string, 0, len(group)-1)