From 1c1dabcfdd71c480b4f5b421b67bb67e449cbf10 Mon Sep 17 00:00:00 2001
From: emma <steve.yegge@gmail.com>
Date: Mon, 12 Jan 2026 00:45:25 -0800
Subject: [PATCH] fix(duplicates): prefer issues with children/deps when
 choosing merge target (GH#1022)

The duplicate merge target selection now considers structural relationships:
1. Dependent count (children, blocked-by) - highest priority
2. Text reference count - secondary
3. Lexicographically smallest ID - tiebreaker

This fixes the bug where `bd duplicates --auto-merge` would suggest closing
an epic with 17 children instead of the empty shell duplicate.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 cmd/bd/duplicates.go      | 113 +++++++++++++++++++++++++++++++++-----
 cmd/bd/duplicates_test.go |  54 +++++++++++++++---
 cmd/bd/import.go          |  11 +++-
 3 files changed, 153 insertions(+), 25 deletions(-)

diff --git a/cmd/bd/duplicates.go b/cmd/bd/duplicates.go
index f8a217f2..c12471de 100644
--- a/cmd/bd/duplicates.go
+++ b/cmd/bd/duplicates.go
@@ -75,11 +75,13 @@ Example:
 		}
 		// Count references for each issue
 		refCounts := countReferences(allIssues)
+		// Count structural relationships (children, dependencies) for duplicate groups
+		structuralScores := countStructuralRelationships(duplicateGroups)
 		// Prepare output
 		var mergeCommands []string
 		var mergeResults []map[string]interface{}
 		for _, group := range duplicateGroups {
-			target := chooseMergeTarget(group, refCounts)
+			target := chooseMergeTarget(group, refCounts, structuralScores)
 			sources := make([]string, 0, len(group)-1)
 			for _, issue := range group {
 				if issue.ID != target.ID {
@@ -110,7 +112,7 @@ Example:
 		if jsonOutput {
 			output := map[string]interface{}{
 				"duplicate_groups": len(duplicateGroups),
-				"groups":           formatDuplicateGroupsJSON(duplicateGroups, refCounts),
+				"groups":           formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores),
 			}
 			if autoMerge || dryRun {
 				output["merge_commands"] = mergeCommands
@@ -122,16 +124,20 @@ Example:
 		} else {
 			fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups))
 			for i, group := range duplicateGroups {
-				target := chooseMergeTarget(group, refCounts)
+				target := chooseMergeTarget(group, refCounts, structuralScores)
 				fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title)
 				for _, issue := range group {
 					refs := refCounts[issue.ID]
+					depCount := 0
+					if score, ok := structuralScores[issue.ID]; ok {
+						depCount = score.dependentCount
+					}
 					marker := "  "
 					if issue.ID == target.ID {
 						marker = ui.RenderPass("→ ")
 					}
-					fmt.Printf("%s%s (%s, P%d, %d references)\n",
-						marker, issue.ID, issue.Status, issue.Priority, refs)
+					fmt.Printf("%s%s (%s, P%d, %d dependents, %d refs)\n",
+						marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
 				}
 				sources := make([]string, 0, len(group)-1)
 				for _, issue := range group {
@@ -190,6 +196,13 @@ func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue {
 	}
 	return duplicates
 }
+// issueScore captures all factors used to choose which duplicate to keep
+type issueScore struct {
+	dependentCount int // Issues that depend on this one (children, blocked-by) - highest priority
+	dependsOnCount int // Issues this one depends on
+	textRefs       int // Text mentions in other issues' descriptions/notes
+}
+
 // countReferences counts how many times each issue is referenced in text fields
 func countReferences(issues []*types.Issue) map[string]int {
 	counts := make(map[string]int)
@@ -211,36 +224,110 @@ func countReferences(issues []*types.Issue) map[string]int {
 	}
 	return counts
 }
+
+// countStructuralRelationships counts dependency relationships for issues in duplicate groups.
+// Uses the efficient GetDependencyCounts batch query.
+func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore {
+	scores := make(map[string]*issueScore)
+	ctx := rootCtx
+
+	// Collect all issue IDs from all groups
+	var issueIDs []string
+	for _, group := range groups {
+		for _, issue := range group {
+			issueIDs = append(issueIDs, issue.ID)
+			scores[issue.ID] = &issueScore{}
+		}
+	}
+
+	// Batch query for dependency counts
+	depCounts, err := store.GetDependencyCounts(ctx, issueIDs)
+	if err != nil {
+		// On error, return empty scores - fallback to text refs only
+		return scores
+	}
+
+	// Populate scores from dependency counts
+	for id, counts := range depCounts {
+		if score, ok := scores[id]; ok {
+			score.dependentCount = counts.DependentCount // Issues that depend on this one (children, etc)
+			score.dependsOnCount = counts.DependencyCount
+		}
+	}
+
+	return scores
+}
 // chooseMergeTarget selects the best issue to merge into
-// Priority: highest reference count, then lexicographically smallest ID
-func chooseMergeTarget(group []*types.Issue, refCounts map[string]int) *types.Issue {
+// Priority order:
+// 1. Highest dependent count (children, blocked-by relationships) - most connected issue wins
+// 2. Highest text reference count (mentions in descriptions/notes)
+// 3. Lexicographically smallest ID (stable tiebreaker)
+func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue {
 	if len(group) == 0 {
 		return nil
 	}
+
+	getScore := func(id string) (int, int) {
+		depCount := 0
+		if score, ok := structuralScores[id]; ok {
+			depCount = score.dependentCount
+		}
+		textRefs := refCounts[id]
+		return depCount, textRefs
+	}
+
 	target := group[0]
-	targetRefs := refCounts[target.ID]
+	targetDeps, targetRefs := getScore(target.ID)
+
 	for _, issue := range group[1:] {
-		issueRefs := refCounts[issue.ID]
-		if issueRefs > targetRefs || (issueRefs == targetRefs && issue.ID < target.ID) {
+		issueDeps, issueRefs := getScore(issue.ID)
+
+		// Compare by dependent count first (children/blocked-by)
+		if issueDeps > targetDeps {
 			target = issue
-			targetRefs = issueRefs
+			targetDeps, targetRefs = issueDeps, issueRefs
+			continue
+		}
+		if issueDeps < targetDeps {
+			continue
+		}
+
+		// Equal dependent count - compare by text references
+		if issueRefs > targetRefs {
+			target = issue
+			targetDeps, targetRefs = issueDeps, issueRefs
+			continue
+		}
+		if issueRefs < targetRefs {
+			continue
+		}
+
+		// Equal on both - use lexicographically smallest ID as tiebreaker
+		if issue.ID < target.ID {
+			target = issue
+			targetDeps, targetRefs = issueDeps, issueRefs
 		}
 	}
 	return target
 }
 // formatDuplicateGroupsJSON formats duplicate groups for JSON output
-func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int) []map[string]interface{} {
+func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} {
 	var result []map[string]interface{}
 	for _, group := range groups {
-		target := chooseMergeTarget(group, refCounts)
+		target := chooseMergeTarget(group, refCounts, structuralScores)
 		issues := make([]map[string]interface{}, len(group))
 		for i, issue := range group {
+			depCount := 0
+			if score, ok := structuralScores[issue.ID]; ok {
+				depCount = score.dependentCount
+			}
 			issues[i] = map[string]interface{}{
 				"id":              issue.ID,
 				"title":           issue.Title,
 				"status":          issue.Status,
 				"priority":        issue.Priority,
 				"references":      refCounts[issue.ID],
+				"dependents":      depCount,
 				"is_merge_target": issue.ID == target.ID,
 			}
 		}
diff --git a/cmd/bd/duplicates_test.go b/cmd/bd/duplicates_test.go
index b59c042f..36df246a 100644
--- a/cmd/bd/duplicates_test.go
+++ b/cmd/bd/duplicates_test.go
@@ -86,13 +86,14 @@ func TestFindDuplicateGroups(t *testing.T) {
 
 func TestChooseMergeTarget(t *testing.T) {
 	tests := []struct {
-		name      string
-		group     []*types.Issue
-		refCounts map[string]int
-		wantID    string
+		name             string
+		group            []*types.Issue
+		refCounts        map[string]int
+		structuralScores map[string]*issueScore
+		wantID           string
 	}{
 		{
-			name: "choose by reference count",
+			name: "choose by reference count when no structural data",
 			group: []*types.Issue{
 				{ID: "bd-2", Title: "Task"},
 				{ID: "bd-1", Title: "Task"},
@@ -101,7 +102,8 @@ func TestChooseMergeTarget(t *testing.T) {
 				"bd-1": 5,
 				"bd-2": 0,
 			},
-			wantID: "bd-1",
+			structuralScores: map[string]*issueScore{},
+			wantID:           "bd-1",
 		},
 		{
 			name: "choose by lexicographic order if same references",
@@ -113,7 +115,8 @@ func TestChooseMergeTarget(t *testing.T) {
 				"bd-1": 0,
 				"bd-2": 0,
 			},
-			wantID: "bd-1",
+			structuralScores: map[string]*issueScore{},
+			wantID:           "bd-1",
 		},
 		{
 			name: "prefer higher references even with larger ID",
@@ -125,13 +128,46 @@ func TestChooseMergeTarget(t *testing.T) {
 				"bd-1":   1,
 				"bd-100": 10,
 			},
-			wantID: "bd-100",
+			structuralScores: map[string]*issueScore{},
+			wantID:           "bd-100",
+		},
+		{
+			name: "prefer dependents over text references (GH#1022)",
+			group: []*types.Issue{
+				{ID: "HONEY-s2g1", Title: "P1 / Foundations"}, // Has 17 children
+				{ID: "HONEY-d0mw", Title: "P1 / Foundations"}, // Empty shell
+			},
+			refCounts: map[string]int{
+				"HONEY-s2g1": 0,
+				"HONEY-d0mw": 0,
+			},
+			structuralScores: map[string]*issueScore{
+				"HONEY-s2g1": {dependentCount: 17, dependsOnCount: 2, textRefs: 0},
+				"HONEY-d0mw": {dependentCount: 0, dependsOnCount: 0, textRefs: 0},
+			},
+			wantID: "HONEY-s2g1", // Should keep the one with children
+		},
+		{
+			name: "dependents beat text references",
+			group: []*types.Issue{
+				{ID: "bd-1", Title: "Task"}, // Has text refs but no deps
+				{ID: "bd-2", Title: "Task"}, // Has deps but no text refs
+			},
+			refCounts: map[string]int{
+				"bd-1": 100, // Lots of text references
+				"bd-2": 0,
+			},
+			structuralScores: map[string]*issueScore{
+				"bd-1": {dependentCount: 0, dependsOnCount: 0, textRefs: 100},
+				"bd-2": {dependentCount: 5, dependsOnCount: 0, textRefs: 0}, // 5 children/dependents
+			},
+			wantID: "bd-2", // Dependents take priority
 		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			target := chooseMergeTarget(tt.group, tt.refCounts)
+			target := chooseMergeTarget(tt.group, tt.refCounts, tt.structuralScores)
 			if target.ID != tt.wantID {
 				t.Errorf("chooseMergeTarget() = %v, want %v", target.ID, tt.wantID)
 			}
diff --git a/cmd/bd/import.go b/cmd/bd/import.go
index f13e0e04..b132a36b 100644
--- a/cmd/bd/import.go
+++ b/cmd/bd/import.go
@@ -466,21 +466,26 @@ NOTE: Import requires direct database access and does not work with daemon mode.
 			}
 
 			refCounts := countReferences(allIssues)
+			structuralScores := countStructuralRelationships(duplicateGroups)
 
 			fmt.Fprintf(os.Stderr, "Found %d duplicate group(s)\n\n", len(duplicateGroups))
 
 			for i, group := range duplicateGroups {
-				target := chooseMergeTarget(group, refCounts)
+				target := chooseMergeTarget(group, refCounts, structuralScores)
 				fmt.Fprintf(os.Stderr, "Group %d: %s\n", i+1, group[0].Title)
 
 				for _, issue := range group {
 					refs := refCounts[issue.ID]
+					depCount := 0
+					if score, ok := structuralScores[issue.ID]; ok {
+						depCount = score.dependentCount
+					}
 					marker := "  "
 					if issue.ID == target.ID {
 						marker = "→ "
 					}
-					fmt.Fprintf(os.Stderr, "  %s%s (%s, P%d, %d refs)\n",
-						marker, issue.ID, issue.Status, issue.Priority, refs)
+					fmt.Fprintf(os.Stderr, "  %s%s (%s, P%d, %d dependents, %d refs)\n",
+						marker, issue.ID, issue.Status, issue.Priority, depCount, refs)
 				}
 
 				sources := make([]string, 0, len(group)-1)