From 1c1dabcfdd71c480b4f5b421b67bb67e449cbf10 Mon Sep 17 00:00:00 2001 From: emma Date: Mon, 12 Jan 2026 00:45:25 -0800 Subject: [PATCH] fix(duplicates): prefer issues with children/deps when choosing merge target (GH#1022) The duplicate merge target selection now considers structural relationships: 1. Dependent count (children, blocked-by) - highest priority 2. Text reference count - secondary 3. Lexicographically smallest ID - tiebreaker This fixes the bug where `bd duplicates --auto-merge` would suggest closing an epic with 17 children instead of the empty shell duplicate. Co-Authored-By: Claude Opus 4.5 --- cmd/bd/duplicates.go | 113 +++++++++++++++++++++++++++++++++----- cmd/bd/duplicates_test.go | 54 +++++++++++++++--- cmd/bd/import.go | 11 +++- 3 files changed, 153 insertions(+), 25 deletions(-) diff --git a/cmd/bd/duplicates.go b/cmd/bd/duplicates.go index f8a217f2..c12471de 100644 --- a/cmd/bd/duplicates.go +++ b/cmd/bd/duplicates.go @@ -75,11 +75,13 @@ Example: } // Count references for each issue refCounts := countReferences(allIssues) + // Count structural relationships (children, dependencies) for duplicate groups + structuralScores := countStructuralRelationships(duplicateGroups) // Prepare output var mergeCommands []string var mergeResults []map[string]interface{} for _, group := range duplicateGroups { - target := chooseMergeTarget(group, refCounts) + target := chooseMergeTarget(group, refCounts, structuralScores) sources := make([]string, 0, len(group)-1) for _, issue := range group { if issue.ID != target.ID { @@ -110,7 +112,7 @@ Example: if jsonOutput { output := map[string]interface{}{ "duplicate_groups": len(duplicateGroups), - "groups": formatDuplicateGroupsJSON(duplicateGroups, refCounts), + "groups": formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores), } if autoMerge || dryRun { output["merge_commands"] = mergeCommands @@ -122,16 +124,20 @@ Example: } else { fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups)) for i, group := range duplicateGroups { - target := chooseMergeTarget(group, refCounts) + target := chooseMergeTarget(group, refCounts, structuralScores) fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title) for _, issue := range group { refs := refCounts[issue.ID] + depCount := 0 + if score, ok := structuralScores[issue.ID]; ok { + depCount = score.dependentCount + } marker := " " if issue.ID == target.ID { marker = ui.RenderPass("→ ") } - fmt.Printf("%s%s (%s, P%d, %d references)\n", - marker, issue.ID, issue.Status, issue.Priority, refs) + fmt.Printf("%s%s (%s, P%d, %d dependents, %d refs)\n", + marker, issue.ID, issue.Status, issue.Priority, depCount, refs) } sources := make([]string, 0, len(group)-1) for _, issue := range group { @@ -190,6 +196,13 @@ func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue { } return duplicates } +// issueScore captures all factors used to choose which duplicate to keep +type issueScore struct { + dependentCount int // Issues that depend on this one (children, blocked-by) - highest priority + dependsOnCount int // Issues this one depends on + textRefs int // Text mentions in other issues' descriptions/notes +} + // countReferences counts how many times each issue is referenced in text fields func countReferences(issues []*types.Issue) map[string]int { counts := make(map[string]int) @@ -211,36 +224,110 @@ func countReferences(issues []*types.Issue) map[string]int { } return counts } + +// countStructuralRelationships counts dependency relationships for issues in duplicate groups. +// Uses the efficient GetDependencyCounts batch query. +func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore { + scores := make(map[string]*issueScore) + ctx := rootCtx + + // Collect all issue IDs from all groups + var issueIDs []string + for _, group := range groups { + for _, issue := range group { + issueIDs = append(issueIDs, issue.ID) + scores[issue.ID] = &issueScore{} + } + } + + // Batch query for dependency counts + depCounts, err := store.GetDependencyCounts(ctx, issueIDs) + if err != nil { + // On error, return empty scores - fallback to text refs only + return scores + } + + // Populate scores from dependency counts + for id, counts := range depCounts { + if score, ok := scores[id]; ok { + score.dependentCount = counts.DependentCount // Issues that depend on this one (children, etc) + score.dependsOnCount = counts.DependencyCount + } + } + + return scores +} // chooseMergeTarget selects the best issue to merge into -// Priority: highest reference count, then lexicographically smallest ID -func chooseMergeTarget(group []*types.Issue, refCounts map[string]int) *types.Issue { +// Priority order: +// 1. Highest dependent count (children, blocked-by relationships) - most connected issue wins +// 2. Highest text reference count (mentions in descriptions/notes) +// 3. Lexicographically smallest ID (stable tiebreaker) +func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue { if len(group) == 0 { return nil } + + getScore := func(id string) (int, int) { + depCount := 0 + if score, ok := structuralScores[id]; ok { + depCount = score.dependentCount + } + textRefs := refCounts[id] + return depCount, textRefs + } + target := group[0] - targetRefs := refCounts[target.ID] + targetDeps, targetRefs := getScore(target.ID) + for _, issue := range group[1:] { - issueRefs := refCounts[issue.ID] - if issueRefs > targetRefs || (issueRefs == targetRefs && issue.ID < target.ID) { + issueDeps, issueRefs := getScore(issue.ID) + + // Compare by dependent count first (children/blocked-by) + if issueDeps > targetDeps { target = issue - targetRefs = issueRefs + targetDeps, targetRefs = issueDeps, issueRefs + continue + } + if issueDeps < targetDeps { + continue + } + + // Equal dependent count - compare by text references + if issueRefs > targetRefs { + target = issue + targetDeps, targetRefs = issueDeps, issueRefs + continue + } + if issueRefs < targetRefs { + continue + } + + // Equal on both - use lexicographically smallest ID as tiebreaker + if issue.ID < target.ID { + target = issue + targetDeps, targetRefs = issueDeps, issueRefs } } return target } // formatDuplicateGroupsJSON formats duplicate groups for JSON output -func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int) []map[string]interface{} { +func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} { var result []map[string]interface{} for _, group := range groups { - target := chooseMergeTarget(group, refCounts) + target := chooseMergeTarget(group, refCounts, structuralScores) issues := make([]map[string]interface{}, len(group)) for i, issue := range group { + depCount := 0 + if score, ok := structuralScores[issue.ID]; ok { + depCount = score.dependentCount + } issues[i] = map[string]interface{}{ "id": issue.ID, "title": issue.Title, "status": issue.Status, "priority": issue.Priority, "references": refCounts[issue.ID], + "dependents": depCount, "is_merge_target": issue.ID == target.ID, } } diff --git a/cmd/bd/duplicates_test.go b/cmd/bd/duplicates_test.go index b59c042f..36df246a 100644 --- a/cmd/bd/duplicates_test.go +++ b/cmd/bd/duplicates_test.go @@ -86,13 +86,14 @@ func TestFindDuplicateGroups(t *testing.T) { func TestChooseMergeTarget(t *testing.T) { tests := []struct { - name string - group []*types.Issue - refCounts map[string]int - wantID string + name string + group []*types.Issue + refCounts map[string]int + structuralScores map[string]*issueScore + wantID string }{ { - name: "choose by reference count", + name: "choose by reference count when no structural data", group: []*types.Issue{ {ID: "bd-2", Title: "Task"}, {ID: "bd-1", Title: "Task"}, @@ -101,7 +102,8 @@ func TestChooseMergeTarget(t *testing.T) { "bd-1": 5, "bd-2": 0, }, - wantID: "bd-1", + structuralScores: map[string]*issueScore{}, + wantID: "bd-1", }, { name: "choose by lexicographic order if same references", @@ -113,7 +115,8 @@ func TestChooseMergeTarget(t *testing.T) { "bd-1": 0, "bd-2": 0, }, - wantID: "bd-1", + structuralScores: map[string]*issueScore{}, + wantID: "bd-1", }, { name: "prefer higher references even with larger ID", @@ -125,13 +128,46 @@ func TestChooseMergeTarget(t *testing.T) { "bd-1": 1, "bd-100": 10, }, - wantID: "bd-100", + structuralScores: map[string]*issueScore{}, + wantID: "bd-100", + }, + { + name: "prefer dependents over text references (GH#1022)", + group: []*types.Issue{ + {ID: "HONEY-s2g1", Title: "P1 / Foundations"}, // Has 17 children + {ID: "HONEY-d0mw", Title: "P1 / Foundations"}, // Empty shell + }, + refCounts: map[string]int{ + "HONEY-s2g1": 0, + "HONEY-d0mw": 0, + }, + structuralScores: map[string]*issueScore{ + "HONEY-s2g1": {dependentCount: 17, dependsOnCount: 2, textRefs: 0}, + "HONEY-d0mw": {dependentCount: 0, dependsOnCount: 0, textRefs: 0}, + }, + wantID: "HONEY-s2g1", // Should keep the one with children + }, + { + name: "dependents beat text references", + group: []*types.Issue{ + {ID: "bd-1", Title: "Task"}, // Has text refs but no deps + {ID: "bd-2", Title: "Task"}, // Has deps but no text refs + }, + refCounts: map[string]int{ + "bd-1": 100, // Lots of text references + "bd-2": 0, + }, + structuralScores: map[string]*issueScore{ + "bd-1": {dependentCount: 0, dependsOnCount: 0, textRefs: 100}, + "bd-2": {dependentCount: 5, dependsOnCount: 0, textRefs: 0}, // 5 children/dependents + }, + wantID: "bd-2", // Dependents take priority }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - target := chooseMergeTarget(tt.group, tt.refCounts) + target := chooseMergeTarget(tt.group, tt.refCounts, tt.structuralScores) if target.ID != tt.wantID { t.Errorf("chooseMergeTarget() = %v, want %v", target.ID, tt.wantID) } diff --git a/cmd/bd/import.go b/cmd/bd/import.go index f13e0e04..b132a36b 100644 --- a/cmd/bd/import.go +++ b/cmd/bd/import.go @@ -466,21 +466,26 @@ NOTE: Import requires direct database access and does not work with daemon mode. } refCounts := countReferences(allIssues) + structuralScores := countStructuralRelationships(duplicateGroups) fmt.Fprintf(os.Stderr, "Found %d duplicate group(s)\n\n", len(duplicateGroups)) for i, group := range duplicateGroups { - target := chooseMergeTarget(group, refCounts) + target := chooseMergeTarget(group, refCounts, structuralScores) fmt.Fprintf(os.Stderr, "Group %d: %s\n", i+1, group[0].Title) for _, issue := range group { refs := refCounts[issue.ID] + depCount := 0 + if score, ok := structuralScores[issue.ID]; ok { + depCount = score.dependentCount + } marker := " " if issue.ID == target.ID { marker = "→ " } - fmt.Fprintf(os.Stderr, " %s%s (%s, P%d, %d refs)\n", - marker, issue.ID, issue.Status, issue.Priority, refs) + fmt.Fprintf(os.Stderr, " %s%s (%s, P%d, %d dependents, %d refs)\n", + marker, issue.ID, issue.Status, issue.Priority, depCount, refs) } sources := make([]string, 0, len(group)-1)