Files
beads/cmd/bd/duplicates.go
lydia feed888b57 fix(duplicates): use combined weight (dependents + dependencies) for merge target selection (GH#1022)
When choosing which duplicate to keep, the merge target now considers
both dependentCount (children/blocked-by) AND dependsOnCount (dependencies).
This ensures issues with ANY structural connections are preferred over
empty shells, rather than only considering children.

- Updated chooseMergeTarget to calculate weight = dependentCount + dependsOnCount
- Updated display output to show weight instead of just dependents
- Updated JSON output to include dependencies and weight fields
- Added tests for dependsOnCount inclusion and combined weight calculation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 03:44:13 -08:00

404 lines
13 KiB
Go

package main
import (
"fmt"
"os"
"regexp"
"strings"
"github.com/spf13/cobra"
"github.com/steveyegge/beads/internal/types"
"github.com/steveyegge/beads/internal/ui"
)
// duplicatesCmd finds groups of open issues with identical content and
// suggests (or, with --auto-merge, performs) merging each group into a
// single surviving issue chosen by chooseMergeTarget.
var duplicatesCmd = &cobra.Command{
	Use:     "duplicates",
	GroupID: "deps",
	Short:   "Find and optionally merge duplicate issues",
	Long: `Find issues with identical content (title, description, design, acceptance criteria).
Groups issues by content hash and reports duplicates with suggested merge targets.
The merge target is chosen by:
1. Reference count (most referenced issue wins)
2. Lexicographically smallest ID if reference counts are equal
Only groups issues with matching status (open with open, closed with closed).
Example:
bd duplicates # Show all duplicate groups
bd duplicates --auto-merge # Automatically merge all duplicates
bd duplicates --dry-run # Show what would be merged`,
	Run: func(cmd *cobra.Command, _ []string) {
		autoMerge, _ := cmd.Flags().GetBool("auto-merge")
		dryRun, _ := cmd.Flags().GetBool("dry-run")

		// Block writes in readonly mode (merging modifies data).
		if autoMerge && !dryRun {
			CheckReadonly("duplicates --auto-merge")
		}

		// Check daemon mode - not supported yet (merge command limitation).
		if daemonClient != nil {
			fmt.Fprintf(os.Stderr, "Error: duplicates command not yet supported in daemon mode (see bd-190)\n")
			fmt.Fprintf(os.Stderr, "Use: bd --no-daemon duplicates\n")
			os.Exit(1)
		}

		// Use global jsonOutput set by PersistentPreRun.
		ctx := rootCtx

		// Check database freshness before reading (bd-2q6d, bd-c4rq).
		// Skip check when using daemon (daemon auto-imports on staleness).
		if daemonClient == nil {
			if err := ensureDatabaseFresh(ctx); err != nil {
				fmt.Fprintf(os.Stderr, "Error: %v\n", err)
				os.Exit(1)
			}
		}

		// Get all issues (closed issues are kept for reference counting below).
		allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
			os.Exit(1)
		}

		// Filter out closed issues - they're done, no point detecting duplicates.
		openIssues := make([]*types.Issue, 0, len(allIssues))
		for _, issue := range allIssues {
			if issue.Status != types.StatusClosed {
				openIssues = append(openIssues, issue)
			}
		}

		// Find duplicates (only among open issues).
		duplicateGroups := findDuplicateGroups(openIssues)
		if len(duplicateGroups) == 0 {
			if !jsonOutput {
				fmt.Println("No duplicates found!")
			} else {
				outputJSON(map[string]interface{}{
					"duplicate_groups": 0,
					"groups":           []interface{}{},
				})
			}
			return
		}

		// Count text references for each issue (mentions in all issues' text).
		refCounts := countReferences(allIssues)

		// Count structural relationships (children, dependencies) for duplicate groups.
		structuralScores := countStructuralRelationships(duplicateGroups)

		// Prepare output.
		var mergeCommands []string
		var mergeResults []map[string]interface{}
		for _, group := range duplicateGroups {
			target := chooseMergeTarget(group, refCounts, structuralScores)
			sources := make([]string, 0, len(group)-1)
			for _, issue := range group {
				if issue.ID != target.ID {
					sources = append(sources, issue.ID)
				}
			}
			// Generate actionable command suggestion.
			// BUGFIX: this local was previously named `cmd`, shadowing the
			// *cobra.Command parameter for the remainder of the loop body.
			suggestion := fmt.Sprintf("# Duplicate: %s (same content as %s)\n# Suggested action: bd close %s && bd dep add %s %s --type related",
				strings.Join(sources, " "),
				target.ID,
				strings.Join(sources, " "),
				strings.Join(sources, " "),
				target.ID)
			mergeCommands = append(mergeCommands, suggestion)
			// Simplified from `if autoMerge || dryRun { if !dryRun { ... } }`:
			// a merge only actually runs with --auto-merge and no --dry-run.
			if autoMerge && !dryRun {
				mergeResults = append(mergeResults, performMerge(target.ID, sources))
			}
		}

		// Mark dirty if we performed merges.
		if autoMerge && !dryRun && len(mergeCommands) > 0 {
			markDirtyAndScheduleFlush()
		}

		// Output results.
		if jsonOutput {
			output := map[string]interface{}{
				"duplicate_groups": len(duplicateGroups),
				"groups":           formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores),
			}
			if autoMerge || dryRun {
				output["merge_commands"] = mergeCommands
				if autoMerge && !dryRun {
					output["merge_results"] = mergeResults
				}
			}
			outputJSON(output)
		} else {
			fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups))
			for i, group := range duplicateGroups {
				target := chooseMergeTarget(group, refCounts, structuralScores)
				fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title)
				for _, issue := range group {
					refs := refCounts[issue.ID]
					weight := 0
					if score, ok := structuralScores[issue.ID]; ok {
						// Combined weight: dependents + dependencies (GH#1022).
						weight = score.dependentCount + score.dependsOnCount
					}
					marker := " "
					if issue.ID == target.ID {
						marker = ui.RenderPass("→ ")
					}
					fmt.Printf("%s%s (%s, P%d, weight=%d, %d refs)\n",
						marker, issue.ID, issue.Status, issue.Priority, weight, refs)
				}
				sources := make([]string, 0, len(group)-1)
				for _, issue := range group {
					if issue.ID != target.ID {
						sources = append(sources, issue.ID)
					}
				}
				fmt.Printf(" %s Duplicate: %s (same content as %s)\n", ui.RenderAccent("Note:"), strings.Join(sources, " "), target.ID)
				fmt.Printf(" %s bd close %s && bd dep add %s %s --type related\n\n",
					ui.RenderAccent("Suggested:"), strings.Join(sources, " "), strings.Join(sources, " "), target.ID)
			}
			if autoMerge {
				if dryRun {
					fmt.Printf("%s Dry run - would execute %d merge(s)\n", ui.RenderWarn("⚠"), len(mergeCommands))
				} else {
					fmt.Printf("%s Merged %d group(s)\n", ui.RenderPass("✓"), len(mergeCommands))
				}
			} else {
				fmt.Printf("%s Run with --auto-merge to execute all suggested merges\n", ui.RenderAccent("💡"))
			}
		}
	},
}
// init registers the duplicates command and its flags with the root command.
func init() {
	flags := duplicatesCmd.Flags()
	flags.Bool("auto-merge", false, "Automatically merge all duplicates")
	flags.Bool("dry-run", false, "Show what would be merged without making changes")
	rootCmd.AddCommand(duplicatesCmd)
}
// contentKey represents the fields we use to identify duplicate issues.
// Two issues are considered duplicates when every field here compares
// equal, since the key is used directly as a Go map key (exact string
// equality, no normalization).
type contentKey struct {
	title              string // issue title
	description        string // full description text
	design             string // design notes
	acceptanceCriteria string // acceptance criteria text
	status             string // Only group issues with same status
}
// findDuplicateGroups partitions issues by their content fields and returns
// only the partitions that contain more than one issue. Group order is not
// deterministic (map iteration).
func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue {
	byContent := make(map[contentKey][]*types.Issue)
	for _, iss := range issues {
		k := contentKey{
			title:              iss.Title,
			description:        iss.Description,
			design:             iss.Design,
			acceptanceCriteria: iss.AcceptanceCriteria,
			status:             string(iss.Status),
		}
		byContent[k] = append(byContent[k], iss)
	}

	// Keep only buckets that actually contain duplicates.
	var dupes [][]*types.Issue
	for _, bucket := range byContent {
		if len(bucket) > 1 {
			dupes = append(dupes, bucket)
		}
	}
	return dupes
}
// issueScore captures all factors used to choose which duplicate to keep.
type issueScore struct {
	dependentCount int // Issues that depend on this one (children, blocked-by) - highest priority
	dependsOnCount int // Issues this one depends on
	textRefs       int // Text mentions in other issues' descriptions/notes
	// NOTE(review): textRefs is never assigned in this file — text mentions
	// are tracked separately via the refCounts map from countReferences.
	// Confirm whether this field is still needed.
}
// issueIDPattern matches issue-ID-like tokens such as "bd-190" or
// "proj-name-42": a letter, then letters/digits/hyphens, ending in
// "-<digits>". Compiled once at package scope rather than per call.
var issueIDPattern = regexp.MustCompile(`\b[a-zA-Z][-a-zA-Z0-9]*-\d+\b`)

// countReferences counts how many times each issue-ID-like token appears in
// the text fields (description, design, acceptance criteria, notes) of all
// given issues. Returns a map from matched token to occurrence count.
//
// Note: tokens are counted whether or not they correspond to a real issue;
// callers only ever look up IDs that exist, so stray matches are harmless.
func countReferences(issues []*types.Issue) map[string]int {
	counts := make(map[string]int)
	for _, issue := range issues {
		// Search in all text fields.
		textFields := []string{
			issue.Description,
			issue.Design,
			issue.AcceptanceCriteria,
			issue.Notes,
		}
		for _, text := range textFields {
			for _, match := range issueIDPattern.FindAllString(text, -1) {
				counts[match]++
			}
		}
	}
	return counts
}
// countStructuralRelationships counts dependency relationships for every
// issue appearing in the duplicate groups, using the batch
// GetDependencyCounts query (one round trip for all IDs).
func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore {
	ctx := rootCtx

	// Seed a zero score for each issue and gather the IDs to query.
	scores := make(map[string]*issueScore)
	var ids []string
	for _, group := range groups {
		for _, issue := range group {
			scores[issue.ID] = &issueScore{}
			ids = append(ids, issue.ID)
		}
	}

	// On query failure, degrade gracefully: all-zero scores mean the caller
	// effectively falls back to text-reference counts alone.
	depCounts, err := store.GetDependencyCounts(ctx, ids)
	if err != nil {
		return scores
	}

	for id, c := range depCounts {
		if s, ok := scores[id]; ok {
			s.dependentCount = c.DependentCount // issues that depend on this one (children, etc)
			s.dependsOnCount = c.DependencyCount
		}
	}
	return scores
}
// chooseMergeTarget selects the best issue in a duplicate group to merge into.
// Priority order:
// 1. Highest structural weight (dependents + dependencies) - most connected issue wins
// 2. Highest text reference count (mentions in descriptions/notes)
// 3. Lexicographically smallest ID (stable tiebreaker)
func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue {
	if len(group) == 0 {
		return nil
	}

	// score returns (structural weight, text refs) for an issue ID. An issue
	// with ANY structural connections is preferred over an empty shell.
	score := func(id string) (weight, refs int) {
		if s, ok := structuralScores[id]; ok {
			weight = s.dependentCount + s.dependsOnCount
		}
		return weight, refCounts[id]
	}

	// beats reports whether candidate (aw, ar, aID) outranks the current
	// best (bw, br, bID) under the priority order documented above.
	beats := func(aw, ar int, aID string, bw, br int, bID string) bool {
		switch {
		case aw != bw:
			return aw > bw // structural weight first
		case ar != br:
			return ar > br // then text references
		default:
			return aID < bID // finally, smallest ID wins ties
		}
	}

	best := group[0]
	bestWeight, bestRefs := score(best.ID)
	for _, cand := range group[1:] {
		w, r := score(cand.ID)
		if beats(w, r, cand.ID, bestWeight, bestRefs, best.ID) {
			best, bestWeight, bestRefs = cand, w, r
		}
	}
	return best
}
// formatDuplicateGroupsJSON formats duplicate groups for JSON output. Each
// group entry lists every member issue with its scores, plus the suggested
// merge target/sources and a ready-to-run suggested command.
func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} {
	var result []map[string]interface{}
	for _, group := range groups {
		target := chooseMergeTarget(group, refCounts, structuralScores)

		// Build the per-issue entries and the source ID list in one pass.
		issues := make([]map[string]interface{}, 0, len(group))
		sources := make([]string, 0, len(group)-1)
		for _, issue := range group {
			var dependents, dependencies int
			if score, ok := structuralScores[issue.ID]; ok {
				dependents = score.dependentCount
				dependencies = score.dependsOnCount
			}
			issues = append(issues, map[string]interface{}{
				"id":              issue.ID,
				"title":           issue.Title,
				"status":          issue.Status,
				"priority":        issue.Priority,
				"references":      refCounts[issue.ID],
				"dependents":      dependents,
				"dependencies":    dependencies,
				"weight":          dependents + dependencies,
				"is_merge_target": issue.ID == target.ID,
			})
			if issue.ID != target.ID {
				sources = append(sources, issue.ID)
			}
		}

		srcList := strings.Join(sources, " ")
		result = append(result, map[string]interface{}{
			"title":             group[0].Title,
			"issues":            issues,
			"suggested_target":  target.ID,
			"suggested_sources": sources,
			"suggested_action":  fmt.Sprintf("bd close %s && bd dep add %s %s --type related", srcList, srcList, target.ID),
			"note":              fmt.Sprintf("Duplicate: %s (same content as %s)", srcList, target.ID),
		})
	}
	return result
}
// performMerge executes the merge operation:
// 1. Closes all source issues with a reason indicating they are duplicates
// 2. Links each source to the target with a "related" dependency
// Returns a map with the merge result for JSON output; per-issue failures
// are collected in "errors" rather than aborting the whole merge.
func performMerge(targetID string, sourceIDs []string) map[string]interface{} {
	ctx := rootCtx

	closed := []string{}
	linked := []string{}
	problems := []string{}

	// Same closure reason for every duplicate in this group.
	reason := fmt.Sprintf("Duplicate of %s", targetID)

	for _, srcID := range sourceIDs {
		// Close the duplicate issue; skip linking if the close fails.
		if err := store.CloseIssue(ctx, srcID, reason, actor, ""); err != nil {
			problems = append(problems, fmt.Sprintf("failed to close %s: %v", srcID, err))
			continue
		}
		closed = append(closed, srcID)

		// Record the duplicate relationship as a "related" dependency.
		link := &types.Dependency{
			IssueID:     srcID,
			DependsOnID: targetID,
			Type:        types.DependencyType("related"),
		}
		if err := store.AddDependency(ctx, link, actor); err != nil {
			problems = append(problems, fmt.Sprintf("failed to link %s to %s: %v", srcID, targetID, err))
			continue
		}
		linked = append(linked, srcID)
	}

	return map[string]interface{}{
		"target":  targetID,
		"sources": sourceIDs,
		"closed":  closed,
		"linked":  linked,
		"errors":  problems,
	}
}