package main

import (
	"fmt"
	"os"
	"regexp"
	"strings"

	"github.com/spf13/cobra"

	"github.com/steveyegge/beads/internal/types"
	"github.com/steveyegge/beads/internal/ui"
)

var duplicatesCmd = &cobra.Command{
	Use:     "duplicates",
	GroupID: "deps",
	Short:   "Find and optionally merge duplicate issues",
	Long: `Find issues with identical content (title, description, design, acceptance criteria).

Groups issues by content and reports duplicates with suggested merge targets.
The merge target is chosen by:
  1. Structural weight (dependents + dependencies; the most connected issue wins)
  2. Text reference count (mentions in other issues' descriptions and notes)
  3. Lexicographically smallest ID as a stable tiebreaker

Closed issues are skipped; only open issues with matching status are grouped together.

Example:
  bd duplicates              # Show all duplicate groups
  bd duplicates --auto-merge # Automatically merge all duplicates
  bd duplicates --dry-run    # Show what would be merged`,
	Run: func(cmd *cobra.Command, _ []string) {
		autoMerge, _ := cmd.Flags().GetBool("auto-merge")
		dryRun, _ := cmd.Flags().GetBool("dry-run")

		// Block writes in readonly mode (merging modifies data)
		if autoMerge && !dryRun {
			CheckReadonly("duplicates --auto-merge")
		}

		// Check daemon mode - not supported yet (merge command limitation)
		if daemonClient != nil {
			fmt.Fprintf(os.Stderr, "Error: duplicates command not yet supported in daemon mode (see bd-190)\n")
			fmt.Fprintf(os.Stderr, "Use: bd --no-daemon duplicates\n")
			os.Exit(1)
		}

		// Use global jsonOutput set by PersistentPreRun
		ctx := rootCtx

		// Check database freshness before reading (bd-2q6d, bd-c4rq)
		// Skip check when using daemon (daemon auto-imports on staleness)
		if daemonClient == nil {
			if err := ensureDatabaseFresh(ctx); err != nil {
				fmt.Fprintf(os.Stderr, "Error: %v\n", err)
				os.Exit(1)
			}
		}

		// Get all issues
		allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
			os.Exit(1)
		}

		// Filter out closed issues - they're done, no point detecting duplicates
		openIssues := make([]*types.Issue, 0, len(allIssues))
		for _, issue := range allIssues {
			if issue.Status != types.StatusClosed {
				openIssues = append(openIssues, issue)
			}
		}

		// Find duplicates (only among open issues)
		duplicateGroups := findDuplicateGroups(openIssues)

		if len(duplicateGroups) == 0 {
			if !jsonOutput {
				fmt.Println("No duplicates found!")
			} else {
				outputJSON(map[string]interface{}{
					"duplicate_groups": 0,
					"groups":           []interface{}{},
				})
			}
			return
		}

		// Count references for each issue
		refCounts := countReferences(allIssues)

		// Count structural relationships (children, dependencies) for duplicate groups
		structuralScores := countStructuralRelationships(duplicateGroups)

		// Prepare output
		var mergeCommands []string
		var mergeResults []map[string]interface{}

		for _, group := range duplicateGroups {
			target := chooseMergeTarget(group, refCounts, structuralScores)
			sources := make([]string, 0, len(group)-1)
			for _, issue := range group {
				if issue.ID != target.ID {
					sources = append(sources, issue.ID)
				}
			}

			// Generate actionable command suggestion
			suggestion := fmt.Sprintf("# Duplicate: %s (same content as %s)\n# Suggested action: bd close %s && bd dep add %s %s --type related",
				strings.Join(sources, " "), target.ID,
				strings.Join(sources, " "), strings.Join(sources, " "), target.ID)
			mergeCommands = append(mergeCommands, suggestion)

			// Only mutate data on a real --auto-merge run; --dry-run reports only
			if autoMerge && !dryRun {
				result := performMerge(target.ID, sources)
				mergeResults = append(mergeResults, result)
			}
		}

		// Mark dirty if we performed merges
		if autoMerge && !dryRun && len(mergeCommands) > 0 {
			markDirtyAndScheduleFlush()
		}
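
		// Illustrative sketch of the suggestion text built above (hypothetical IDs,
		// not taken from a real database): for a group {bd-12, bd-15, bd-7} with
		// bd-7 chosen as the target, each suggestion reads:
		//
		//   # Duplicate: bd-12 bd-15 (same content as bd-7)
		//   # Suggested action: bd close bd-12 bd-15 && bd dep add bd-12 bd-15 --type related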

		// Output results
		if jsonOutput {
			output := map[string]interface{}{
				"duplicate_groups": len(duplicateGroups),
				"groups":           formatDuplicateGroupsJSON(duplicateGroups, refCounts, structuralScores),
			}
			if autoMerge || dryRun {
				output["merge_commands"] = mergeCommands
				if autoMerge && !dryRun {
					output["merge_results"] = mergeResults
				}
			}
			outputJSON(output)
		} else {
			fmt.Printf("%s Found %d duplicate group(s):\n\n", ui.RenderWarn("🔍"), len(duplicateGroups))

			for i, group := range duplicateGroups {
				target := chooseMergeTarget(group, refCounts, structuralScores)

				fmt.Printf("%s Group %d: %s\n", ui.RenderAccent("━━"), i+1, group[0].Title)
				for _, issue := range group {
					refs := refCounts[issue.ID]
					weight := 0
					if score, ok := structuralScores[issue.ID]; ok {
						weight = score.dependentCount + score.dependsOnCount
					}
					marker := "  "
					if issue.ID == target.ID {
						marker = ui.RenderPass("→ ")
					}
					fmt.Printf("  %s%s (%s, P%d, weight=%d, %d refs)\n", marker, issue.ID, issue.Status, issue.Priority, weight, refs)
				}

				sources := make([]string, 0, len(group)-1)
				for _, issue := range group {
					if issue.ID != target.ID {
						sources = append(sources, issue.ID)
					}
				}
				fmt.Printf("  %s Duplicate: %s (same content as %s)\n", ui.RenderAccent("Note:"), strings.Join(sources, " "), target.ID)
				fmt.Printf("  %s bd close %s && bd dep add %s %s --type related\n\n", ui.RenderAccent("Suggested:"), strings.Join(sources, " "), strings.Join(sources, " "), target.ID)
			}

			if autoMerge {
				if dryRun {
					fmt.Printf("%s Dry run - would execute %d merge(s)\n", ui.RenderWarn("⚠"), len(mergeCommands))
				} else {
					fmt.Printf("%s Merged %d group(s)\n", ui.RenderPass("✓"), len(mergeCommands))
				}
			} else {
				fmt.Printf("%s Run with --auto-merge to execute all suggested merges\n", ui.RenderAccent("💡"))
			}
		}
	},
}

func init() {
	duplicatesCmd.Flags().Bool("auto-merge", false, "Automatically merge all duplicates")
	duplicatesCmd.Flags().Bool("dry-run", false, "Show what would be merged without making changes")
	rootCmd.AddCommand(duplicatesCmd)
}

// contentKey represents the fields we use to identify duplicate issues
type contentKey struct {
	title              string
	description        string
	design             string
	acceptanceCriteria string
	status             string // Only group issues with same status
}

// findDuplicateGroups groups issues by content hash
func findDuplicateGroups(issues []*types.Issue) [][]*types.Issue {
	groups := make(map[contentKey][]*types.Issue)

	for _, issue := range issues {
		key := contentKey{
			title:              issue.Title,
			description:        issue.Description,
			design:             issue.Design,
			acceptanceCriteria: issue.AcceptanceCriteria,
			status:             string(issue.Status),
		}
		groups[key] = append(groups[key], issue)
	}

	// Filter to only groups with duplicates
	var duplicates [][]*types.Issue
	for _, group := range groups {
		if len(group) > 1 {
			duplicates = append(duplicates, group)
		}
	}

	return duplicates
}

// issueScore captures all factors used to choose which duplicate to keep
type issueScore struct {
	dependentCount int // Issues that depend on this one (children, blocked-by) - highest priority
	dependsOnCount int // Issues this one depends on
	textRefs       int // Text mentions in other issues' descriptions/notes
}

// countReferences counts how many times each issue is referenced in text fields
func countReferences(issues []*types.Issue) map[string]int {
	counts := make(map[string]int)
	idPattern := regexp.MustCompile(`\b[a-zA-Z][-a-zA-Z0-9]*-\d+\b`)

	for _, issue := range issues {
		// Search in all text fields
		textFields := []string{
			issue.Description,
			issue.Design,
			issue.AcceptanceCriteria,
			issue.Notes,
		}
		for _, text := range textFields {
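			// For example (hypothetical text, not real data): a description such as
			// "blocked by bd-42, see also proj-7" yields matches ["bd-42", "proj-7"],
			// and each match increments that ID's reference count below.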
			matches := idPattern.FindAllString(text, -1)
			for _, match := range matches {
				counts[match]++
			}
		}
	}

	return counts
}

// countStructuralRelationships counts dependency relationships for issues in duplicate groups.
// Uses the efficient GetDependencyCounts batch query.
func countStructuralRelationships(groups [][]*types.Issue) map[string]*issueScore {
	scores := make(map[string]*issueScore)
	ctx := rootCtx

	// Collect all issue IDs from all groups
	var issueIDs []string
	for _, group := range groups {
		for _, issue := range group {
			issueIDs = append(issueIDs, issue.ID)
			scores[issue.ID] = &issueScore{}
		}
	}

	// Batch query for dependency counts
	depCounts, err := store.GetDependencyCounts(ctx, issueIDs)
	if err != nil {
		// On error, return empty scores - fallback to text refs only
		return scores
	}

	// Populate scores from dependency counts
	for id, counts := range depCounts {
		if score, ok := scores[id]; ok {
			score.dependentCount = counts.DependentCount // Issues that depend on this one (children, etc)
			score.dependsOnCount = counts.DependencyCount
		}
	}

	return scores
}

// chooseMergeTarget selects the best issue to merge into.
// Priority order:
//  1. Highest structural weight (dependents + dependencies) - most connected issue wins
//  2. Highest text reference count (mentions in descriptions/notes)
//  3. Lexicographically smallest ID (stable tiebreaker)
func chooseMergeTarget(group []*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) *types.Issue {
	if len(group) == 0 {
		return nil
	}

	getScore := func(id string) (int, int) {
		weight := 0
		if score, ok := structuralScores[id]; ok {
			// Weight = children/dependents + dependencies
			// An issue with ANY structural connections should be preferred over an empty shell
			weight = score.dependentCount + score.dependsOnCount
		}
		textRefs := refCounts[id]
		return weight, textRefs
	}

	target := group[0]
	targetWeight, targetRefs := getScore(target.ID)

	for _, issue := range group[1:] {
		issueWeight, issueRefs := getScore(issue.ID)

		// Compare by structural weight first (dependents + dependencies)
		if issueWeight > targetWeight {
			target = issue
			targetWeight, targetRefs = issueWeight, issueRefs
			continue
		}
		if issueWeight < targetWeight {
			continue
		}

		// Equal weight - compare by text references
		if issueRefs > targetRefs {
			target = issue
			targetWeight, targetRefs = issueWeight, issueRefs
			continue
		}
		if issueRefs < targetRefs {
			continue
		}

		// Equal on both - use lexicographically smallest ID as tiebreaker
		if issue.ID < target.ID {
			target = issue
			targetWeight, targetRefs = issueWeight, issueRefs
		}
	}

	return target
}

// formatDuplicateGroupsJSON formats duplicate groups for JSON output
func formatDuplicateGroupsJSON(groups [][]*types.Issue, refCounts map[string]int, structuralScores map[string]*issueScore) []map[string]interface{} {
	var result []map[string]interface{}

	for _, group := range groups {
		target := chooseMergeTarget(group, refCounts, structuralScores)

		issues := make([]map[string]interface{}, len(group))
		for i, issue := range group {
			dependents := 0
			dependencies := 0
			if score, ok := structuralScores[issue.ID]; ok {
				dependents = score.dependentCount
				dependencies = score.dependsOnCount
			}
			issues[i] = map[string]interface{}{
				"id":              issue.ID,
				"title":           issue.Title,
				"status":          issue.Status,
				"priority":        issue.Priority,
				"references":      refCounts[issue.ID],
				"dependents":      dependents,
				"dependencies":    dependencies,
				"weight":          dependents + dependencies,
				"is_merge_target": issue.ID == target.ID,
			}
		}

		sources := make([]string, 0, len(group)-1)
		for _, issue := range group {
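			// Every group member other than the suggested target becomes a merge source.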
			if issue.ID != target.ID {
				sources = append(sources, issue.ID)
			}
		}

		result = append(result, map[string]interface{}{
			"title":             group[0].Title,
			"issues":            issues,
			"suggested_target":  target.ID,
			"suggested_sources": sources,
			"suggested_action": fmt.Sprintf("bd close %s && bd dep add %s %s --type related",
				strings.Join(sources, " "), strings.Join(sources, " "), target.ID),
			"note": fmt.Sprintf("Duplicate: %s (same content as %s)", strings.Join(sources, " "), target.ID),
		})
	}

	return result
}

// performMerge executes the merge operation:
//  1. Closes all source issues with a reason indicating they are duplicates
//  2. Links each source to the target with a "related" dependency
//
// Returns a map with the merge result for JSON output
func performMerge(targetID string, sourceIDs []string) map[string]interface{} {
	ctx := rootCtx

	result := map[string]interface{}{
		"target":  targetID,
		"sources": sourceIDs,
		"closed":  []string{},
		"linked":  []string{},
		"errors":  []string{},
	}

	closedIDs := []string{}
	linkedIDs := []string{}
	errors := []string{}

	for _, sourceID := range sourceIDs {
		// Close the duplicate issue
		reason := fmt.Sprintf("Duplicate of %s", targetID)
		if err := store.CloseIssue(ctx, sourceID, reason, actor, ""); err != nil {
			errors = append(errors, fmt.Sprintf("failed to close %s: %v", sourceID, err))
			continue
		}
		closedIDs = append(closedIDs, sourceID)

		// Add dependency linking source to target
		dep := &types.Dependency{
			IssueID:     sourceID,
			DependsOnID: targetID,
			Type:        types.DependencyType("related"),
		}
		if err := store.AddDependency(ctx, dep, actor); err != nil {
			errors = append(errors, fmt.Sprintf("failed to link %s to %s: %v", sourceID, targetID, err))
			continue
		}
		linkedIDs = append(linkedIDs, sourceID)
	}

	result["closed"] = closedIDs
	result["linked"] = linkedIDs
	result["errors"] = errors

	return result
}
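
// For illustration only (hypothetical IDs, not from a real run): merging bd-12
// into bd-7 produces a result map shaped like
//
//	{"target": "bd-7", "sources": ["bd-12"], "closed": ["bd-12"], "linked": ["bd-12"], "errors": []}
//
// once it is encoded as JSON by the caller's outputJSON path.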