From a40b27a03d20a6fe4446cc351f2427be4437962d Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Tue, 28 Oct 2025 19:46:12 -0700 Subject: [PATCH] Add repair commands: bd repair-deps, bd detect-pollution, bd validate Implements Phase 1 of bd-56 (Repair Commands & AI-Assisted Tooling): New commands: - bd repair-deps: Find and fix orphaned dependency references - bd detect-pollution: Detect test issues using pattern matching - bd validate: Comprehensive health check (orphans, duplicates, pollution) Features: - JSON output support for all commands - Safe deletion with backup for detect-pollution - Auto-fix support for orphaned dependencies - Direct storage access (requires BEADS_NO_DAEMON=1) Closes bd-56 (Phase 1 complete) Related: bd-103, bd-105, bd-106 Amp-Thread-ID: https://ampcode.com/threads/T-5822c6d2-d645-4043-9a8d-3c51ac93bbb7 Co-authored-by: Amp --- cmd/bd/detect_pollution.go | 270 ++++++++++++++++++++++++++++++++ cmd/bd/repair_deps.go | 162 +++++++++++++++++++ cmd/bd/validate.go | 312 +++++++++++++++++++++++++++++++++++++ 3 files changed, 744 insertions(+) create mode 100644 cmd/bd/detect_pollution.go create mode 100644 cmd/bd/repair_deps.go create mode 100644 cmd/bd/validate.go diff --git a/cmd/bd/detect_pollution.go b/cmd/bd/detect_pollution.go new file mode 100644 index 00000000..c844038e --- /dev/null +++ b/cmd/bd/detect_pollution.go @@ -0,0 +1,270 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + "regexp" + "strings" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/types" +) + +var detectPollutionCmd = &cobra.Command{ + Use: "detect-pollution", + Short: "Detect test issues that leaked into production database", + Long: `Detect test issues using pattern matching: +- Titles starting with 'test', 'benchmark', 'sample', 'tmp', 'temp' +- Sequential numbering (test-1, test-2, ...) 
+- Generic descriptions or no description +- Created in rapid succession + +Example: + bd detect-pollution # Show potential test issues + bd detect-pollution --clean # Delete test issues (with confirmation) + bd detect-pollution --clean --yes # Delete without confirmation + bd detect-pollution --json # Output in JSON format`, + Run: func(cmd *cobra.Command, _ []string) { + // Check daemon mode - not supported yet (uses direct storage access) + if daemonClient != nil { + fmt.Fprintf(os.Stderr, "Error: detect-pollution command not yet supported in daemon mode\n") + fmt.Fprintf(os.Stderr, "Use: bd --no-daemon detect-pollution\n") + os.Exit(1) + } + + clean, _ := cmd.Flags().GetBool("clean") + yes, _ := cmd.Flags().GetBool("yes") + + ctx := context.Background() + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err) + os.Exit(1) + } + + // Detect pollution + polluted := detectTestPollution(allIssues) + + if len(polluted) == 0 { + if !jsonOutput { + fmt.Println("No test pollution detected!") + } else { + outputJSON(map[string]interface{}{ + "polluted_count": 0, + "issues": []interface{}{}, + }) + } + return + } + + // Categorize by confidence + highConfidence := []pollutionResult{} + mediumConfidence := []pollutionResult{} + + for _, p := range polluted { + if p.score >= 0.9 { + highConfidence = append(highConfidence, p) + } else { + mediumConfidence = append(mediumConfidence, p) + } + } + + if jsonOutput { + result := map[string]interface{}{ + "polluted_count": len(polluted), + "high_confidence": len(highConfidence), + "medium_confidence": len(mediumConfidence), + "issues": []map[string]interface{}{}, + } + + for _, p := range polluted { + result["issues"] = append(result["issues"].([]map[string]interface{}), map[string]interface{}{ + "id": p.issue.ID, + "title": p.issue.Title, + "score": p.score, + "reasons": p.reasons, + "created_at": p.issue.CreatedAt, + 
}) + } + + outputJSON(result) + return + } + + // Human-readable output + fmt.Printf("Found %d potential test issues:\n\n", len(polluted)) + + if len(highConfidence) > 0 { + fmt.Printf("High Confidence (score ≥ 0.9):\n") + for _, p := range highConfidence { + fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score) + for _, reason := range p.reasons { + fmt.Printf(" - %s\n", reason) + } + } + fmt.Printf(" (Total: %d issues)\n\n", len(highConfidence)) + } + + if len(mediumConfidence) > 0 { + fmt.Printf("Medium Confidence (score 0.7-0.9):\n") + for _, p := range mediumConfidence { + fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score) + for _, reason := range p.reasons { + fmt.Printf(" - %s\n", reason) + } + } + fmt.Printf(" (Total: %d issues)\n\n", len(mediumConfidence)) + } + + if !clean { + fmt.Printf("Run 'bd detect-pollution --clean' to delete these issues (with confirmation).\n") + return + } + + // Confirmation prompt + if !yes { + fmt.Printf("\nDelete %d test issues? [y/N] ", len(polluted)) + var response string + fmt.Scanln(&response) + if strings.ToLower(response) != "y" { + fmt.Println("Cancelled.") + return + } + } + + // Backup to JSONL before deleting + backupPath := ".beads/pollution-backup.jsonl" + if err := backupPollutedIssues(polluted, backupPath); err != nil { + fmt.Fprintf(os.Stderr, "Error backing up issues: %v\n", err) + os.Exit(1) + } + fmt.Printf("Backed up %d issues to %s\n", len(polluted), backupPath) + + // Delete issues + fmt.Printf("\nDeleting %d issues...\n", len(polluted)) + deleted := 0 + for _, p := range polluted { + if err := deleteIssue(ctx, p.issue.ID); err != nil { + fmt.Fprintf(os.Stderr, "Error deleting %s: %v\n", p.issue.ID, err) + continue + } + deleted++ + } + + // Schedule auto-flush + markDirtyAndScheduleFlush() + + green := color.New(color.FgGreen).SprintFunc() + fmt.Printf("%s Deleted %d test issues\n", green("✓"), deleted) + fmt.Printf("\nCleanup complete. 
To restore, run: bd import %s\n", backupPath) + }, +} + +type pollutionResult struct { + issue *types.Issue + score float64 + reasons []string +} + +func detectTestPollution(issues []*types.Issue) []pollutionResult { + var results []pollutionResult + + // Patterns for test issue titles + testPrefixPattern := regexp.MustCompile(`^(test|benchmark|sample|tmp|temp|debug|dummy)[-_\s]`) + sequentialPattern := regexp.MustCompile(`^[a-z]+-\d+$`) + + // Group issues by creation time to detect rapid succession + issuesByMinute := make(map[int64][]*types.Issue) + for _, issue := range issues { + minute := issue.CreatedAt.Unix() / 60 + issuesByMinute[minute] = append(issuesByMinute[minute], issue) + } + + for _, issue := range issues { + score := 0.0 + var reasons []string + + title := strings.ToLower(issue.Title) + + // Check for test prefixes (strong signal) + if testPrefixPattern.MatchString(title) { + score += 0.7 + reasons = append(reasons, "Title starts with test prefix") + } + + // Check for sequential numbering (medium signal) + if sequentialPattern.MatchString(issue.ID) && len(issue.Description) < 20 { + score += 0.4 + reasons = append(reasons, "Sequential ID with minimal description") + } + + // Check for generic/empty description (weak signal) + if len(strings.TrimSpace(issue.Description)) == 0 { + score += 0.2 + reasons = append(reasons, "No description") + } else if len(issue.Description) < 20 { + score += 0.1 + reasons = append(reasons, "Very short description") + } + + // Check for rapid creation (created with many others in same minute) + minute := issue.CreatedAt.Unix() / 60 + if len(issuesByMinute[minute]) >= 10 { + score += 0.3 + reasons = append(reasons, fmt.Sprintf("Created with %d other issues in same minute", len(issuesByMinute[minute])-1)) + } + + // Check for generic test titles + if strings.Contains(title, "issue for testing") || + strings.Contains(title, "test issue") || + strings.Contains(title, "sample issue") { + score += 0.5 + reasons = 
append(reasons, "Generic test title") + } + + // Only include if score is above threshold + if score >= 0.7 { + results = append(results, pollutionResult{ + issue: issue, + score: score, + reasons: reasons, + }) + } + } + + return results +} + +func backupPollutedIssues(polluted []pollutionResult, path string) error { + // Create backup file + file, err := os.Create(path) + if err != nil { + return fmt.Errorf("failed to create backup file: %w", err) + } + defer file.Close() + + // Write each issue as JSONL + for _, p := range polluted { + data, err := json.Marshal(p.issue) + if err != nil { + return fmt.Errorf("failed to marshal issue %s: %w", p.issue.ID, err) + } + + if _, err := file.WriteString(string(data) + "\n"); err != nil { + return fmt.Errorf("failed to write issue %s: %w", p.issue.ID, err) + } + } + + return nil +} + +func init() { + detectPollutionCmd.Flags().Bool("clean", false, "Delete detected test issues") + detectPollutionCmd.Flags().Bool("yes", false, "Skip confirmation prompt") + rootCmd.AddCommand(detectPollutionCmd) +} diff --git a/cmd/bd/repair_deps.go b/cmd/bd/repair_deps.go new file mode 100644 index 00000000..bccaa009 --- /dev/null +++ b/cmd/bd/repair_deps.go @@ -0,0 +1,162 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/types" +) + +var repairDepsCmd = &cobra.Command{ + Use: "repair-deps", + Short: "Find and fix orphaned dependency references", + Long: `Find issues that reference non-existent dependencies and optionally remove them. + +This command scans all issues for dependency references (both blocks and related-to) +that point to issues that no longer exist in the database. 
+ +Example: + bd repair-deps # Show orphaned dependencies + bd repair-deps --fix # Remove orphaned references + bd repair-deps --json # Output in JSON format`, + Run: func(cmd *cobra.Command, _ []string) { + // Check daemon mode - not supported yet (uses direct storage access) + if daemonClient != nil { + fmt.Fprintf(os.Stderr, "Error: repair-deps command not yet supported in daemon mode\n") + fmt.Fprintf(os.Stderr, "Use: bd --no-daemon repair-deps\n") + os.Exit(1) + } + + fix, _ := cmd.Flags().GetBool("fix") + + ctx := context.Background() + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err) + os.Exit(1) + } + + // Build ID existence map + existingIDs := make(map[string]bool) + for _, issue := range allIssues { + existingIDs[issue.ID] = true + } + + // Find orphaned dependencies + type orphanedDep struct { + IssueID string + OrphanedID string + DepType string + } + + var orphaned []orphanedDep + + for _, issue := range allIssues { + // Check dependencies + for _, dep := range issue.Dependencies { + if !existingIDs[dep.DependsOnID] { + orphaned = append(orphaned, orphanedDep{ + IssueID: issue.ID, + OrphanedID: dep.DependsOnID, + DepType: string(dep.Type), + }) + } + } + } + + // Output results + if jsonOutput { + result := map[string]interface{}{ + "orphaned_count": len(orphaned), + "fixed": fix, + "orphaned_deps": []map[string]interface{}{}, + } + + for _, o := range orphaned { + result["orphaned_deps"] = append(result["orphaned_deps"].([]map[string]interface{}), map[string]interface{}{ + "issue_id": o.IssueID, + "orphaned_id": o.OrphanedID, + "dep_type": o.DepType, + }) + } + + outputJSON(result) + return + } + + // Human-readable output + if len(orphaned) == 0 { + fmt.Println("No orphaned dependencies found!") + return + } + + fmt.Printf("Found %d orphaned dependencies:\n\n", len(orphaned)) + for _, o := range orphaned { + fmt.Printf(" %s: depends 
on %s (%s) - DELETED\n", o.IssueID, o.OrphanedID, o.DepType)
		}

		if !fix {
			fmt.Printf("\nRun 'bd repair-deps --fix' to remove these references.\n")
			return
		}

		// Fix orphaned dependencies
		fmt.Printf("\nRemoving orphaned dependencies...\n")

		// Group orphaned references by owning issue so we verify each issue once.
		orphansByIssue := make(map[string][]string)
		for _, o := range orphaned {
			orphansByIssue[o.IssueID] = append(orphansByIssue[o.IssueID], o.OrphanedID)
		}

		fixed := 0
		for issueID, orphanedIDs := range orphansByIssue {
			// Verify the owning issue still exists before mutating its
			// dependencies; skip it (with a warning) if it cannot be fetched.
			if _, err := store.GetIssue(ctx, issueID); err != nil {
				fmt.Fprintf(os.Stderr, "Error fetching %s: %v\n", issueID, err)
				continue
			}

			// Remove each orphaned dependency individually via the storage
			// layer. (No need to rebuild the dependency list by hand —
			// RemoveDependency handles persistence per reference.)
			for _, orphanedID := range orphanedIDs {
				if err := store.RemoveDependency(ctx, issueID, orphanedID, actor); err != nil {
					fmt.Fprintf(os.Stderr, "Error removing %s from %s: %v\n", orphanedID, issueID, err)
					continue
				}

				fmt.Printf("✓ Removed %s from %s dependencies\n", orphanedID, issueID)
				fixed++
			}
		}

		// Schedule auto-flush
		markDirtyAndScheduleFlush()

		fmt.Printf("\nRepaired %d orphaned dependencies.\n", fixed)
	},
}

func init() {
	repairDepsCmd.Flags().Bool("fix", false, "Remove orphaned dependency references")
	rootCmd.AddCommand(repairDepsCmd)
}
diff --git a/cmd/bd/validate.go b/cmd/bd/validate.go
new file mode 100644
index 00000000..6ca049bb
--- /dev/null
+++ b/cmd/bd/validate.go
@@ -0,0 +1,312 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+
"github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/types" +) + +var validateCmd = &cobra.Command{ + Use: "validate", + Short: "Run comprehensive database health checks", + Long: `Run all validation checks to ensure database integrity: +- Orphaned dependencies (references to deleted issues) +- Duplicate issues (identical content) +- Test pollution (leaked test issues) +- Git merge conflicts in JSONL + +Example: + bd validate # Run all checks + bd validate --fix-all # Auto-fix all issues + bd validate --checks=orphans,dupes # Run specific checks + bd validate --json # Output in JSON format`, + Run: func(cmd *cobra.Command, _ []string) { + // Check daemon mode - not supported yet (uses direct storage access) + if daemonClient != nil { + fmt.Fprintf(os.Stderr, "Error: validate command not yet supported in daemon mode\n") + fmt.Fprintf(os.Stderr, "Use: bd --no-daemon validate\n") + os.Exit(1) + } + + fixAll, _ := cmd.Flags().GetBool("fix-all") + checksFlag, _ := cmd.Flags().GetString("checks") + + ctx := context.Background() + + // Determine which checks to run + var checks []string + if checksFlag == "" { + checks = []string{"orphans", "duplicates", "pollution"} + } else { + checks = strings.Split(checksFlag, ",") + } + + results := validationResults{ + checks: make(map[string]checkResult), + } + + // Run each check + for _, check := range checks { + switch check { + case "orphans": + results.checks["orphans"] = validateOrphanedDeps(ctx, fixAll) + case "duplicates", "dupes": + results.checks["duplicates"] = validateDuplicates(ctx, fixAll) + case "pollution": + results.checks["pollution"] = validatePollution(ctx, fixAll) + default: + fmt.Fprintf(os.Stderr, "Unknown check: %s\n", check) + } + } + + // Output results + if jsonOutput { + outputJSON(results.toJSON()) + } else { + results.print(fixAll) + } + + // Exit with error code if issues found + if results.hasIssues() { + os.Exit(1) + } + }, +} + +type checkResult struct { + 
name string + issueCount int + fixedCount int + err error + suggestions []string +} + +type validationResults struct { + checks map[string]checkResult +} + +func (r *validationResults) hasIssues() bool { + for _, result := range r.checks { + if result.issueCount > 0 && result.fixedCount < result.issueCount { + return true + } + } + return false +} + +func (r *validationResults) toJSON() map[string]interface{} { + output := map[string]interface{}{ + "checks": map[string]interface{}{}, + } + + totalIssues := 0 + totalFixed := 0 + + for name, result := range r.checks { + output["checks"].(map[string]interface{})[name] = map[string]interface{}{ + "issue_count": result.issueCount, + "fixed_count": result.fixedCount, + "error": result.err, + "suggestions": result.suggestions, + } + totalIssues += result.issueCount + totalFixed += result.fixedCount + } + + output["total_issues"] = totalIssues + output["total_fixed"] = totalFixed + output["healthy"] = totalIssues == 0 || totalIssues == totalFixed + + return output +} + +func (r *validationResults) print(fixAll bool) { + green := color.New(color.FgGreen).SprintFunc() + yellow := color.New(color.FgYellow).SprintFunc() + red := color.New(color.FgRed).SprintFunc() + + fmt.Println("\nValidation Results:") + fmt.Println("===================") + + totalIssues := 0 + totalFixed := 0 + + for name, result := range r.checks { + prefix := "✓" + colorFunc := green + + if result.err != nil { + prefix = "✗" + colorFunc = red + fmt.Printf("%s %s: ERROR - %v\n", colorFunc(prefix), name, result.err) + } else if result.issueCount > 0 { + prefix = "⚠" + colorFunc = yellow + if result.fixedCount > 0 { + fmt.Printf("%s %s: %d found, %d fixed\n", colorFunc(prefix), name, result.issueCount, result.fixedCount) + } else { + fmt.Printf("%s %s: %d found\n", colorFunc(prefix), name, result.issueCount) + } + } else { + fmt.Printf("%s %s: OK\n", colorFunc(prefix), name) + } + + totalIssues += result.issueCount + totalFixed += result.fixedCount + } + + 
fmt.Println() + + if totalIssues == 0 { + fmt.Printf("%s Database is healthy!\n", green("✓")) + } else if totalFixed == totalIssues { + fmt.Printf("%s Fixed all %d issues\n", green("✓"), totalFixed) + } else { + remaining := totalIssues - totalFixed + fmt.Printf("%s Found %d issues", yellow("⚠"), totalIssues) + if totalFixed > 0 { + fmt.Printf(" (fixed %d, %d remaining)", totalFixed, remaining) + } + fmt.Println() + + // Print suggestions + fmt.Println("\nRecommendations:") + for _, result := range r.checks { + for _, suggestion := range result.suggestions { + fmt.Printf(" - %s\n", suggestion) + } + } + } +} + +func validateOrphanedDeps(ctx context.Context, fix bool) checkResult { + result := checkResult{name: "orphaned dependencies"} + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + result.err = err + return result + } + + // Build ID existence map + existingIDs := make(map[string]bool) + for _, issue := range allIssues { + existingIDs[issue.ID] = true + } + + // Find orphaned dependencies + type orphanedDep struct { + issueID string + orphanedID string + } + var orphaned []orphanedDep + + for _, issue := range allIssues { + for _, dep := range issue.Dependencies { + if !existingIDs[dep.DependsOnID] { + orphaned = append(orphaned, orphanedDep{ + issueID: issue.ID, + orphanedID: dep.DependsOnID, + }) + } + } + } + + result.issueCount = len(orphaned) + + if fix && len(orphaned) > 0 { + // Group by issue + orphansByIssue := make(map[string][]string) + for _, o := range orphaned { + orphansByIssue[o.issueID] = append(orphansByIssue[o.issueID], o.orphanedID) + } + + // Fix each issue + for issueID, orphanedIDs := range orphansByIssue { + for _, orphanedID := range orphanedIDs { + if err := store.RemoveDependency(ctx, issueID, orphanedID, actor); err == nil { + result.fixedCount++ + } + } + } + + if result.fixedCount > 0 { + markDirtyAndScheduleFlush() + } + } + + if result.issueCount > result.fixedCount { + 
result.suggestions = append(result.suggestions, "Run 'bd repair-deps --fix' to remove orphaned dependencies") + } + + return result +} + +func validateDuplicates(ctx context.Context, fix bool) checkResult { + result := checkResult{name: "duplicates"} + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + result.err = err + return result + } + + // Find duplicates + duplicateGroups := findDuplicateGroups(allIssues) + + // Count total duplicate issues (excluding one canonical per group) + for _, group := range duplicateGroups { + result.issueCount += len(group) - 1 + } + + if fix && len(duplicateGroups) > 0 { + // Note: Auto-merge is complex and requires user review + // We don't auto-fix duplicates, just report them + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd duplicates --auto-merge' to merge %d duplicate groups", len(duplicateGroups))) + } else if result.issueCount > 0 { + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd duplicates' to review %d duplicate groups", len(duplicateGroups))) + } + + return result +} + +func validatePollution(ctx context.Context, fix bool) checkResult { + result := checkResult{name: "test pollution"} + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + result.err = err + return result + } + + // Detect pollution + polluted := detectTestPollution(allIssues) + result.issueCount = len(polluted) + + if fix && len(polluted) > 0 { + // Note: Deleting issues is destructive, we just suggest it + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd detect-pollution --clean' to delete %d test issues", len(polluted))) + } else if result.issueCount > 0 { + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd detect-pollution' to review %d potential test issues", len(polluted))) + } + + return result +} + +func init() { + 
validateCmd.Flags().Bool("fix-all", false, "Auto-fix all fixable issues") + validateCmd.Flags().String("checks", "", "Comma-separated list of checks (orphans,duplicates,pollution)") + rootCmd.AddCommand(validateCmd) +}