From a40b27a03d20a6fe4446cc351f2427be4437962d Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Tue, 28 Oct 2025 19:46:12 -0700 Subject: [PATCH] Add repair commands: bd repair-deps, bd detect-pollution, bd validate Implements Phase 1 of bd-56 (Repair Commands & AI-Assisted Tooling): New commands: - bd repair-deps: Find and fix orphaned dependency references - bd detect-pollution: Detect test issues using pattern matching - bd validate: Comprehensive health check (orphans, duplicates, pollution) Features: - JSON output support for all commands - Safe deletion with backup for detect-pollution - Auto-fix support for orphaned dependencies - Direct storage access (requires BEADS_NO_DAEMON=1) Closes bd-56 (Phase 1 complete) Related: bd-103, bd-105, bd-106 Amp-Thread-ID: https://ampcode.com/threads/T-5822c6d2-d645-4043-9a8d-3c51ac93bbb7 Co-authored-by: Amp --- cmd/bd/detect_pollution.go | 270 ++++++++++++++++++++++++++++++++ cmd/bd/repair_deps.go | 162 +++++++++++++++++++ cmd/bd/validate.go | 312 +++++++++++++++++++++++++++++++++++++ 3 files changed, 744 insertions(+) create mode 100644 cmd/bd/detect_pollution.go create mode 100644 cmd/bd/repair_deps.go create mode 100644 cmd/bd/validate.go diff --git a/cmd/bd/detect_pollution.go b/cmd/bd/detect_pollution.go new file mode 100644 index 00000000..c844038e --- /dev/null +++ b/cmd/bd/detect_pollution.go @@ -0,0 +1,270 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + "regexp" + "strings" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/types" +) + +var detectPollutionCmd = &cobra.Command{ + Use: "detect-pollution", + Short: "Detect test issues that leaked into production database", + Long: `Detect test issues using pattern matching: +- Titles starting with 'test', 'benchmark', 'sample', 'tmp', 'temp' +- Sequential numbering (test-1, test-2, ...) 
+- Generic descriptions or no description +- Created in rapid succession + +Example: + bd detect-pollution # Show potential test issues + bd detect-pollution --clean # Delete test issues (with confirmation) + bd detect-pollution --clean --yes # Delete without confirmation + bd detect-pollution --json # Output in JSON format`, + Run: func(cmd *cobra.Command, _ []string) { + // Check daemon mode - not supported yet (uses direct storage access) + if daemonClient != nil { + fmt.Fprintf(os.Stderr, "Error: detect-pollution command not yet supported in daemon mode\n") + fmt.Fprintf(os.Stderr, "Use: bd --no-daemon detect-pollution\n") + os.Exit(1) + } + + clean, _ := cmd.Flags().GetBool("clean") + yes, _ := cmd.Flags().GetBool("yes") + + ctx := context.Background() + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err) + os.Exit(1) + } + + // Detect pollution + polluted := detectTestPollution(allIssues) + + if len(polluted) == 0 { + if !jsonOutput { + fmt.Println("No test pollution detected!") + } else { + outputJSON(map[string]interface{}{ + "polluted_count": 0, + "issues": []interface{}{}, + }) + } + return + } + + // Categorize by confidence + highConfidence := []pollutionResult{} + mediumConfidence := []pollutionResult{} + + for _, p := range polluted { + if p.score >= 0.9 { + highConfidence = append(highConfidence, p) + } else { + mediumConfidence = append(mediumConfidence, p) + } + } + + if jsonOutput { + result := map[string]interface{}{ + "polluted_count": len(polluted), + "high_confidence": len(highConfidence), + "medium_confidence": len(mediumConfidence), + "issues": []map[string]interface{}{}, + } + + for _, p := range polluted { + result["issues"] = append(result["issues"].([]map[string]interface{}), map[string]interface{}{ + "id": p.issue.ID, + "title": p.issue.Title, + "score": p.score, + "reasons": p.reasons, + "created_at": p.issue.CreatedAt, + 
}) + } + + outputJSON(result) + return + } + + // Human-readable output + fmt.Printf("Found %d potential test issues:\n\n", len(polluted)) + + if len(highConfidence) > 0 { + fmt.Printf("High Confidence (score ≥ 0.9):\n") + for _, p := range highConfidence { + fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score) + for _, reason := range p.reasons { + fmt.Printf(" - %s\n", reason) + } + } + fmt.Printf(" (Total: %d issues)\n\n", len(highConfidence)) + } + + if len(mediumConfidence) > 0 { + fmt.Printf("Medium Confidence (score 0.7-0.9):\n") + for _, p := range mediumConfidence { + fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score) + for _, reason := range p.reasons { + fmt.Printf(" - %s\n", reason) + } + } + fmt.Printf(" (Total: %d issues)\n\n", len(mediumConfidence)) + } + + if !clean { + fmt.Printf("Run 'bd detect-pollution --clean' to delete these issues (with confirmation).\n") + return + } + + // Confirmation prompt + if !yes { + fmt.Printf("\nDelete %d test issues? [y/N] ", len(polluted)) + var response string + fmt.Scanln(&response) + if strings.ToLower(response) != "y" { + fmt.Println("Cancelled.") + return + } + } + + // Backup to JSONL before deleting + backupPath := ".beads/pollution-backup.jsonl" + if err := backupPollutedIssues(polluted, backupPath); err != nil { + fmt.Fprintf(os.Stderr, "Error backing up issues: %v\n", err) + os.Exit(1) + } + fmt.Printf("Backed up %d issues to %s\n", len(polluted), backupPath) + + // Delete issues + fmt.Printf("\nDeleting %d issues...\n", len(polluted)) + deleted := 0 + for _, p := range polluted { + if err := deleteIssue(ctx, p.issue.ID); err != nil { + fmt.Fprintf(os.Stderr, "Error deleting %s: %v\n", p.issue.ID, err) + continue + } + deleted++ + } + + // Schedule auto-flush + markDirtyAndScheduleFlush() + + green := color.New(color.FgGreen).SprintFunc() + fmt.Printf("%s Deleted %d test issues\n", green("✓"), deleted) + fmt.Printf("\nCleanup complete. 
To restore, run: bd import %s\n", backupPath) + }, +} + +type pollutionResult struct { + issue *types.Issue + score float64 + reasons []string +} + +func detectTestPollution(issues []*types.Issue) []pollutionResult { + var results []pollutionResult + + // Patterns for test issue titles + testPrefixPattern := regexp.MustCompile(`^(test|benchmark|sample|tmp|temp|debug|dummy)[-_\s]`) + sequentialPattern := regexp.MustCompile(`^[a-z]+-\d+$`) + + // Group issues by creation time to detect rapid succession + issuesByMinute := make(map[int64][]*types.Issue) + for _, issue := range issues { + minute := issue.CreatedAt.Unix() / 60 + issuesByMinute[minute] = append(issuesByMinute[minute], issue) + } + + for _, issue := range issues { + score := 0.0 + var reasons []string + + title := strings.ToLower(issue.Title) + + // Check for test prefixes (strong signal) + if testPrefixPattern.MatchString(title) { + score += 0.7 + reasons = append(reasons, "Title starts with test prefix") + } + + // Check for sequential numbering (medium signal) + if sequentialPattern.MatchString(issue.ID) && len(issue.Description) < 20 { + score += 0.4 + reasons = append(reasons, "Sequential ID with minimal description") + } + + // Check for generic/empty description (weak signal) + if len(strings.TrimSpace(issue.Description)) == 0 { + score += 0.2 + reasons = append(reasons, "No description") + } else if len(issue.Description) < 20 { + score += 0.1 + reasons = append(reasons, "Very short description") + } + + // Check for rapid creation (created with many others in same minute) + minute := issue.CreatedAt.Unix() / 60 + if len(issuesByMinute[minute]) >= 10 { + score += 0.3 + reasons = append(reasons, fmt.Sprintf("Created with %d other issues in same minute", len(issuesByMinute[minute])-1)) + } + + // Check for generic test titles + if strings.Contains(title, "issue for testing") || + strings.Contains(title, "test issue") || + strings.Contains(title, "sample issue") { + score += 0.5 + reasons = 
append(reasons, "Generic test title") + } + + // Only include if score is above threshold + if score >= 0.7 { + results = append(results, pollutionResult{ + issue: issue, + score: score, + reasons: reasons, + }) + } + } + + return results +} + +func backupPollutedIssues(polluted []pollutionResult, path string) error { + // Create backup file + file, err := os.Create(path) + if err != nil { + return fmt.Errorf("failed to create backup file: %w", err) + } + defer file.Close() + + // Write each issue as JSONL + for _, p := range polluted { + data, err := json.Marshal(p.issue) + if err != nil { + return fmt.Errorf("failed to marshal issue %s: %w", p.issue.ID, err) + } + + if _, err := file.WriteString(string(data) + "\n"); err != nil { + return fmt.Errorf("failed to write issue %s: %w", p.issue.ID, err) + } + } + + return nil +} + +func init() { + detectPollutionCmd.Flags().Bool("clean", false, "Delete detected test issues") + detectPollutionCmd.Flags().Bool("yes", false, "Skip confirmation prompt") + rootCmd.AddCommand(detectPollutionCmd) +} diff --git a/cmd/bd/repair_deps.go b/cmd/bd/repair_deps.go new file mode 100644 index 00000000..bccaa009 --- /dev/null +++ b/cmd/bd/repair_deps.go @@ -0,0 +1,162 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/types" +) + +var repairDepsCmd = &cobra.Command{ + Use: "repair-deps", + Short: "Find and fix orphaned dependency references", + Long: `Find issues that reference non-existent dependencies and optionally remove them. + +This command scans all issues for dependency references (both blocks and related-to) +that point to issues that no longer exist in the database. 
+ +Example: + bd repair-deps # Show orphaned dependencies + bd repair-deps --fix # Remove orphaned references + bd repair-deps --json # Output in JSON format`, + Run: func(cmd *cobra.Command, _ []string) { + // Check daemon mode - not supported yet (uses direct storage access) + if daemonClient != nil { + fmt.Fprintf(os.Stderr, "Error: repair-deps command not yet supported in daemon mode\n") + fmt.Fprintf(os.Stderr, "Use: bd --no-daemon repair-deps\n") + os.Exit(1) + } + + fix, _ := cmd.Flags().GetBool("fix") + + ctx := context.Background() + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err) + os.Exit(1) + } + + // Build ID existence map + existingIDs := make(map[string]bool) + for _, issue := range allIssues { + existingIDs[issue.ID] = true + } + + // Find orphaned dependencies + type orphanedDep struct { + IssueID string + OrphanedID string + DepType string + } + + var orphaned []orphanedDep + + for _, issue := range allIssues { + // Check dependencies + for _, dep := range issue.Dependencies { + if !existingIDs[dep.DependsOnID] { + orphaned = append(orphaned, orphanedDep{ + IssueID: issue.ID, + OrphanedID: dep.DependsOnID, + DepType: string(dep.Type), + }) + } + } + } + + // Output results + if jsonOutput { + result := map[string]interface{}{ + "orphaned_count": len(orphaned), + "fixed": fix, + "orphaned_deps": []map[string]interface{}{}, + } + + for _, o := range orphaned { + result["orphaned_deps"] = append(result["orphaned_deps"].([]map[string]interface{}), map[string]interface{}{ + "issue_id": o.IssueID, + "orphaned_id": o.OrphanedID, + "dep_type": o.DepType, + }) + } + + outputJSON(result) + return + } + + // Human-readable output + if len(orphaned) == 0 { + fmt.Println("No orphaned dependencies found!") + return + } + + fmt.Printf("Found %d orphaned dependencies:\n\n", len(orphaned)) + for _, o := range orphaned { + fmt.Printf(" %s: depends 
on %s (%s) - DELETED\n", o.IssueID, o.OrphanedID, o.DepType)
		}

		if !fix {
			fmt.Printf("\nRun 'bd repair-deps --fix' to remove these references.\n")
			return
		}

		// Fix orphaned dependencies
		fmt.Printf("\nRemoving orphaned dependencies...\n")

		// Group orphaned references by owning issue so we verify each issue once.
		orphansByIssue := make(map[string][]string)
		for _, o := range orphaned {
			orphansByIssue[o.IssueID] = append(orphansByIssue[o.IssueID], o.OrphanedID)
		}

		fixed := 0
		for issueID, orphanedIDs := range orphansByIssue {
			// Verify the owning issue still exists before mutating its
			// dependencies; skip it (with a warning) if it cannot be fetched.
			if _, err := store.GetIssue(ctx, issueID); err != nil {
				fmt.Fprintf(os.Stderr, "Error fetching %s: %v\n", issueID, err)
				continue
			}

			// Remove each orphaned dependency individually via the storage
			// layer. (No need to rebuild the dependency list by hand —
			// RemoveDependency handles persistence per reference.)
			for _, orphanedID := range orphanedIDs {
				if err := store.RemoveDependency(ctx, issueID, orphanedID, actor); err != nil {
					fmt.Fprintf(os.Stderr, "Error removing %s from %s: %v\n", orphanedID, issueID, err)
					continue
				}

				fmt.Printf("✓ Removed %s from %s dependencies\n", orphanedID, issueID)
				fixed++
			}
		}

		// Schedule auto-flush
		markDirtyAndScheduleFlush()

		fmt.Printf("\nRepaired %d orphaned dependencies.\n", fixed)
	},
}

func init() {
	repairDepsCmd.Flags().Bool("fix", false, "Remove orphaned dependency references")
	rootCmd.AddCommand(repairDepsCmd)
}
diff --git a/cmd/bd/validate.go b/cmd/bd/validate.go
new file mode 100644
index 00000000..6ca049bb
--- /dev/null
+++ b/cmd/bd/validate.go
@@ -0,0 +1,312 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+
"github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/types" +) + +var validateCmd = &cobra.Command{ + Use: "validate", + Short: "Run comprehensive database health checks", + Long: `Run all validation checks to ensure database integrity: +- Orphaned dependencies (references to deleted issues) +- Duplicate issues (identical content) +- Test pollution (leaked test issues) +- Git merge conflicts in JSONL + +Example: + bd validate # Run all checks + bd validate --fix-all # Auto-fix all issues + bd validate --checks=orphans,dupes # Run specific checks + bd validate --json # Output in JSON format`, + Run: func(cmd *cobra.Command, _ []string) { + // Check daemon mode - not supported yet (uses direct storage access) + if daemonClient != nil { + fmt.Fprintf(os.Stderr, "Error: validate command not yet supported in daemon mode\n") + fmt.Fprintf(os.Stderr, "Use: bd --no-daemon validate\n") + os.Exit(1) + } + + fixAll, _ := cmd.Flags().GetBool("fix-all") + checksFlag, _ := cmd.Flags().GetString("checks") + + ctx := context.Background() + + // Determine which checks to run + var checks []string + if checksFlag == "" { + checks = []string{"orphans", "duplicates", "pollution"} + } else { + checks = strings.Split(checksFlag, ",") + } + + results := validationResults{ + checks: make(map[string]checkResult), + } + + // Run each check + for _, check := range checks { + switch check { + case "orphans": + results.checks["orphans"] = validateOrphanedDeps(ctx, fixAll) + case "duplicates", "dupes": + results.checks["duplicates"] = validateDuplicates(ctx, fixAll) + case "pollution": + results.checks["pollution"] = validatePollution(ctx, fixAll) + default: + fmt.Fprintf(os.Stderr, "Unknown check: %s\n", check) + } + } + + // Output results + if jsonOutput { + outputJSON(results.toJSON()) + } else { + results.print(fixAll) + } + + // Exit with error code if issues found + if results.hasIssues() { + os.Exit(1) + } + }, +} + +type checkResult struct { + 
name string + issueCount int + fixedCount int + err error + suggestions []string +} + +type validationResults struct { + checks map[string]checkResult +} + +func (r *validationResults) hasIssues() bool { + for _, result := range r.checks { + if result.issueCount > 0 && result.fixedCount < result.issueCount { + return true + } + } + return false +} + +func (r *validationResults) toJSON() map[string]interface{} { + output := map[string]interface{}{ + "checks": map[string]interface{}{}, + } + + totalIssues := 0 + totalFixed := 0 + + for name, result := range r.checks { + output["checks"].(map[string]interface{})[name] = map[string]interface{}{ + "issue_count": result.issueCount, + "fixed_count": result.fixedCount, + "error": result.err, + "suggestions": result.suggestions, + } + totalIssues += result.issueCount + totalFixed += result.fixedCount + } + + output["total_issues"] = totalIssues + output["total_fixed"] = totalFixed + output["healthy"] = totalIssues == 0 || totalIssues == totalFixed + + return output +} + +func (r *validationResults) print(fixAll bool) { + green := color.New(color.FgGreen).SprintFunc() + yellow := color.New(color.FgYellow).SprintFunc() + red := color.New(color.FgRed).SprintFunc() + + fmt.Println("\nValidation Results:") + fmt.Println("===================") + + totalIssues := 0 + totalFixed := 0 + + for name, result := range r.checks { + prefix := "✓" + colorFunc := green + + if result.err != nil { + prefix = "✗" + colorFunc = red + fmt.Printf("%s %s: ERROR - %v\n", colorFunc(prefix), name, result.err) + } else if result.issueCount > 0 { + prefix = "⚠" + colorFunc = yellow + if result.fixedCount > 0 { + fmt.Printf("%s %s: %d found, %d fixed\n", colorFunc(prefix), name, result.issueCount, result.fixedCount) + } else { + fmt.Printf("%s %s: %d found\n", colorFunc(prefix), name, result.issueCount) + } + } else { + fmt.Printf("%s %s: OK\n", colorFunc(prefix), name) + } + + totalIssues += result.issueCount + totalFixed += result.fixedCount + } + + 
fmt.Println() + + if totalIssues == 0 { + fmt.Printf("%s Database is healthy!\n", green("✓")) + } else if totalFixed == totalIssues { + fmt.Printf("%s Fixed all %d issues\n", green("✓"), totalFixed) + } else { + remaining := totalIssues - totalFixed + fmt.Printf("%s Found %d issues", yellow("⚠"), totalIssues) + if totalFixed > 0 { + fmt.Printf(" (fixed %d, %d remaining)", totalFixed, remaining) + } + fmt.Println() + + // Print suggestions + fmt.Println("\nRecommendations:") + for _, result := range r.checks { + for _, suggestion := range result.suggestions { + fmt.Printf(" - %s\n", suggestion) + } + } + } +} + +func validateOrphanedDeps(ctx context.Context, fix bool) checkResult { + result := checkResult{name: "orphaned dependencies"} + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + result.err = err + return result + } + + // Build ID existence map + existingIDs := make(map[string]bool) + for _, issue := range allIssues { + existingIDs[issue.ID] = true + } + + // Find orphaned dependencies + type orphanedDep struct { + issueID string + orphanedID string + } + var orphaned []orphanedDep + + for _, issue := range allIssues { + for _, dep := range issue.Dependencies { + if !existingIDs[dep.DependsOnID] { + orphaned = append(orphaned, orphanedDep{ + issueID: issue.ID, + orphanedID: dep.DependsOnID, + }) + } + } + } + + result.issueCount = len(orphaned) + + if fix && len(orphaned) > 0 { + // Group by issue + orphansByIssue := make(map[string][]string) + for _, o := range orphaned { + orphansByIssue[o.issueID] = append(orphansByIssue[o.issueID], o.orphanedID) + } + + // Fix each issue + for issueID, orphanedIDs := range orphansByIssue { + for _, orphanedID := range orphanedIDs { + if err := store.RemoveDependency(ctx, issueID, orphanedID, actor); err == nil { + result.fixedCount++ + } + } + } + + if result.fixedCount > 0 { + markDirtyAndScheduleFlush() + } + } + + if result.issueCount > result.fixedCount { + 
result.suggestions = append(result.suggestions, "Run 'bd repair-deps --fix' to remove orphaned dependencies") + } + + return result +} + +func validateDuplicates(ctx context.Context, fix bool) checkResult { + result := checkResult{name: "duplicates"} + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + result.err = err + return result + } + + // Find duplicates + duplicateGroups := findDuplicateGroups(allIssues) + + // Count total duplicate issues (excluding one canonical per group) + for _, group := range duplicateGroups { + result.issueCount += len(group) - 1 + } + + if fix && len(duplicateGroups) > 0 { + // Note: Auto-merge is complex and requires user review + // We don't auto-fix duplicates, just report them + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd duplicates --auto-merge' to merge %d duplicate groups", len(duplicateGroups))) + } else if result.issueCount > 0 { + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd duplicates' to review %d duplicate groups", len(duplicateGroups))) + } + + return result +} + +func validatePollution(ctx context.Context, fix bool) checkResult { + result := checkResult{name: "test pollution"} + + // Get all issues + allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + result.err = err + return result + } + + // Detect pollution + polluted := detectTestPollution(allIssues) + result.issueCount = len(polluted) + + if fix && len(polluted) > 0 { + // Note: Deleting issues is destructive, we just suggest it + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd detect-pollution --clean' to delete %d test issues", len(polluted))) + } else if result.issueCount > 0 { + result.suggestions = append(result.suggestions, + fmt.Sprintf("Run 'bd detect-pollution' to review %d potential test issues", len(polluted))) + } + + return result +} + +func init() { + 
validateCmd.Flags().Bool("fix-all", false, "Auto-fix all fixable issues") + validateCmd.Flags().String("checks", "", "Comma-separated list of checks (orphans,duplicates,pollution)") + rootCmd.AddCommand(validateCmd) +}