Add repair commands: bd repair-deps, bd detect-pollution, bd validate

Implements Phase 1 of bd-56 (Repair Commands & AI-Assisted Tooling):

New commands:
- bd repair-deps: Find and fix orphaned dependency references
- bd detect-pollution: Detect test issues using pattern matching
- bd validate: Comprehensive health check (orphans, duplicates, pollution)

Features:
- JSON output support for all commands
- Safe deletion with backup for detect-pollution
- Auto-fix support for orphaned dependencies
- Direct storage access (not yet supported in daemon mode; run with --no-daemon or BEADS_NO_DAEMON=1)

Closes bd-56 (Phase 1 complete)
Related: bd-103, bd-105, bd-106

Amp-Thread-ID: https://ampcode.com/threads/T-5822c6d2-d645-4043-9a8d-3c51ac93bbb7
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-28 19:46:12 -07:00
parent dd6af39906
commit a40b27a03d
3 changed files with 744 additions and 0 deletions

270
cmd/bd/detect_pollution.go Normal file
View File

@@ -0,0 +1,270 @@
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"regexp"
"strings"
"github.com/fatih/color"
"github.com/spf13/cobra"
"github.com/steveyegge/beads/internal/types"
)
// detectPollutionCmd implements `bd detect-pollution`.
//
// It loads every issue from the store, scores each one with
// detectTestPollution, and reports the suspects either as JSON or as a
// human-readable list split into high (score >= 0.9) and medium confidence.
// With --clean (human-readable mode only) the suspects are backed up to a
// JSONL file and then deleted, after an interactive confirmation unless --yes
// is given.
var detectPollutionCmd = &cobra.Command{
	Use:   "detect-pollution",
	Short: "Detect test issues that leaked into production database",
	Long: `Detect test issues using pattern matching:
- Titles starting with 'test', 'benchmark', 'sample', 'tmp', 'temp'
- Sequential numbering (test-1, test-2, ...)
- Generic descriptions or no description
- Created in rapid succession
Example:
bd detect-pollution # Show potential test issues
bd detect-pollution --clean # Delete test issues (with confirmation)
bd detect-pollution --clean --yes # Delete without confirmation
bd detect-pollution --json # Output in JSON format`,
	Run: func(cmd *cobra.Command, _ []string) {
		// Check daemon mode - not supported yet (uses direct storage access)
		if daemonClient != nil {
			fmt.Fprintf(os.Stderr, "Error: detect-pollution command not yet supported in daemon mode\n")
			fmt.Fprintf(os.Stderr, "Use: bd --no-daemon detect-pollution\n")
			os.Exit(1)
		}

		// Both flags are registered in init, so the lookup errors cannot occur.
		clean, _ := cmd.Flags().GetBool("clean")
		yes, _ := cmd.Flags().GetBool("yes")
		ctx := context.Background()

		// Get all issues
		allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
			os.Exit(1)
		}

		// Detect pollution
		polluted := detectTestPollution(allIssues)
		if len(polluted) == 0 {
			if jsonOutput {
				outputJSON(map[string]interface{}{
					"polluted_count": 0,
					"issues":         []interface{}{},
				})
			} else {
				fmt.Println("No test pollution detected!")
			}
			return
		}

		// Categorize by confidence
		var highConfidence, mediumConfidence []pollutionResult
		for _, p := range polluted {
			if p.score >= 0.9 {
				highConfidence = append(highConfidence, p)
			} else {
				mediumConfidence = append(mediumConfidence, p)
			}
		}

		if jsonOutput {
			// Build the list as a typed slice first; an empty (non-nil) slice
			// still encodes as [] rather than null.
			issues := make([]map[string]interface{}, 0, len(polluted))
			for _, p := range polluted {
				issues = append(issues, map[string]interface{}{
					"id":         p.issue.ID,
					"title":      p.issue.Title,
					"score":      p.score,
					"reasons":    p.reasons,
					"created_at": p.issue.CreatedAt,
				})
			}
			// NOTE(review): --clean has no effect together with --json; this
			// path only reports.
			outputJSON(map[string]interface{}{
				"polluted_count":    len(polluted),
				"high_confidence":   len(highConfidence),
				"medium_confidence": len(mediumConfidence),
				"issues":            issues,
			})
			return
		}

		// Human-readable output
		fmt.Printf("Found %d potential test issues:\n\n", len(polluted))
		printGroup := func(heading string, group []pollutionResult) {
			if len(group) == 0 {
				return
			}
			fmt.Print(heading)
			for _, p := range group {
				fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score)
				for _, reason := range p.reasons {
					fmt.Printf(" - %s\n", reason)
				}
			}
			fmt.Printf(" (Total: %d issues)\n\n", len(group))
		}
		printGroup("High Confidence (score ≥ 0.9):\n", highConfidence)
		printGroup("Medium Confidence (score 0.7-0.9):\n", mediumConfidence)

		if !clean {
			fmt.Printf("Run 'bd detect-pollution --clean' to delete these issues (with confirmation).\n")
			return
		}

		// Confirmation prompt
		if !yes {
			fmt.Printf("\nDelete %d test issues? [y/N] ", len(polluted))
			var response string
			// A read error (for example EOF on stdin) leaves response empty,
			// which falls through to the safe "Cancelled." branch below.
			_, _ = fmt.Scanln(&response)
			switch strings.ToLower(response) {
			case "y", "yes":
				// confirmed
			default:
				fmt.Println("Cancelled.")
				return
			}
		}

		// Backup to JSONL before deleting
		backupPath := ".beads/pollution-backup.jsonl"
		if err := backupPollutedIssues(polluted, backupPath); err != nil {
			fmt.Fprintf(os.Stderr, "Error backing up issues: %v\n", err)
			os.Exit(1)
		}
		fmt.Printf("Backed up %d issues to %s\n", len(polluted), backupPath)

		// Delete issues, continuing past individual failures.
		fmt.Printf("\nDeleting %d issues...\n", len(polluted))
		deleted, failed := 0, 0
		for _, p := range polluted {
			if err := deleteIssue(ctx, p.issue.ID); err != nil {
				fmt.Fprintf(os.Stderr, "Error deleting %s: %v\n", p.issue.ID, err)
				failed++
				continue
			}
			deleted++
		}

		// Schedule auto-flush only when something actually changed.
		if deleted > 0 {
			markDirtyAndScheduleFlush()
		}

		green := color.New(color.FgGreen).SprintFunc()
		fmt.Printf("%s Deleted %d test issues\n", green("✓"), deleted)
		if failed > 0 {
			// Do not claim success (and do not os.Exit here, which could skip
			// the flush scheduled above).
			fmt.Fprintf(os.Stderr, "Warning: %d issues could not be deleted (see errors above)\n", failed)
			fmt.Printf("\nCleanup incomplete. To restore deleted issues, run: bd import %s\n", backupPath)
			return
		}
		fmt.Printf("\nCleanup complete. To restore, run: bd import %s\n", backupPath)
	},
}
// pollutionResult pairs an issue with the outcome of the test-pollution
// heuristics run by detectTestPollution.
type pollutionResult struct {
// issue is the suspected test issue.
issue *types.Issue
// score is the sum of the weights of every heuristic that matched.
score float64
// reasons holds one human-readable line per matched heuristic.
reasons []string
}
// Patterns used by detectTestPollution, compiled once at package scope.
var (
	// pollutionTitlePrefixRe matches a lower-cased title that begins with a
	// typical throwaway word followed by '-', '_' or whitespace.
	pollutionTitlePrefixRe = regexp.MustCompile(`^(test|benchmark|sample|tmp|temp|debug|dummy)[-_\s]`)
	// pollutionSequentialIDRe matches IDs of the form "<lowercase letters>-<digits>".
	pollutionSequentialIDRe = regexp.MustCompile(`^[a-z]+-\d+$`)
)

// detectTestPollution scores every issue against a set of heuristics and
// returns those whose combined score reaches 0.7.
//
// Heuristics and weights:
//   - title starts with a test-like prefix: 0.7
//   - sequential-looking ID together with a description under 20 bytes: 0.4
//   - empty description: 0.2, otherwise description under 20 bytes: 0.1
//   - ten or more issues created within the same minute: 0.3
//   - title contains a generic test phrase: 0.5
//
// Weights are accumulated as integer tenths and converted to float64 once.
// Summing the float literals directly gives 0.7+0.2 = 0.8999999999999999,
// which is displayed as "0.90" yet fails a caller's `score >= 0.9` check.
//
// Description length is measured after trimming surrounding whitespace in
// every heuristic, so a whitespace-padded description is judged consistently.
func detectTestPollution(issues []*types.Issue) []pollutionResult {
	// Heuristic weights, in tenths of a point.
	const (
		weightTestPrefix   = 7
		weightSequentialID = 4
		weightNoDesc       = 2
		weightShortDesc    = 1
		weightRapid        = 3
		weightGenericTitle = 5

		thresholdTenths = 7  // minimum total to be reported (0.7)
		shortDescBytes  = 20 // descriptions shorter than this are "minimal"
		rapidBatchSize  = 10 // issues in one minute that count as a burst
	)

	// Count issues per creation minute to detect rapid succession.
	createdPerMinute := make(map[int64]int)
	for _, issue := range issues {
		createdPerMinute[issue.CreatedAt.Unix()/60]++
	}

	var results []pollutionResult
	for _, issue := range issues {
		points := 0
		var reasons []string

		title := strings.ToLower(issue.Title)
		descLen := len(strings.TrimSpace(issue.Description))

		// Check for test prefixes (strong signal)
		if pollutionTitlePrefixRe.MatchString(title) {
			points += weightTestPrefix
			reasons = append(reasons, "Title starts with test prefix")
		}

		// Check for sequential numbering (medium signal)
		if pollutionSequentialIDRe.MatchString(issue.ID) && descLen < shortDescBytes {
			points += weightSequentialID
			reasons = append(reasons, "Sequential ID with minimal description")
		}

		// Check for generic/empty description (weak signal)
		switch {
		case descLen == 0:
			points += weightNoDesc
			reasons = append(reasons, "No description")
		case descLen < shortDescBytes:
			points += weightShortDesc
			reasons = append(reasons, "Very short description")
		}

		// Check for rapid creation (created with many others in same minute)
		if sameMinute := createdPerMinute[issue.CreatedAt.Unix()/60]; sameMinute >= rapidBatchSize {
			points += weightRapid
			reasons = append(reasons, fmt.Sprintf("Created with %d other issues in same minute", sameMinute-1))
		}

		// Check for generic test titles
		if strings.Contains(title, "issue for testing") ||
			strings.Contains(title, "test issue") ||
			strings.Contains(title, "sample issue") {
			points += weightGenericTitle
			reasons = append(reasons, "Generic test title")
		}

		// Only include if score is above threshold
		if points >= thresholdTenths {
			results = append(results, pollutionResult{
				issue:   issue,
				score:   float64(points) / 10,
				reasons: reasons,
			})
		}
	}
	return results
}
// backupPollutedIssues writes the issue of every entry in polluted to path as
// JSONL (one JSON object per line), creating or truncating the file.
//
// The caller deletes the issues right after a nil return, so the backup must
// really be on disk: the file is synced, and an error from Close is returned
// instead of being discarded by a bare defer.
func backupPollutedIssues(polluted []pollutionResult, path string) (err error) {
	// Create backup file
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create backup file: %w", err)
	}
	defer func() {
		// Keep the first error; only report Close if nothing failed earlier.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close backup file: %w", closeErr)
		}
	}()

	// Write each issue as JSONL
	for _, p := range polluted {
		data, marshalErr := json.Marshal(p.issue)
		if marshalErr != nil {
			return fmt.Errorf("failed to marshal issue %s: %w", p.issue.ID, marshalErr)
		}
		if _, writeErr := file.Write(append(data, '\n')); writeErr != nil {
			return fmt.Errorf("failed to write issue %s: %w", p.issue.ID, writeErr)
		}
	}

	if syncErr := file.Sync(); syncErr != nil {
		return fmt.Errorf("failed to sync backup file: %w", syncErr)
	}
	return nil
}
// init declares the --clean and --yes flags and attaches the detect-pollution
// command to the root command.
func init() {
	flags := detectPollutionCmd.Flags()
	flags.Bool("clean", false, "Delete detected test issues")
	flags.Bool("yes", false, "Skip confirmation prompt")

	rootCmd.AddCommand(detectPollutionCmd)
}

162
cmd/bd/repair_deps.go Normal file
View File

@@ -0,0 +1,162 @@
package main
import (
"context"
"fmt"
"os"
"github.com/spf13/cobra"
"github.com/steveyegge/beads/internal/types"
)
// repairDepsCmd implements `bd repair-deps`.
//
// It loads every issue, collects each dependency whose DependsOnID is not the
// ID of a loaded issue, and reports those orphans. With --fix each orphaned
// reference is removed through store.RemoveDependency; this happens in JSON
// mode as well, so the "fixed" field is no longer reported for work that was
// never done. JSON output additionally carries "fixed_count".
var repairDepsCmd = &cobra.Command{
	Use:   "repair-deps",
	Short: "Find and fix orphaned dependency references",
	Long: `Find issues that reference non-existent dependencies and optionally remove them.
This command scans all issues for dependency references (both blocks and related-to)
that point to issues that no longer exist in the database.
Example:
bd repair-deps # Show orphaned dependencies
bd repair-deps --fix # Remove orphaned references
bd repair-deps --json # Output in JSON format`,
	Run: func(cmd *cobra.Command, _ []string) {
		// Check daemon mode - not supported yet (uses direct storage access)
		if daemonClient != nil {
			fmt.Fprintf(os.Stderr, "Error: repair-deps command not yet supported in daemon mode\n")
			fmt.Fprintf(os.Stderr, "Use: bd --no-daemon repair-deps\n")
			os.Exit(1)
		}

		// The flag is registered in init, so the lookup error cannot occur.
		fix, _ := cmd.Flags().GetBool("fix")
		ctx := context.Background()

		// Get all issues
		allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
			os.Exit(1)
		}

		// Build ID existence map
		existingIDs := make(map[string]bool, len(allIssues))
		for _, issue := range allIssues {
			existingIDs[issue.ID] = true
		}

		// Find orphaned dependencies
		type orphanedDep struct {
			IssueID    string
			OrphanedID string
			DepType    string
		}
		var orphaned []orphanedDep
		for _, issue := range allIssues {
			for _, dep := range issue.Dependencies {
				if !existingIDs[dep.DependsOnID] {
					orphaned = append(orphaned, orphanedDep{
						IssueID:    issue.ID,
						OrphanedID: dep.DependsOnID,
						DepType:    string(dep.Type),
					})
				}
			}
		}

		// removeOrphans removes every orphaned reference in discovery order
		// and returns how many removals succeeded. Failures go to stderr and
		// do not stop the loop; successes are echoed only when verbose.
		removeOrphans := func(verbose bool) int {
			fixed := 0
			for _, o := range orphaned {
				if err := store.RemoveDependency(ctx, o.IssueID, o.OrphanedID, actor); err != nil {
					fmt.Fprintf(os.Stderr, "Error removing %s from %s: %v\n", o.OrphanedID, o.IssueID, err)
					continue
				}
				if verbose {
					fmt.Printf("✓ Removed %s from %s dependencies\n", o.OrphanedID, o.IssueID)
				}
				fixed++
			}
			// Schedule auto-flush only when something actually changed.
			if fixed > 0 {
				markDirtyAndScheduleFlush()
			}
			return fixed
		}

		// Output results
		if jsonOutput {
			fixedCount := 0
			if fix {
				fixedCount = removeOrphans(false)
			}
			deps := make([]map[string]interface{}, 0, len(orphaned))
			for _, o := range orphaned {
				deps = append(deps, map[string]interface{}{
					"issue_id":    o.IssueID,
					"orphaned_id": o.OrphanedID,
					"dep_type":    o.DepType,
				})
			}
			outputJSON(map[string]interface{}{
				"orphaned_count": len(orphaned),
				"fixed":          fix,
				"fixed_count":    fixedCount,
				"orphaned_deps":  deps,
			})
			return
		}

		// Human-readable output
		if len(orphaned) == 0 {
			fmt.Println("No orphaned dependencies found!")
			return
		}
		fmt.Printf("Found %d orphaned dependencies:\n\n", len(orphaned))
		for _, o := range orphaned {
			fmt.Printf(" %s: depends on %s (%s) - DELETED\n", o.IssueID, o.OrphanedID, o.DepType)
		}
		if !fix {
			fmt.Printf("\nRun 'bd repair-deps --fix' to remove these references.\n")
			return
		}

		// Fix orphaned dependencies
		fmt.Printf("\nRemoving orphaned dependencies...\n")
		fixed := removeOrphans(true)
		fmt.Printf("\nRepaired %d orphaned dependencies.\n", fixed)
	},
}
// init declares the --fix flag and attaches the repair-deps command to the
// root command.
func init() {
	flags := repairDepsCmd.Flags()
	flags.Bool("fix", false, "Remove orphaned dependency references")

	rootCmd.AddCommand(repairDepsCmd)
}

312
cmd/bd/validate.go Normal file
View File

@@ -0,0 +1,312 @@
package main
import (
"context"
"fmt"
"os"
"strings"
"github.com/fatih/color"
"github.com/spf13/cobra"
"github.com/steveyegge/beads/internal/types"
)
// validateCmd implements `bd validate`.
//
// It runs the requested checks (by default orphans, duplicates and pollution),
// prints the combined result as JSON or text, and exits with status 1 when
// results.hasIssues() reports problems.
//
// Names given to --checks are trimmed and lower-cased, and empty entries are
// skipped, so "orphans, Dupes" works. A list with no usable names falls back
// to the default set instead of running nothing and reporting a clean result.
var validateCmd = &cobra.Command{
	Use:   "validate",
	Short: "Run comprehensive database health checks",
	Long: `Run all validation checks to ensure database integrity:
- Orphaned dependencies (references to deleted issues)
- Duplicate issues (identical content)
- Test pollution (leaked test issues)
- Git merge conflicts in JSONL
Example:
bd validate # Run all checks
bd validate --fix-all # Auto-fix all issues
bd validate --checks=orphans,dupes # Run specific checks
bd validate --json # Output in JSON format`,
	Run: func(cmd *cobra.Command, _ []string) {
		// Check daemon mode - not supported yet (uses direct storage access)
		if daemonClient != nil {
			fmt.Fprintf(os.Stderr, "Error: validate command not yet supported in daemon mode\n")
			fmt.Fprintf(os.Stderr, "Use: bd --no-daemon validate\n")
			os.Exit(1)
		}

		// Both flags are registered in init, so the lookup errors cannot occur.
		fixAll, _ := cmd.Flags().GetBool("fix-all")
		checksFlag, _ := cmd.Flags().GetString("checks")
		ctx := context.Background()

		// Determine which checks to run
		var checks []string
		for _, name := range strings.Split(checksFlag, ",") {
			if name = strings.ToLower(strings.TrimSpace(name)); name != "" {
				checks = append(checks, name)
			}
		}
		if len(checks) == 0 {
			checks = []string{"orphans", "duplicates", "pollution"}
		}

		results := validationResults{
			checks: make(map[string]checkResult),
		}

		// Run each check
		// NOTE(review): the help text above lists "Git merge conflicts in
		// JSONL", but no such check is dispatched here.
		for _, check := range checks {
			switch check {
			case "orphans":
				results.checks["orphans"] = validateOrphanedDeps(ctx, fixAll)
			case "duplicates", "dupes":
				results.checks["duplicates"] = validateDuplicates(ctx, fixAll)
			case "pollution":
				results.checks["pollution"] = validatePollution(ctx, fixAll)
			default:
				fmt.Fprintf(os.Stderr, "Unknown check: %s\n", check)
			}
		}

		// Output results
		if jsonOutput {
			outputJSON(results.toJSON())
		} else {
			results.print(fixAll)
		}

		// Exit with error code if issues found
		// NOTE(review): os.Exit skips deferred and post-run work; confirm that
		// a flush scheduled by markDirtyAndScheduleFlush during --fix-all is
		// not lost when this branch is taken.
		if results.hasIssues() {
			os.Exit(1)
		}
	},
}
// checkResult is the outcome of a single validation check.
type checkResult struct {
	name        string   // human-readable name of the check
	issueCount  int      // number of problems the check found
	fixedCount  int      // number of those problems that were repaired
	err         error    // non-nil when the check itself could not run
	suggestions []string // follow-up commands recommended to the user
}

// validationResults collects the outcome of every check that was run, keyed
// by check name.
type validationResults struct {
	checks map[string]checkResult
}

// hasIssues reports whether the database should be considered unhealthy:
// true when any check failed to run, or found problems that were not all
// fixed. A check that errored counts as an issue because its findings are
// unknown.
func (r *validationResults) hasIssues() bool {
	for _, result := range r.checks {
		if result.err != nil {
			return true
		}
		if result.issueCount > 0 && result.fixedCount < result.issueCount {
			return true
		}
	}
	return false
}

// toJSON converts the results into a map suitable for JSON encoding.
//
// The result has a "checks" map (per check: issue_count, fixed_count, error,
// suggestions) plus the aggregate keys "total_issues", "total_fixed" and
// "healthy". "error" is the error's message string, or nil when the check
// ran; storing the error value itself would typically encode as "{}" and lose
// the message. "healthy" is the negation of hasIssues, so the JSON verdict
// always agrees with the process exit status.
func (r *validationResults) toJSON() map[string]interface{} {
	checks := make(map[string]interface{}, len(r.checks))
	totalIssues, totalFixed := 0, 0

	for name, result := range r.checks {
		var errMsg interface{} // stays nil (JSON null) when the check ran
		if result.err != nil {
			errMsg = result.err.Error()
		}
		checks[name] = map[string]interface{}{
			"issue_count": result.issueCount,
			"fixed_count": result.fixedCount,
			"error":       errMsg,
			"suggestions": result.suggestions,
		}
		totalIssues += result.issueCount
		totalFixed += result.fixedCount
	}

	return map[string]interface{}{
		"checks":       checks,
		"total_issues": totalIssues,
		"total_fixed":  totalFixed,
		"healthy":      !r.hasIssues(),
	}
}
// print writes a human-readable report to stdout: one status line per check,
// then an overall summary and, when unresolved problems remain, the collected
// recommendations.
//
// Checks are listed in a fixed order (orphans, duplicates, pollution, then any
// other keys) so the report does not change with Go's randomized map
// iteration. When a check failed to run, the summary says so and the database
// is not declared healthy. fixAll is accepted for interface compatibility and
// is not used.
func (r *validationResults) print(fixAll bool) {
	green := color.New(color.FgGreen).SprintFunc()
	yellow := color.New(color.FgYellow).SprintFunc()
	red := color.New(color.FgRed).SprintFunc()

	fmt.Println("\nValidation Results:")
	fmt.Println("===================")

	// Known check names first, in a stable order; anything else afterwards.
	preferred := []string{"orphans", "duplicates", "pollution"}
	names := make([]string, 0, len(r.checks))
	listed := make(map[string]bool, len(preferred))
	for _, name := range preferred {
		if _, ok := r.checks[name]; ok {
			names = append(names, name)
			listed[name] = true
		}
	}
	for name := range r.checks {
		if !listed[name] {
			names = append(names, name)
		}
	}

	totalIssues, totalFixed, failedChecks := 0, 0, 0
	for _, name := range names {
		result := r.checks[name]
		switch {
		case result.err != nil:
			failedChecks++
			fmt.Printf("%s %s: ERROR - %v\n", red("✗"), name, result.err)
		case result.issueCount > 0 && result.fixedCount > 0:
			fmt.Printf("%s %s: %d found, %d fixed\n", yellow("⚠"), name, result.issueCount, result.fixedCount)
		case result.issueCount > 0:
			fmt.Printf("%s %s: %d found\n", yellow("⚠"), name, result.issueCount)
		default:
			fmt.Printf("%s %s: OK\n", green("✓"), name)
		}
		totalIssues += result.issueCount
		totalFixed += result.fixedCount
	}

	fmt.Println()
	if failedChecks > 0 {
		fmt.Printf("%s %d checks failed to run; results are incomplete\n", red("✗"), failedChecks)
	}

	switch {
	case totalIssues == 0:
		// Only claim a healthy database when every check actually ran.
		if failedChecks == 0 {
			fmt.Printf("%s Database is healthy!\n", green("✓"))
		}
	case totalFixed == totalIssues:
		fmt.Printf("%s Fixed all %d issues\n", green("✓"), totalFixed)
	default:
		fmt.Printf("%s Found %d issues", yellow("⚠"), totalIssues)
		if totalFixed > 0 {
			fmt.Printf(" (fixed %d, %d remaining)", totalFixed, totalIssues-totalFixed)
		}
		fmt.Println()

		// Print suggestions
		fmt.Println("\nRecommendations:")
		for _, name := range names {
			for _, suggestion := range r.checks[name].suggestions {
				fmt.Printf(" - %s\n", suggestion)
			}
		}
	}
}
// validateOrphanedDeps counts dependencies whose DependsOnID does not match
// the ID of any issue returned by the store and, when fix is true, removes
// them through store.RemoveDependency.
//
// The returned checkResult carries the number found (issueCount), the number
// removed (fixedCount) and, when some remain, a suggestion to run
// `bd repair-deps --fix`. A failure to load issues is returned in err. An
// individual removal failure is written to stderr and leaves that dependency
// counted as unfixed, rather than being dropped silently.
func validateOrphanedDeps(ctx context.Context, fix bool) checkResult {
	result := checkResult{name: "orphaned dependencies"}

	// Get all issues
	allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		result.err = err
		return result
	}

	// Build ID existence map
	existingIDs := make(map[string]bool, len(allIssues))
	for _, issue := range allIssues {
		existingIDs[issue.ID] = true
	}

	// Find orphaned dependencies
	type orphanedDep struct {
		issueID    string
		orphanedID string
	}
	var orphaned []orphanedDep
	for _, issue := range allIssues {
		for _, dep := range issue.Dependencies {
			if !existingIDs[dep.DependsOnID] {
				orphaned = append(orphaned, orphanedDep{
					issueID:    issue.ID,
					orphanedID: dep.DependsOnID,
				})
			}
		}
	}
	result.issueCount = len(orphaned)

	if fix {
		// Remove in discovery order so output is deterministic.
		for _, o := range orphaned {
			if err := store.RemoveDependency(ctx, o.issueID, o.orphanedID, actor); err != nil {
				fmt.Fprintf(os.Stderr, "Error removing %s from %s: %v\n", o.orphanedID, o.issueID, err)
				continue
			}
			result.fixedCount++
		}
		if result.fixedCount > 0 {
			markDirtyAndScheduleFlush()
		}
	}

	if result.issueCount > result.fixedCount {
		result.suggestions = append(result.suggestions, "Run 'bd repair-deps --fix' to remove orphaned dependencies")
	}
	return result
}
// validateDuplicates counts duplicate issues using findDuplicateGroups.
//
// Each group contributes its size minus one to issueCount (one member is
// treated as the canonical issue). Nothing is ever merged here, so fixedCount
// stays zero; fix only changes which follow-up command is suggested.
func validateDuplicates(ctx context.Context, fix bool) checkResult {
	res := checkResult{name: "duplicates"}

	issues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		res.err = err
		return res
	}

	groups := findDuplicateGroups(issues)
	for _, members := range groups {
		res.issueCount += len(members) - 1
	}

	groupCount := len(groups)
	switch {
	case fix && groupCount > 0:
		// Auto-merge needs user review, so it is suggested rather than run.
		hint := fmt.Sprintf("Run 'bd duplicates --auto-merge' to merge %d duplicate groups", groupCount)
		res.suggestions = append(res.suggestions, hint)
	case res.issueCount > 0:
		hint := fmt.Sprintf("Run 'bd duplicates' to review %d duplicate groups", groupCount)
		res.suggestions = append(res.suggestions, hint)
	}
	return res
}
// validatePollution counts the issues flagged by detectTestPollution.
//
// Nothing is deleted here because deletion is destructive, so fixedCount stays
// zero; fix only changes which follow-up command is suggested.
func validatePollution(ctx context.Context, fix bool) checkResult {
	res := checkResult{name: "test pollution"}

	issues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		res.err = err
		return res
	}

	suspects := detectTestPollution(issues)
	found := len(suspects)
	res.issueCount = found

	switch {
	case fix && found > 0:
		hint := fmt.Sprintf("Run 'bd detect-pollution --clean' to delete %d test issues", found)
		res.suggestions = append(res.suggestions, hint)
	case found > 0:
		hint := fmt.Sprintf("Run 'bd detect-pollution' to review %d potential test issues", found)
		res.suggestions = append(res.suggestions, hint)
	}
	return res
}
// init declares the --fix-all and --checks flags and attaches the validate
// command to the root command.
func init() {
	flags := validateCmd.Flags()
	flags.Bool("fix-all", false, "Auto-fix all fixable issues")
	flags.String("checks", "", "Comma-separated list of checks (orphans,duplicates,pollution)")

	rootCmd.AddCommand(validateCmd)
}