Add repair commands: bd repair-deps, bd detect-pollution, bd validate
Implements Phase 1 of bd-56 (Repair Commands & AI-Assisted Tooling): New commands: - bd repair-deps: Find and fix orphaned dependency references - bd detect-pollution: Detect test issues using pattern matching - bd validate: Comprehensive health check (orphans, duplicates, pollution) Features: - JSON output support for all commands - Safe deletion with backup for detect-pollution - Auto-fix support for orphaned dependencies - Direct storage access (requires BEADS_NO_DAEMON=1) Closes bd-56 (Phase 1 complete) Related: bd-103, bd-105, bd-106 Amp-Thread-ID: https://ampcode.com/threads/T-5822c6d2-d645-4043-9a8d-3c51ac93bbb7 Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
270
cmd/bd/detect_pollution.go
Normal file
270
cmd/bd/detect_pollution.go
Normal file
@@ -0,0 +1,270 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/fatih/color"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/steveyegge/beads/internal/types"
|
||||
)
|
||||
|
||||
var detectPollutionCmd = &cobra.Command{
|
||||
Use: "detect-pollution",
|
||||
Short: "Detect test issues that leaked into production database",
|
||||
Long: `Detect test issues using pattern matching:
|
||||
- Titles starting with 'test', 'benchmark', 'sample', 'tmp', 'temp'
|
||||
- Sequential numbering (test-1, test-2, ...)
|
||||
- Generic descriptions or no description
|
||||
- Created in rapid succession
|
||||
|
||||
Example:
|
||||
bd detect-pollution # Show potential test issues
|
||||
bd detect-pollution --clean # Delete test issues (with confirmation)
|
||||
bd detect-pollution --clean --yes # Delete without confirmation
|
||||
bd detect-pollution --json # Output in JSON format`,
|
||||
Run: func(cmd *cobra.Command, _ []string) {
|
||||
// Check daemon mode - not supported yet (uses direct storage access)
|
||||
if daemonClient != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: detect-pollution command not yet supported in daemon mode\n")
|
||||
fmt.Fprintf(os.Stderr, "Use: bd --no-daemon detect-pollution\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
clean, _ := cmd.Flags().GetBool("clean")
|
||||
yes, _ := cmd.Flags().GetBool("yes")
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Get all issues
|
||||
allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Detect pollution
|
||||
polluted := detectTestPollution(allIssues)
|
||||
|
||||
if len(polluted) == 0 {
|
||||
if !jsonOutput {
|
||||
fmt.Println("No test pollution detected!")
|
||||
} else {
|
||||
outputJSON(map[string]interface{}{
|
||||
"polluted_count": 0,
|
||||
"issues": []interface{}{},
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Categorize by confidence
|
||||
highConfidence := []pollutionResult{}
|
||||
mediumConfidence := []pollutionResult{}
|
||||
|
||||
for _, p := range polluted {
|
||||
if p.score >= 0.9 {
|
||||
highConfidence = append(highConfidence, p)
|
||||
} else {
|
||||
mediumConfidence = append(mediumConfidence, p)
|
||||
}
|
||||
}
|
||||
|
||||
if jsonOutput {
|
||||
result := map[string]interface{}{
|
||||
"polluted_count": len(polluted),
|
||||
"high_confidence": len(highConfidence),
|
||||
"medium_confidence": len(mediumConfidence),
|
||||
"issues": []map[string]interface{}{},
|
||||
}
|
||||
|
||||
for _, p := range polluted {
|
||||
result["issues"] = append(result["issues"].([]map[string]interface{}), map[string]interface{}{
|
||||
"id": p.issue.ID,
|
||||
"title": p.issue.Title,
|
||||
"score": p.score,
|
||||
"reasons": p.reasons,
|
||||
"created_at": p.issue.CreatedAt,
|
||||
})
|
||||
}
|
||||
|
||||
outputJSON(result)
|
||||
return
|
||||
}
|
||||
|
||||
// Human-readable output
|
||||
fmt.Printf("Found %d potential test issues:\n\n", len(polluted))
|
||||
|
||||
if len(highConfidence) > 0 {
|
||||
fmt.Printf("High Confidence (score ≥ 0.9):\n")
|
||||
for _, p := range highConfidence {
|
||||
fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score)
|
||||
for _, reason := range p.reasons {
|
||||
fmt.Printf(" - %s\n", reason)
|
||||
}
|
||||
}
|
||||
fmt.Printf(" (Total: %d issues)\n\n", len(highConfidence))
|
||||
}
|
||||
|
||||
if len(mediumConfidence) > 0 {
|
||||
fmt.Printf("Medium Confidence (score 0.7-0.9):\n")
|
||||
for _, p := range mediumConfidence {
|
||||
fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score)
|
||||
for _, reason := range p.reasons {
|
||||
fmt.Printf(" - %s\n", reason)
|
||||
}
|
||||
}
|
||||
fmt.Printf(" (Total: %d issues)\n\n", len(mediumConfidence))
|
||||
}
|
||||
|
||||
if !clean {
|
||||
fmt.Printf("Run 'bd detect-pollution --clean' to delete these issues (with confirmation).\n")
|
||||
return
|
||||
}
|
||||
|
||||
// Confirmation prompt
|
||||
if !yes {
|
||||
fmt.Printf("\nDelete %d test issues? [y/N] ", len(polluted))
|
||||
var response string
|
||||
fmt.Scanln(&response)
|
||||
if strings.ToLower(response) != "y" {
|
||||
fmt.Println("Cancelled.")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Backup to JSONL before deleting
|
||||
backupPath := ".beads/pollution-backup.jsonl"
|
||||
if err := backupPollutedIssues(polluted, backupPath); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error backing up issues: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("Backed up %d issues to %s\n", len(polluted), backupPath)
|
||||
|
||||
// Delete issues
|
||||
fmt.Printf("\nDeleting %d issues...\n", len(polluted))
|
||||
deleted := 0
|
||||
for _, p := range polluted {
|
||||
if err := deleteIssue(ctx, p.issue.ID); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error deleting %s: %v\n", p.issue.ID, err)
|
||||
continue
|
||||
}
|
||||
deleted++
|
||||
}
|
||||
|
||||
// Schedule auto-flush
|
||||
markDirtyAndScheduleFlush()
|
||||
|
||||
green := color.New(color.FgGreen).SprintFunc()
|
||||
fmt.Printf("%s Deleted %d test issues\n", green("✓"), deleted)
|
||||
fmt.Printf("\nCleanup complete. To restore, run: bd import %s\n", backupPath)
|
||||
},
|
||||
}
|
||||
|
||||
type pollutionResult struct {
|
||||
issue *types.Issue
|
||||
score float64
|
||||
reasons []string
|
||||
}
|
||||
|
||||
func detectTestPollution(issues []*types.Issue) []pollutionResult {
|
||||
var results []pollutionResult
|
||||
|
||||
// Patterns for test issue titles
|
||||
testPrefixPattern := regexp.MustCompile(`^(test|benchmark|sample|tmp|temp|debug|dummy)[-_\s]`)
|
||||
sequentialPattern := regexp.MustCompile(`^[a-z]+-\d+$`)
|
||||
|
||||
// Group issues by creation time to detect rapid succession
|
||||
issuesByMinute := make(map[int64][]*types.Issue)
|
||||
for _, issue := range issues {
|
||||
minute := issue.CreatedAt.Unix() / 60
|
||||
issuesByMinute[minute] = append(issuesByMinute[minute], issue)
|
||||
}
|
||||
|
||||
for _, issue := range issues {
|
||||
score := 0.0
|
||||
var reasons []string
|
||||
|
||||
title := strings.ToLower(issue.Title)
|
||||
|
||||
// Check for test prefixes (strong signal)
|
||||
if testPrefixPattern.MatchString(title) {
|
||||
score += 0.7
|
||||
reasons = append(reasons, "Title starts with test prefix")
|
||||
}
|
||||
|
||||
// Check for sequential numbering (medium signal)
|
||||
if sequentialPattern.MatchString(issue.ID) && len(issue.Description) < 20 {
|
||||
score += 0.4
|
||||
reasons = append(reasons, "Sequential ID with minimal description")
|
||||
}
|
||||
|
||||
// Check for generic/empty description (weak signal)
|
||||
if len(strings.TrimSpace(issue.Description)) == 0 {
|
||||
score += 0.2
|
||||
reasons = append(reasons, "No description")
|
||||
} else if len(issue.Description) < 20 {
|
||||
score += 0.1
|
||||
reasons = append(reasons, "Very short description")
|
||||
}
|
||||
|
||||
// Check for rapid creation (created with many others in same minute)
|
||||
minute := issue.CreatedAt.Unix() / 60
|
||||
if len(issuesByMinute[minute]) >= 10 {
|
||||
score += 0.3
|
||||
reasons = append(reasons, fmt.Sprintf("Created with %d other issues in same minute", len(issuesByMinute[minute])-1))
|
||||
}
|
||||
|
||||
// Check for generic test titles
|
||||
if strings.Contains(title, "issue for testing") ||
|
||||
strings.Contains(title, "test issue") ||
|
||||
strings.Contains(title, "sample issue") {
|
||||
score += 0.5
|
||||
reasons = append(reasons, "Generic test title")
|
||||
}
|
||||
|
||||
// Only include if score is above threshold
|
||||
if score >= 0.7 {
|
||||
results = append(results, pollutionResult{
|
||||
issue: issue,
|
||||
score: score,
|
||||
reasons: reasons,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func backupPollutedIssues(polluted []pollutionResult, path string) error {
|
||||
// Create backup file
|
||||
file, err := os.Create(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create backup file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Write each issue as JSONL
|
||||
for _, p := range polluted {
|
||||
data, err := json.Marshal(p.issue)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal issue %s: %w", p.issue.ID, err)
|
||||
}
|
||||
|
||||
if _, err := file.WriteString(string(data) + "\n"); err != nil {
|
||||
return fmt.Errorf("failed to write issue %s: %w", p.issue.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
detectPollutionCmd.Flags().Bool("clean", false, "Delete detected test issues")
|
||||
detectPollutionCmd.Flags().Bool("yes", false, "Skip confirmation prompt")
|
||||
rootCmd.AddCommand(detectPollutionCmd)
|
||||
}
|
||||
Reference in New Issue
Block a user