Files
beads/cmd/bd/detect_pollution.go
Steve Yegge 9dda75ef15 docs: Document deprecation timeline and add removal warnings (bd-pndo)
- Add Deprecated section to CHANGELOG.md listing all deprecated commands
- Set removal target: v1.0.0 for all deprecated commands
- Add '(will be removed in v1.0.0)' to all deprecation messages
- Add proper Deprecated field to admin_aliases.go (cleanup, compact, reset)
- Remove manual warning prints from admin aliases (Cobra handles it)

Deprecated commands documented:
- bd relate/unrelate → bd dep relate/unrelate
- bd daemons → bd daemon <subcommand>
- bd cleanup/compact/reset → bd admin <command>
- bd comment → bd comments add
- bd template* → bd mol/formula commands
- bd detect-pollution → bd doctor --check=pollution
- bd migrate-* → bd migrate <subcommand>

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 16:40:43 -08:00

290 lines
8.5 KiB
Go

package main
import (
"encoding/json"
"fmt"
"os"
"regexp"
"strings"
"github.com/spf13/cobra"
"github.com/steveyegge/beads/internal/types"
"github.com/steveyegge/beads/internal/ui"
)
// showDetectPollutionDeprecationHint writes a muted one-line tip to stderr
// pointing at 'bd doctor --check=pollution', the replacement for this command.
func showDetectPollutionDeprecationHint() {
	const tip = "💡 Tip: Use 'bd doctor --check=pollution' instead (this command is deprecated)"

	hint := ui.RenderMuted(tip)
	fmt.Fprintln(os.Stderr, hint)
}
// detectPollutionCmd is the deprecated, hidden 'bd detect-pollution' command.
//
// It loads every issue from the store, scores them with detectTestPollution,
// and reports the matches either as JSON (when jsonOutput is set) or as a
// human-readable list split into high and medium confidence groups. With
// --clean it backs the matches up to a JSONL file and then deletes them,
// asking for confirmation first unless --yes is given.
//
// The command exits with status 1 in daemon mode, when issues cannot be
// fetched, and when the backup cannot be written. Individual delete failures
// are reported on stderr and skipped.
var detectPollutionCmd = &cobra.Command{
	Use:        "detect-pollution",
	GroupID:    "maint",
	Hidden:     true, // deprecated, use 'bd doctor --check=pollution' instead
	Deprecated: "use 'bd doctor --check=pollution' instead (will be removed in v1.0.0)",
	Short:      "Detect and optionally clean test issues from database",
	Long: `Detect test issues that leaked into production database using pattern matching.
This command finds issues that appear to be test data based on:
- Titles starting with 'test', 'benchmark', 'sample', 'tmp', 'temp'
- Sequential numbering patterns (test-1, test-2, ...)
- Generic or missing descriptions
- Created in rapid succession (potential script/automation artifacts)
USE CASES:
- Cleaning up after testing in a production database
- Identifying accidental test data from CI/automation
- Database hygiene after development experiments
- Quality checks before database backups
EXAMPLES:
bd detect-pollution # Show potential test issues
bd detect-pollution --clean # Delete test issues (with confirmation)
bd detect-pollution --clean --yes # Delete without confirmation
bd detect-pollution --json # Output in JSON format
NOTE: Review detected issues carefully before using --clean. False positives are possible.`,
	Run: func(cmd *cobra.Command, _ []string) {
		// Scores are sums of float64 weights, so a nominal 0.9 (for example
		// 0.7 + 0.2) may be stored as 0.8999999999999999. Comparing with a
		// small tolerance keeps such an issue in the group that its printed
		// score ("0.90") implies.
		const (
			highConfidenceThreshold = 0.9
			scoreTolerance          = 1e-9
		)

		// Check daemon mode - not supported yet (uses direct storage access)
		if daemonClient != nil {
			fmt.Fprintf(os.Stderr, "Error: detect-pollution command not yet supported in daemon mode\n")
			fmt.Fprintf(os.Stderr, "Use: bd --no-daemon detect-pollution\n")
			os.Exit(1)
		}

		// Both flags are registered on this command in init, so the lookup
		// errors are deliberately ignored.
		clean, _ := cmd.Flags().GetBool("clean")
		yes, _ := cmd.Flags().GetBool("yes")
		ctx := rootCtx

		// Get all issues
		allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
			os.Exit(1)
		}

		// Detect pollution
		polluted := detectTestPollution(allIssues)
		if len(polluted) == 0 {
			if !jsonOutput {
				fmt.Println("No test pollution detected!")
			} else {
				outputJSON(map[string]interface{}{
					"polluted_count": 0,
					"issues":         []interface{}{},
				})
			}
			return
		}

		// Categorize by confidence
		var highConfidence, mediumConfidence []pollutionResult
		for _, p := range polluted {
			if p.score >= highConfidenceThreshold-scoreTolerance {
				highConfidence = append(highConfidence, p)
			} else {
				mediumConfidence = append(mediumConfidence, p)
			}
		}

		if jsonOutput {
			// Build the list as a typed slice first; it is never nil, so it
			// always encodes as a JSON array.
			issues := make([]map[string]interface{}, 0, len(polluted))
			for _, p := range polluted {
				issues = append(issues, map[string]interface{}{
					"id":         p.issue.ID,
					"title":      p.issue.Title,
					"score":      p.score,
					"reasons":    p.reasons,
					"created_at": p.issue.CreatedAt,
				})
			}
			outputJSON(map[string]interface{}{
				"polluted_count":    len(polluted),
				"high_confidence":   len(highConfidence),
				"medium_confidence": len(mediumConfidence),
				"issues":            issues,
			})
			return
		}

		// printGroup lists one confidence group under its heading; an empty
		// group prints nothing at all.
		printGroup := func(heading string, group []pollutionResult) {
			if len(group) == 0 {
				return
			}
			fmt.Println(heading)
			for _, p := range group {
				fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score)
				for _, reason := range p.reasons {
					fmt.Printf(" - %s\n", reason)
				}
			}
			fmt.Printf(" (Total: %d issues)\n\n", len(group))
		}

		// Human-readable output
		fmt.Printf("Found %d potential test issues:\n\n", len(polluted))
		printGroup("High Confidence (score ≥ 0.9):", highConfidence)
		printGroup("Medium Confidence (score 0.7-0.9):", mediumConfidence)

		if !clean {
			fmt.Printf("Run 'bd detect-pollution --clean' to delete these issues (with confirmation).\n")
			// Show hint about doctor consolidation
			showDetectPollutionDeprecationHint()
			return
		}

		// Confirmation prompt
		if !yes {
			fmt.Printf("\nDelete %d test issues? [y/N] ", len(polluted))
			var response string
			// A read error (EOF, empty line) leaves response empty, which is
			// treated as "no", so the error itself is not needed.
			_, _ = fmt.Scanln(&response)
			switch strings.ToLower(response) {
			case "y", "yes":
				// confirmed
			default:
				fmt.Println("Canceled.")
				return
			}
		}

		// Backup to JSONL before deleting; refuse to delete without a backup.
		backupPath := ".beads/pollution-backup.jsonl"
		if err := backupPollutedIssues(polluted, backupPath); err != nil {
			fmt.Fprintf(os.Stderr, "Error backing up issues: %v\n", err)
			os.Exit(1)
		}
		fmt.Printf("Backed up %d issues to %s\n", len(polluted), backupPath)

		// Delete issues; a failure is reported and the remaining issues are
		// still attempted.
		fmt.Printf("\nDeleting %d issues...\n", len(polluted))
		deleted := 0
		for _, p := range polluted {
			if err := deleteIssue(ctx, p.issue.ID); err != nil {
				fmt.Fprintf(os.Stderr, "Error deleting %s: %v\n", p.issue.ID, err)
				continue
			}
			deleted++
		}

		// Schedule auto-flush
		markDirtyAndScheduleFlush()

		fmt.Printf("%s Deleted %d test issues\n", ui.RenderPass("✓"), deleted)
		fmt.Printf("\nCleanup complete. To restore, run: bd import %s\n", backupPath)
	},
}
// pollutionResult pairs an issue with the outcome of the test-pollution
// heuristics applied to it by detectTestPollution.
type pollutionResult struct {
// issue is the issue that was scored.
issue *types.Issue
// score is the sum of the weights of every heuristic the issue matched.
score float64
// reasons holds one human-readable line per matched heuristic.
reasons []string
}
// detectTestPollution scores every issue against a set of heuristics for
// leaked test data and returns those whose score reaches 0.7.
//
// Signals and their weights:
//   - title (lower-cased) starts with a test-like prefix and a separator: 0.7
//   - ID looks like "<letters>-<digits>" and the description is short:   0.4
//   - description is empty: 0.2, or shorter than 20 bytes:               0.1
//   - at least 10 issues were created in the same minute:                0.3
//   - title contains a generic test phrase:                              0.5
//
// Description length is always measured after trimming surrounding
// whitespace, so whitespace padding cannot hide a minimal description.
//
// Each result carries the issue, its score, and one reason string per matched
// signal. The result is nil when nothing matches. Entries of issues must be
// non-nil.
func detectTestPollution(issues []*types.Issue) []pollutionResult {
	// Weights are kept in hundredths and converted to float64 once at the
	// end. Summing float64 weights directly makes 0.7 + 0.2 evaluate to
	// 0.8999999999999999, which fails a ">= 0.9" comparison downstream even
	// though it prints as 0.90.
	const (
		weightTestPrefix       = 70
		weightSequentialID     = 40
		weightNoDescription    = 20
		weightShortDescription = 10
		weightRapidCreation    = 30
		weightGenericTitle     = 50

		inclusionThreshold  = 70 // minimum total to report an issue
		shortDescriptionLen = 20 // descriptions below this many bytes are "short"
		rapidCreationCount  = 10 // issues per minute that count as a burst
	)

	// Patterns for test issue titles and sequential IDs
	testPrefixPattern := regexp.MustCompile(`^(test|benchmark|sample|tmp|temp|debug|dummy)[-_\s]`)
	sequentialPattern := regexp.MustCompile(`^[a-z]+-\d+$`)

	// minuteOf buckets an issue by the minute of its creation time.
	minuteOf := func(issue *types.Issue) int64 {
		return issue.CreatedAt.Unix() / 60
	}

	// Count issues per creation minute to detect rapid succession; only the
	// count is needed, not the issues themselves.
	createdPerMinute := make(map[int64]int)
	for _, issue := range issues {
		createdPerMinute[minuteOf(issue)]++
	}

	var results []pollutionResult
	for _, issue := range issues {
		points := 0
		var reasons []string

		title := strings.ToLower(issue.Title)
		descLen := len(strings.TrimSpace(issue.Description))

		// Check for test prefixes (strong signal)
		if testPrefixPattern.MatchString(title) {
			points += weightTestPrefix
			reasons = append(reasons, "Title starts with test prefix")
		}

		// Check for sequential numbering (medium signal)
		if sequentialPattern.MatchString(issue.ID) && descLen < shortDescriptionLen {
			points += weightSequentialID
			reasons = append(reasons, "Sequential ID with minimal description")
		}

		// Check for generic/empty description (weak signal)
		switch {
		case descLen == 0:
			points += weightNoDescription
			reasons = append(reasons, "No description")
		case descLen < shortDescriptionLen:
			points += weightShortDescription
			reasons = append(reasons, "Very short description")
		}

		// Check for rapid creation (created with many others in same minute)
		if n := createdPerMinute[minuteOf(issue)]; n >= rapidCreationCount {
			points += weightRapidCreation
			reasons = append(reasons, fmt.Sprintf("Created with %d other issues in same minute", n-1))
		}

		// Check for generic test titles
		if strings.Contains(title, "issue for testing") ||
			strings.Contains(title, "test issue") ||
			strings.Contains(title, "sample issue") {
			points += weightGenericTitle
			reasons = append(reasons, "Generic test title")
		}

		// Only include if score is above threshold
		if points >= inclusionThreshold {
			results = append(results, pollutionResult{
				issue:   issue,
				score:   float64(points) / 100,
				reasons: reasons,
			})
		}
	}
	return results
}
// backupPollutedIssues writes the issue of every entry in polluted to path as
// JSONL: one JSON-encoded issue per line. The file is created if missing and
// truncated if it already exists.
//
// It returns an error when the file cannot be created, when an issue cannot
// be marshaled or written, or when the file cannot be closed. The close error
// matters here: the caller deletes the issues once this function succeeds, so
// a backup that failed to reach disk must not be reported as success.
func backupPollutedIssues(polluted []pollutionResult, path string) (err error) {
	// Create backup file
	// nolint:gosec // G304: path is provided by user as explicit backup location
	file, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create backup file: %w", err)
	}
	defer func() {
		// Surface a close failure unless an earlier error is already being
		// returned; the earlier error is the more useful one.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close backup file: %w", closeErr)
		}
	}()

	// Write each issue as JSONL
	for _, p := range polluted {
		data, marshalErr := json.Marshal(p.issue)
		if marshalErr != nil {
			return fmt.Errorf("failed to marshal issue %s: %w", p.issue.ID, marshalErr)
		}
		if _, writeErr := file.Write(append(data, '\n')); writeErr != nil {
			return fmt.Errorf("failed to write issue %s: %w", p.issue.ID, writeErr)
		}
	}
	return nil
}
// init declares the --clean and --yes flags on detectPollutionCmd and
// attaches the command to rootCmd.
func init() {
	flags := detectPollutionCmd.Flags()
	flags.Bool("clean", false, "Delete detected test issues")
	flags.Bool("yes", false, "Skip confirmation prompt")

	rootCmd.AddCommand(detectPollutionCmd)
}