Files
beads/cmd/bd/detect_pollution.go
Steve Yegge 57253f93a3 Context propagation with graceful cancellation (bd-rtp, bd-yb8, bd-2o2)
Complete implementation of signal-aware context propagation for graceful
cancellation across all commands and storage operations.

Key changes:

1. Signal-aware contexts (bd-rtp):
   - Added rootCtx/rootCancel in main.go using signal.NotifyContext()
   - Set up in PersistentPreRun, cancelled in PersistentPostRun
   - Daemon uses same pattern in runDaemonLoop()
   - Handles SIGINT/SIGTERM for graceful shutdown

2. Context propagation (bd-yb8):
   - All commands now use rootCtx instead of context.Background()
   - sqlite.New() receives context for cancellable operations
   - Database operations respect context cancellation
   - Storage layer propagates context through all queries

3. Cancellation tests (bd-2o2):
   - Added import_cancellation_test.go with comprehensive tests
   - Added export cancellation test in export_test.go
   - Tests verify database integrity after cancellation
   - All cancellation tests passing

Fixes applied during review:
   - Fixed rootCtx lifecycle (removed premature defer from PersistentPreRun)
   - Fixed test context contamination (reset rootCtx in test cleanup)
   - Fixed export tests missing context setup

Impact:
   - Pressing Ctrl+C during import/export now cancels gracefully
   - No database corruption or hanging transactions
   - Clean shutdown of all operations

Tested:
   - go build ./cmd/bd ✓
   - go test ./cmd/bd -run TestImportCancellation ✓
   - go test ./cmd/bd -run TestExportCommand ✓
   - Manual Ctrl+C testing verified

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 21:57:23 -05:00

271 lines
7.6 KiB
Go

package main
import (
"encoding/json"
"fmt"
"os"
"regexp"
"strings"
"github.com/fatih/color"
"github.com/spf13/cobra"
"github.com/steveyegge/beads/internal/types"
)
var detectPollutionCmd = &cobra.Command{
Use: "detect-pollution",
Short: "Detect test issues that leaked into production database",
Long: `Detect test issues using pattern matching:
- Titles starting with 'test', 'benchmark', 'sample', 'tmp', 'temp'
- Sequential numbering (test-1, test-2, ...)
- Generic descriptions or no description
- Created in rapid succession
Example:
bd detect-pollution # Show potential test issues
bd detect-pollution --clean # Delete test issues (with confirmation)
bd detect-pollution --clean --yes # Delete without confirmation
bd detect-pollution --json # Output in JSON format`,
Run: func(cmd *cobra.Command, _ []string) {
// Check daemon mode - not supported yet (uses direct storage access)
if daemonClient != nil {
fmt.Fprintf(os.Stderr, "Error: detect-pollution command not yet supported in daemon mode\n")
fmt.Fprintf(os.Stderr, "Use: bd --no-daemon detect-pollution\n")
os.Exit(1)
}
clean, _ := cmd.Flags().GetBool("clean")
yes, _ := cmd.Flags().GetBool("yes")
ctx := rootCtx
// Get all issues
allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
if err != nil {
fmt.Fprintf(os.Stderr, "Error fetching issues: %v\n", err)
os.Exit(1)
}
// Detect pollution
polluted := detectTestPollution(allIssues)
if len(polluted) == 0 {
if !jsonOutput {
fmt.Println("No test pollution detected!")
} else {
outputJSON(map[string]interface{}{
"polluted_count": 0,
"issues": []interface{}{},
})
}
return
}
// Categorize by confidence
highConfidence := []pollutionResult{}
mediumConfidence := []pollutionResult{}
for _, p := range polluted {
if p.score >= 0.9 {
highConfidence = append(highConfidence, p)
} else {
mediumConfidence = append(mediumConfidence, p)
}
}
if jsonOutput {
result := map[string]interface{}{
"polluted_count": len(polluted),
"high_confidence": len(highConfidence),
"medium_confidence": len(mediumConfidence),
"issues": []map[string]interface{}{},
}
for _, p := range polluted {
result["issues"] = append(result["issues"].([]map[string]interface{}), map[string]interface{}{
"id": p.issue.ID,
"title": p.issue.Title,
"score": p.score,
"reasons": p.reasons,
"created_at": p.issue.CreatedAt,
})
}
outputJSON(result)
return
}
// Human-readable output
fmt.Printf("Found %d potential test issues:\n\n", len(polluted))
if len(highConfidence) > 0 {
fmt.Printf("High Confidence (score ≥ 0.9):\n")
for _, p := range highConfidence {
fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score)
for _, reason := range p.reasons {
fmt.Printf(" - %s\n", reason)
}
}
fmt.Printf(" (Total: %d issues)\n\n", len(highConfidence))
}
if len(mediumConfidence) > 0 {
fmt.Printf("Medium Confidence (score 0.7-0.9):\n")
for _, p := range mediumConfidence {
fmt.Printf(" %s: %q (score: %.2f)\n", p.issue.ID, p.issue.Title, p.score)
for _, reason := range p.reasons {
fmt.Printf(" - %s\n", reason)
}
}
fmt.Printf(" (Total: %d issues)\n\n", len(mediumConfidence))
}
if !clean {
fmt.Printf("Run 'bd detect-pollution --clean' to delete these issues (with confirmation).\n")
return
}
// Confirmation prompt
if !yes {
fmt.Printf("\nDelete %d test issues? [y/N] ", len(polluted))
var response string
_, _ = fmt.Scanln(&response)
if strings.ToLower(response) != "y" {
fmt.Println("Canceled.")
return
}
}
// Backup to JSONL before deleting
backupPath := ".beads/pollution-backup.jsonl"
if err := backupPollutedIssues(polluted, backupPath); err != nil {
fmt.Fprintf(os.Stderr, "Error backing up issues: %v\n", err)
os.Exit(1)
}
fmt.Printf("Backed up %d issues to %s\n", len(polluted), backupPath)
// Delete issues
fmt.Printf("\nDeleting %d issues...\n", len(polluted))
deleted := 0
for _, p := range polluted {
if err := deleteIssue(ctx, p.issue.ID); err != nil {
fmt.Fprintf(os.Stderr, "Error deleting %s: %v\n", p.issue.ID, err)
continue
}
deleted++
}
// Schedule auto-flush
markDirtyAndScheduleFlush()
green := color.New(color.FgGreen).SprintFunc()
fmt.Printf("%s Deleted %d test issues\n", green("✓"), deleted)
fmt.Printf("\nCleanup complete. To restore, run: bd import %s\n", backupPath)
},
}
type pollutionResult struct {
issue *types.Issue
score float64
reasons []string
}
func detectTestPollution(issues []*types.Issue) []pollutionResult {
var results []pollutionResult
// Patterns for test issue titles
testPrefixPattern := regexp.MustCompile(`^(test|benchmark|sample|tmp|temp|debug|dummy)[-_\s]`)
sequentialPattern := regexp.MustCompile(`^[a-z]+-\d+$`)
// Group issues by creation time to detect rapid succession
issuesByMinute := make(map[int64][]*types.Issue)
for _, issue := range issues {
minute := issue.CreatedAt.Unix() / 60
issuesByMinute[minute] = append(issuesByMinute[minute], issue)
}
for _, issue := range issues {
score := 0.0
var reasons []string
title := strings.ToLower(issue.Title)
// Check for test prefixes (strong signal)
if testPrefixPattern.MatchString(title) {
score += 0.7
reasons = append(reasons, "Title starts with test prefix")
}
// Check for sequential numbering (medium signal)
if sequentialPattern.MatchString(issue.ID) && len(issue.Description) < 20 {
score += 0.4
reasons = append(reasons, "Sequential ID with minimal description")
}
// Check for generic/empty description (weak signal)
if len(strings.TrimSpace(issue.Description)) == 0 {
score += 0.2
reasons = append(reasons, "No description")
} else if len(issue.Description) < 20 {
score += 0.1
reasons = append(reasons, "Very short description")
}
// Check for rapid creation (created with many others in same minute)
minute := issue.CreatedAt.Unix() / 60
if len(issuesByMinute[minute]) >= 10 {
score += 0.3
reasons = append(reasons, fmt.Sprintf("Created with %d other issues in same minute", len(issuesByMinute[minute])-1))
}
// Check for generic test titles
if strings.Contains(title, "issue for testing") ||
strings.Contains(title, "test issue") ||
strings.Contains(title, "sample issue") {
score += 0.5
reasons = append(reasons, "Generic test title")
}
// Only include if score is above threshold
if score >= 0.7 {
results = append(results, pollutionResult{
issue: issue,
score: score,
reasons: reasons,
})
}
}
return results
}
func backupPollutedIssues(polluted []pollutionResult, path string) error {
// Create backup file
// nolint:gosec // G304: path is provided by user as explicit backup location
file, err := os.Create(path)
if err != nil {
return fmt.Errorf("failed to create backup file: %w", err)
}
defer file.Close()
// Write each issue as JSONL
for _, p := range polluted {
data, err := json.Marshal(p.issue)
if err != nil {
return fmt.Errorf("failed to marshal issue %s: %w", p.issue.ID, err)
}
if _, err := file.WriteString(string(data) + "\n"); err != nil {
return fmt.Errorf("failed to write issue %s: %w", p.issue.ID, err)
}
}
return nil
}
func init() {
detectPollutionCmd.Flags().Bool("clean", false, "Delete detected test issues")
detectPollutionCmd.Flags().Bool("yes", false, "Skip confirmation prompt")
rootCmd.AddCommand(detectPollutionCmd)
}