Files
beads/cmd/bd/doctor/fix/deletions.go
matt wilkie a22d949cbd Fix bd-in7q: prevent migrate-tombstones from corrupting deletions manifest (#554)
Root cause: bd doctor hydrate was re-adding migrated tombstones to the
deletions manifest because getCurrentJSONLIDs() included all issues,
including tombstones. When compared against git history, tombstones
appeared as 'deleted' and were incorrectly added to the manifest as new
deletions, corrupting the database on next sync.

Fix: Skip tombstone-status issues in getCurrentJSONLIDs() so they don't
participate in deletion detection. Tombstones represent already-recorded
deletions/migrations and shouldn't be treated as active issues.

Changes:
- cmd/bd/doctor/fix/deletions.go: Skip tombstones in getCurrentJSONLIDs()
- cmd/bd/doctor/fix/deletions_test.go: New tests for tombstone skipping
- cmd/bd/migrate_tombstones_test.go: Test that tombstones are valid

This fixes the bug where 'bd migrate-tombstones' followed by 'bd sync'
would add thousands of deletion records with author 'bd-doctor-hydrate'
2025-12-14 14:11:27 -08:00

233 lines
6.5 KiB
Go

package fix
import (
"bufio"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/steveyegge/beads/internal/deletions"
)
// HydrateDeletionsManifest populates deletions.jsonl from git history.
// It finds all issue IDs that were ever in the JSONL but are no longer present,
// and adds them to the deletions manifest.
//
// path is the workspace root; the JSONL is looked up as .beads/issues.jsonl,
// falling back to the legacy .beads/beads.jsonl. IDs that already have a
// record in the manifest are not added again. When nothing needs to be added
// and the manifest holds no records, an empty manifest is written so that
// hydration is recorded as complete; a manifest that already holds records is
// left untouched.
//
// It returns an error if the workspace is invalid, no JSONL file is found,
// the manifest cannot be loaded or written, or git history cannot be read.
func HydrateDeletionsManifest(path string) error {
	if err := validateBeadsWorkspace(path); err != nil {
		return err
	}

	beadsDir := filepath.Join(path, ".beads")

	// bd-6xd: issues.jsonl is the canonical filename
	jsonlPath := filepath.Join(beadsDir, "issues.jsonl")

	// Also check for legacy beads.jsonl
	if _, err := os.Stat(jsonlPath); os.IsNotExist(err) {
		legacyPath := filepath.Join(beadsDir, "beads.jsonl")
		if _, err := os.Stat(legacyPath); err != nil {
			return fmt.Errorf("no JSONL file found in .beads/")
		}
		jsonlPath = legacyPath
	}

	// Load existing deletions manifest to avoid duplicates
	deletionsPath := deletions.DefaultPath(beadsDir)
	existingDeletions, err := deletions.LoadDeletions(deletionsPath)
	if err != nil {
		return fmt.Errorf("failed to load existing deletions: %w", err)
	}

	// Get current IDs from JSONL
	currentIDs, err := getCurrentJSONLIDs(jsonlPath)
	if err != nil {
		return fmt.Errorf("failed to read current JSONL: %w", err)
	}

	// Get historical IDs from git
	historicalIDs, err := getHistoricalJSONLIDs(path, jsonlPath)
	if err != nil {
		return fmt.Errorf("failed to get historical IDs from git: %w", err)
	}

	// Find deleted IDs (in history but not in current, and not already in manifest)
	var deletedIDs []string
	for id := range historicalIDs {
		if currentIDs[id] {
			continue
		}
		// Skip if already in deletions manifest
		if _, exists := existingDeletions.Records[id]; exists {
			continue
		}
		deletedIDs = append(deletedIDs, id)
	}

	if len(deletedIDs) == 0 {
		// Never replace a manifest that already holds records: having nothing
		// new to add must not discard what was recorded earlier.
		// NOTE(review): this assumes WriteDeletions(path, nil) rewrites the
		// manifest as empty rather than merging - confirm against the
		// deletions package.
		if len(existingDeletions.Records) > 0 {
			fmt.Println(" No new deleted issues found in git history (existing manifest left unchanged)")
			return nil
		}

		// Create empty deletions manifest to signal hydration is complete
		// This prevents the check from re-warning after --fix runs
		if err := deletions.WriteDeletions(deletionsPath, nil); err != nil {
			return fmt.Errorf("failed to create empty deletions manifest: %w", err)
		}
		fmt.Println(" No deleted issues found in git history (created empty manifest)")
		return nil
	}

	// Add to deletions manifest; every hydrated record shares one timestamp.
	now := time.Now()
	for _, id := range deletedIDs {
		record := deletions.DeletionRecord{
			ID:        id,
			Timestamp: now,
			Actor:     "bd-doctor-hydrate",
			Reason:    "Hydrated from git history",
		}
		if err := deletions.AppendDeletion(deletionsPath, record); err != nil {
			return fmt.Errorf("failed to append deletion record for %s: %w", id, err)
		}
	}

	fmt.Printf(" Added %d deletion records to manifest\n", len(deletedIDs))
	return nil
}
// getCurrentJSONLIDs scans the JSONL file at jsonlPath and returns the set of
// issue IDs found in it, excluding entries whose status is "tombstone".
//
// A missing file yields an empty set and no error; any other open failure is
// returned with a nil set. Blank lines, lines that are not valid JSON, and
// entries without an ID are ignored. A scanner error is returned together
// with whatever IDs were collected before it occurred.
func getCurrentJSONLIDs(jsonlPath string) (map[string]bool, error) {
	live := make(map[string]bool)

	f, openErr := os.Open(jsonlPath) // #nosec G304 - path validated by caller
	switch {
	case openErr == nil:
		// opened successfully; fall through to scanning
	case os.IsNotExist(openErr):
		return live, nil
	default:
		return nil, openErr
	}
	defer f.Close()

	const (
		initialBufSize = 64 * 1024
		maxLineSize    = 10 * 1024 * 1024
	)
	sc := bufio.NewScanner(f)
	sc.Buffer(make([]byte, 0, initialBufSize), maxLineSize)

	for sc.Scan() {
		raw := sc.Bytes()
		if len(raw) == 0 {
			continue
		}

		// Declared per iteration so fields never carry over between lines.
		var rec struct {
			ID     string `json:"id"`
			Status string `json:"status"`
		}
		if json.Unmarshal(raw, &rec) != nil {
			continue
		}

		// Tombstones represent migrated deletions; leaving them out keeps
		// them from being re-added to the deletions manifest (bd-in7q fix).
		if rec.ID == "" || rec.Status == "tombstone" {
			continue
		}
		live[rec.ID] = true
	}

	return live, sc.Err()
}
// getHistoricalJSONLIDs returns every issue ID that git history shows for the
// JSONL file. jsonlPath is made relative to repoPath where possible; if that
// conversion fails, jsonlPath is passed through unchanged.
func getHistoricalJSONLIDs(repoPath, jsonlPath string) (map[string]bool, error) {
	target := jsonlPath
	if rel, relErr := filepath.Rel(repoPath, jsonlPath); relErr == nil {
		target = rel
	}

	// The per-commit walk parses each line as JSON instead of regex-matching
	// raw diff output.
	return getHistoricalIDsViaDiff(repoPath, target)
}
// looksLikeIssueID reports whether id has the shape of a beads issue ID:
// a prefix and a suffix separated by the first dash (e.g., bd-abc123,
// myproject-42).
//
// The prefix must be non-empty and consist only of ASCII letters, digits, or
// underscores. The suffix must be non-empty and consist only of lowercase
// ASCII letters, digits, or dots (dots appear in child IDs). Because the
// split happens at the first dash, any further dash lands in the suffix and
// makes the ID invalid.
func looksLikeIssueID(id string) bool {
	prefix, suffix, hasDash := strings.Cut(id, "-")
	if !hasDash || prefix == "" || suffix == "" {
		return false
	}

	for _, r := range prefix {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9', r == '_':
			// allowed prefix character
		default:
			return false
		}
	}

	for _, r := range suffix {
		switch {
		case 'a' <= r && r <= 'z', '0' <= r && r <= '9', r == '.':
			// allowed suffix character
		default:
			return false
		}
	}

	return true
}
// getHistoricalIDsViaDiff walks through git history commit-by-commit to find all IDs.
// This is more memory efficient than git log -p and allows proper JSON parsing.
//
// repoPath is the directory git is run in; relPath is the JSONL path handed
// to git. The result is the set of IDs, from any version of the file in any
// commit reachable from any ref (--all), that parse as JSON and pass
// looksLikeIssueID.
//
// A failing "git log" is returned as an error together with the empty set.
// Failures to read the file at an individual commit, and lines that cannot
// be parsed, are skipped on a best-effort basis.
func getHistoricalIDsViaDiff(repoPath, relPath string) (map[string]bool, error) {
	ids := make(map[string]bool)

	// Get list of all commits that touched the file
	cmd := exec.Command("git", "log", "--all", "--format=%H", "--", relPath)
	cmd.Dir = repoPath
	output, err := cmd.Output()
	if err != nil {
		return ids, fmt.Errorf("git log failed: %w", err)
	}

	// One hash per line; Fields also drops blank lines and stray whitespace.
	commits := strings.Fields(string(output))
	if len(commits) == 0 {
		return ids, nil
	}

	// git's <rev>:<path> syntax uses forward slashes on every platform, so an
	// OS-native path (backslashes on Windows) would make every git show fail
	// and be silently skipped below.
	gitPath := filepath.ToSlash(relPath)

	// Same limits as getCurrentJSONLIDs. With bufio.Scanner's default 64 KiB
	// token limit, one long issue line would stop the scan and hide every
	// later ID in that commit. The starting buffer is reused across commits
	// because each scanner is finished before the next one is created.
	const (
		initialBufSize = 64 * 1024
		maxLineSize    = 10 * 1024 * 1024
	)
	lineBuf := make([]byte, 0, initialBufSize)

	// For each commit, get the file content and extract IDs
	for _, commit := range commits {
		// Get file content at this commit
		showCmd := exec.Command("git", "show", commit+":"+gitPath) // #nosec G204 - args are from git log output
		showCmd.Dir = repoPath
		content, err := showCmd.Output()
		if err != nil {
			// File might not exist at this commit
			continue
		}

		// Parse each line for IDs
		scanner := bufio.NewScanner(strings.NewReader(string(content)))
		scanner.Buffer(lineBuf, maxLineSize)
		for scanner.Scan() {
			line := scanner.Text()
			// Cheap pre-filter before paying for a JSON parse.
			if !strings.Contains(line, `"id"`) {
				continue
			}
			var issue struct {
				ID string `json:"id"`
			}
			if err := json.Unmarshal([]byte(line), &issue); err != nil || issue.ID == "" {
				continue
			}
			// Validate the ID looks like an issue ID to avoid false positives
			if looksLikeIssueID(issue.ID) {
				ids[issue.ID] = true
			}
		}
		// scanner.Err() is deliberately not checked: a line above maxLineSize
		// ends the scan of this commit only, and the walk moves on.
	}

	return ids, nil
}