feat(sync): prevent zombie resurrection from stale clones
Add JSONL sanitization after git pull to remove deleted issues that git's 3-way merge may resurrect. Also add bd doctor check to hydrate deletions.jsonl from git history for pre-v0.25.0 deletions. Changes: - Add sanitizeJSONLWithDeletions() in sync.go (Step 3.6) - Add checkDeletionsManifest() in doctor.go (Check 18) - Add HydrateDeletionsManifest() fix in doctor/fix/deletions.go - Add looksLikeIssueID() validation to prevent false positives - Add comprehensive tests for sanitization logic 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
223
cmd/bd/doctor/fix/deletions.go
Normal file
223
cmd/bd/doctor/fix/deletions.go
Normal file
@@ -0,0 +1,223 @@
|
||||
package fix
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/steveyegge/beads/internal/deletions"
|
||||
)
|
||||
|
||||
// HydrateDeletionsManifest populates deletions.jsonl from git history.
|
||||
// It finds all issue IDs that were ever in the JSONL but are no longer present,
|
||||
// and adds them to the deletions manifest.
|
||||
func HydrateDeletionsManifest(path string) error {
|
||||
if err := validateBeadsWorkspace(path); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
beadsDir := filepath.Join(path, ".beads")
|
||||
jsonlPath := filepath.Join(beadsDir, "beads.jsonl")
|
||||
|
||||
// Also check for legacy issues.jsonl
|
||||
if _, err := os.Stat(jsonlPath); os.IsNotExist(err) {
|
||||
legacyPath := filepath.Join(beadsDir, "issues.jsonl")
|
||||
if _, err := os.Stat(legacyPath); err == nil {
|
||||
jsonlPath = legacyPath
|
||||
} else {
|
||||
return fmt.Errorf("no JSONL file found in .beads/")
|
||||
}
|
||||
}
|
||||
|
||||
// Load existing deletions manifest to avoid duplicates
|
||||
deletionsPath := deletions.DefaultPath(beadsDir)
|
||||
existingDeletions, err := deletions.LoadDeletions(deletionsPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load existing deletions: %w", err)
|
||||
}
|
||||
|
||||
// Get current IDs from JSONL
|
||||
currentIDs, err := getCurrentJSONLIDs(jsonlPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read current JSONL: %w", err)
|
||||
}
|
||||
|
||||
// Get historical IDs from git
|
||||
historicalIDs, err := getHistoricalJSONLIDs(path, jsonlPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get historical IDs from git: %w", err)
|
||||
}
|
||||
|
||||
// Find deleted IDs (in history but not in current, and not already in manifest)
|
||||
var deletedIDs []string
|
||||
for id := range historicalIDs {
|
||||
if !currentIDs[id] {
|
||||
// Skip if already in deletions manifest
|
||||
if _, exists := existingDeletions.Records[id]; exists {
|
||||
continue
|
||||
}
|
||||
deletedIDs = append(deletedIDs, id)
|
||||
}
|
||||
}
|
||||
|
||||
if len(deletedIDs) == 0 {
|
||||
fmt.Println(" No new deleted issues found in git history")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add to deletions manifest
|
||||
now := time.Now()
|
||||
|
||||
for _, id := range deletedIDs {
|
||||
record := deletions.DeletionRecord{
|
||||
ID: id,
|
||||
Timestamp: now,
|
||||
Actor: "bd-doctor-hydrate",
|
||||
Reason: "Hydrated from git history",
|
||||
}
|
||||
if err := deletions.AppendDeletion(deletionsPath, record); err != nil {
|
||||
return fmt.Errorf("failed to append deletion record for %s: %w", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf(" Added %d deletion records to manifest\n", len(deletedIDs))
|
||||
return nil
|
||||
}
|
||||
|
||||
// getCurrentJSONLIDs reads the current JSONL file and returns a set of IDs.
|
||||
func getCurrentJSONLIDs(jsonlPath string) (map[string]bool, error) {
|
||||
ids := make(map[string]bool)
|
||||
|
||||
file, err := os.Open(jsonlPath) // #nosec G304 - path validated by caller
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return ids, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 10*1024*1024)
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var issue struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := json.Unmarshal(line, &issue); err != nil {
|
||||
continue
|
||||
}
|
||||
if issue.ID != "" {
|
||||
ids[issue.ID] = true
|
||||
}
|
||||
}
|
||||
|
||||
return ids, scanner.Err()
|
||||
}
|
||||
|
||||
// getHistoricalJSONLIDs uses git log to find all IDs that were ever in the JSONL.
|
||||
func getHistoricalJSONLIDs(repoPath, jsonlPath string) (map[string]bool, error) {
|
||||
// Get the relative path for the JSONL file
|
||||
relPath, err := filepath.Rel(repoPath, jsonlPath)
|
||||
if err != nil {
|
||||
relPath = jsonlPath
|
||||
}
|
||||
|
||||
// Use the commit-by-commit approach which is more memory efficient
|
||||
// and allows us to properly parse JSON rather than regex matching
|
||||
return getHistoricalIDsViaDiff(repoPath, relPath)
|
||||
}
|
||||
|
||||
// looksLikeIssueID validates that a string looks like a beads issue ID.
|
||||
// Issue IDs have the format: prefix-hash or prefix-number (e.g., bd-abc123, myproject-42)
|
||||
func looksLikeIssueID(id string) bool {
|
||||
if id == "" {
|
||||
return false
|
||||
}
|
||||
// Must contain at least one dash
|
||||
dashIdx := strings.Index(id, "-")
|
||||
if dashIdx <= 0 || dashIdx >= len(id)-1 {
|
||||
return false
|
||||
}
|
||||
// Prefix should be alphanumeric (letters/numbers/underscores)
|
||||
prefix := id[:dashIdx]
|
||||
for _, c := range prefix {
|
||||
if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// Suffix should be alphanumeric (base36 hash or number), may contain dots for children
|
||||
suffix := id[dashIdx+1:]
|
||||
for _, c := range suffix {
|
||||
if !((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '.') {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// getHistoricalIDsViaDiff walks through git history commit-by-commit to find all IDs.
|
||||
// This is more memory efficient than git log -p and allows proper JSON parsing.
|
||||
func getHistoricalIDsViaDiff(repoPath, relPath string) (map[string]bool, error) {
|
||||
ids := make(map[string]bool)
|
||||
|
||||
// Get list of all commits that touched the file
|
||||
cmd := exec.Command("git", "log", "--all", "--format=%H", "--", relPath)
|
||||
cmd.Dir = repoPath
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return ids, fmt.Errorf("git log failed: %w", err)
|
||||
}
|
||||
|
||||
commits := strings.Split(strings.TrimSpace(string(output)), "\n")
|
||||
if len(commits) == 0 || (len(commits) == 1 && commits[0] == "") {
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
// For each commit, get the file content and extract IDs
|
||||
for _, commit := range commits {
|
||||
if commit == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get file content at this commit
|
||||
showCmd := exec.Command("git", "show", commit+":"+relPath)
|
||||
showCmd.Dir = repoPath
|
||||
|
||||
content, err := showCmd.Output()
|
||||
if err != nil {
|
||||
// File might not exist at this commit
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse each line for IDs
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.Contains(line, `"id"`) {
|
||||
var issue struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(line), &issue); err == nil && issue.ID != "" {
|
||||
// Validate the ID looks like an issue ID to avoid false positives
|
||||
if looksLikeIssueID(issue.ID) {
|
||||
ids[issue.ID] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ids, nil
|
||||
}
|
||||
Reference in New Issue
Block a user