feat(deletions): auto-compact during sync and git history fallback fixes

- Add Count function to deletions package for fast line counting
- Add maybeAutoCompactDeletions to sync (opt-in via deletions.auto_compact config)
- Fix regex escaping in batchCheckGitHistory (bd-bgs)
- Add 30s timeout to git history commands (bd-f0n)
- Use git rev-parse --show-toplevel for proper repo root detection (bd-bhd)
- Add tests for Count and auto-compact functionality

Closes: bd-qsm, bd-bgs, bd-f0n, bd-bhd
This commit is contained in:
Steve Yegge
2025-11-25 15:08:12 -08:00
parent 2f5ef33c08
commit 4898c424aa
5 changed files with 439 additions and 10 deletions

View File

@@ -180,6 +180,35 @@ func DefaultPath(beadsDir string) string {
return filepath.Join(beadsDir, "deletions.jsonl")
}
// Count returns the number of non-empty lines in the deletions manifest at path.
// It does not parse JSON; it only splits the file into lines, so a malformed
// record still counts as one line.
//
// A missing file is not an error: Count returns (0, nil). Any other failure to
// open or read the file is returned wrapped, together with a zero count.
//
// A single line may be up to maxLineBytes long. Without the explicit buffer
// limit, bufio.Scanner's default 64 KiB token cap would make Count fail with
// bufio.ErrTooLong as soon as one record exceeded that size.
func Count(path string) (int, error) {
	// maxLineBytes caps how far the scan buffer may grow for one line.
	const maxLineBytes = 16 * 1024 * 1024

	f, err := os.Open(path) // #nosec G304 - controlled path from caller
	if err != nil {
		if os.IsNotExist(err) {
			return 0, nil
		}
		return 0, fmt.Errorf("failed to open deletions file: %w", err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	// Start small; the scanner grows the buffer on demand up to maxLineBytes.
	scanner.Buffer(make([]byte, 0, 4096), maxLineBytes)

	count := 0
	for scanner.Scan() {
		// Bytes avoids the per-line string allocation that Text would incur;
		// only the length matters here.
		if len(scanner.Bytes()) > 0 {
			count++
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, fmt.Errorf("error reading deletions file: %w", err)
	}
	return count, nil
}
// DefaultRetentionDays is the default retention period for deletion records,
// expressed in days.
const DefaultRetentionDays = 7

View File

@@ -546,3 +546,64 @@ func TestPruneDeletions_ZeroRetention(t *testing.T) {
t.Errorf("expected 1 pruned with 0 retention, got %d", result.PrunedCount)
}
}
// TestCount_Empty verifies that Count reports zero lines, without an error,
// both when the manifest does not exist and when it exists but has no content.
func TestCount_Empty(t *testing.T) {
	tmpDir := t.TempDir()

	// Non-existent file should return 0. The path lives under the test's own
	// temp dir so the check does not depend on the host filesystem layout.
	missing := filepath.Join(tmpDir, "nonexistent", "deletions.jsonl")
	count, err := Count(missing)
	if err != nil {
		t.Fatalf("expected no error for non-existent file, got: %v", err)
	}
	if count != 0 {
		t.Errorf("expected 0 count for non-existent file, got %d", count)
	}

	// Existing but zero-length file should also return 0.
	empty := filepath.Join(tmpDir, "deletions.jsonl")
	if err := os.WriteFile(empty, nil, 0644); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}
	count, err = Count(empty)
	if err != nil {
		t.Fatalf("expected no error for empty file, got: %v", err)
	}
	if count != 0 {
		t.Errorf("expected 0 count for empty file, got %d", count)
	}
}
// TestCount_WithRecords appends three deletion records to a fresh manifest
// and checks that Count reports exactly three lines.
func TestCount_WithRecords(t *testing.T) {
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")

	stamp := time.Now()
	entries := []DeletionRecord{
		{ID: "bd-001", Timestamp: stamp, Actor: "user1"},
		{ID: "bd-002", Timestamp: stamp, Actor: "user2"},
		{ID: "bd-003", Timestamp: stamp, Actor: "user3"},
	}
	for i := range entries {
		if appendErr := AppendDeletion(manifest, entries[i]); appendErr != nil {
			t.Fatalf("AppendDeletion failed: %v", appendErr)
		}
	}

	got, err := Count(manifest)
	switch {
	case err != nil:
		t.Fatalf("Count failed: %v", err)
	case got != 3:
		t.Errorf("expected 3, got %d", got)
	}
}
// TestCount_WithEmptyLines verifies that Count ignores blank lines and only
// counts lines that carry content.
func TestCount_WithEmptyLines(t *testing.T) {
	tmpDir := t.TempDir()
	path := filepath.Join(tmpDir, "deletions.jsonl")

	// Write content with empty lines. The newlines are spelled out explicitly
	// so the blank lines cannot be lost to editor or formatter whitespace
	// cleanup: one blank line between the records and one after the last.
	content := `{"id":"bd-001","ts":"2024-01-01T00:00:00Z","by":"user1"}` + "\n" +
		"\n" +
		`{"id":"bd-002","ts":"2024-01-02T00:00:00Z","by":"user2"}` + "\n" +
		"\n"
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatalf("failed to write test file: %v", err)
	}

	count, err := Count(path)
	if err != nil {
		t.Fatalf("Count failed: %v", err)
	}
	// Should count only non-empty lines
	if count != 2 {
		t.Errorf("expected 2 (excluding empty lines), got %d", count)
	}
}

View File

@@ -7,6 +7,7 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
@@ -862,11 +863,34 @@ func checkGitHistoryForDeletions(beadsDir string, ids []string) []string {
return nil
}
// Get the repo root directory (parent of .beads)
repoRoot := filepath.Dir(beadsDir)
// Find the actual git repo root using git rev-parse (bd-bhd)
// This handles monorepos and nested projects where .beads isn't at repo root
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel")
cmd.Dir = beadsDir
output, err := cmd.Output()
if err != nil {
// Not in a git repo or git not available - can't do history check
return nil
}
repoRoot := strings.TrimSpace(string(output))
// Compute relative path from repo root to beads.jsonl
// beadsDir may be relative; make it absolute before computing its path relative to repoRoot
absBeadsDir, err := filepath.Abs(beadsDir)
if err != nil {
return nil
}
relBeadsDir, err := filepath.Rel(repoRoot, absBeadsDir)
if err != nil {
return nil
}
// Build JSONL path relative to repo root
jsonlPath := filepath.Join(".beads", "beads.jsonl")
jsonlPath := filepath.Join(relBeadsDir, "beads.jsonl")
var deleted []string
@@ -888,15 +912,24 @@ func checkGitHistoryForDeletions(beadsDir string, ids []string) []string {
return deleted
}
// gitHistoryTimeout is the maximum time to wait for a single git history search.
// It bounds each `git log` invocation made by wasInGitHistory and
// batchCheckGitHistory, preventing hangs on large repositories (bd-f0n).
// The limit applies per command: when the batch search fails and falls back to
// per-ID checks, every individual check gets its own full timeout.
const gitHistoryTimeout = 30 * time.Second
// wasInGitHistory checks if a single ID was ever in the JSONL via git history.
// Returns true if the ID was found in history (meaning it was deleted).
func wasInGitHistory(repoRoot, jsonlPath, id string) bool {
// git log --all -S "\"id\":\"bd-xxx\"" --oneline -- .beads/beads.jsonl
// This searches for commits that added or removed the ID string
// Note: -S uses literal string matching, not regex, so no escaping needed
searchPattern := fmt.Sprintf(`"id":"%s"`, id)
// Use context with timeout to prevent hangs on large repos (bd-f0n)
ctx, cancel := context.WithTimeout(context.Background(), gitHistoryTimeout)
defer cancel()
// #nosec G204 - searchPattern is constructed from validated issue IDs
cmd := exec.Command("git", "log", "--all", "-S", searchPattern, "--oneline", "--", jsonlPath)
cmd := exec.CommandContext(ctx, "git", "log", "--all", "-S", searchPattern, "--oneline", "--", jsonlPath)
cmd.Dir = repoRoot
var stdout bytes.Buffer
@@ -904,7 +937,7 @@ func wasInGitHistory(repoRoot, jsonlPath, id string) bool {
cmd.Stderr = nil // Ignore stderr
if err := cmd.Run(); err != nil {
// Git command failed - could be shallow clone, not a git repo, etc.
// Git command failed - could be shallow clone, not a git repo, timeout, etc.
// Conservative: assume issue is local work, don't delete
return false
}
@@ -919,15 +952,21 @@ func wasInGitHistory(repoRoot, jsonlPath, id string) bool {
func batchCheckGitHistory(repoRoot, jsonlPath string, ids []string) []string {
// Build a regex pattern to match any of the IDs
// Pattern: "id":"bd-xxx"|"id":"bd-yyy"|...
// Escape regex special characters in IDs to avoid malformed patterns (bd-bgs)
patterns := make([]string, 0, len(ids))
for _, id := range ids {
patterns = append(patterns, fmt.Sprintf(`"id":"%s"`, id))
escapedID := regexp.QuoteMeta(id)
patterns = append(patterns, fmt.Sprintf(`"id":"%s"`, escapedID))
}
searchPattern := strings.Join(patterns, "|")
// Use context with timeout to prevent hangs on large repos (bd-f0n)
ctx, cancel := context.WithTimeout(context.Background(), gitHistoryTimeout)
defer cancel()
// Use git log -G (regex) for batch search
// #nosec G204 - searchPattern is constructed from validated issue IDs
cmd := exec.Command("git", "log", "--all", "-G", searchPattern, "-p", "--", jsonlPath)
cmd := exec.CommandContext(ctx, "git", "log", "--all", "-G", searchPattern, "-p", "--", jsonlPath)
cmd.Dir = repoRoot
var stdout bytes.Buffer
@@ -935,7 +974,8 @@ func batchCheckGitHistory(repoRoot, jsonlPath string, ids []string) []string {
cmd.Stderr = nil // Ignore stderr
if err := cmd.Run(); err != nil {
// Git command failed - fall back to individual checks
// Git command failed (timeout, shallow clone, etc.) - fall back to individual checks
// Individual checks also have timeout protection
var deleted []string
for _, id := range ids {
if wasInGitHistory(repoRoot, jsonlPath, id) {