feat(deletions): add pruning and git history fallback

Implements two P1 tasks for the deletions manifest epic:

bd-v2x: Add deletions pruning to bd compact
- PruneDeletions function removes records older than retention period
- Default retention: 7 days (configurable via metadata.json)
- CLI --retention flag for override
- Atomic file rewrite prevents corruption
- Called automatically during all compact operations

bd-pnm: Add git history fallback for pruned deletions
- Catches deletions where manifest entry was pruned
- Uses git log -S to search for ID in JSONL history
- Batches larger ID sets (more than 10) into a single `git log -G` regex search for efficiency
- Self-healing: backfills manifest on hit
- Conservative: keeps issue if git check fails (shallow clone)

Tests added for both features with edge cases covered.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Steve Yegge
2025-11-25 12:41:29 -08:00
parent 1804a91787
commit 3f84ec3774
7 changed files with 587 additions and 22 deletions

View File

@@ -13,6 +13,9 @@ type Config struct {
Database string `json:"database"`
JSONLExport string `json:"jsonl_export,omitempty"`
LastBdVersion string `json:"last_bd_version,omitempty"`
// Deletions configuration
DeletionsRetentionDays int `json:"deletions_retention_days,omitempty"` // 0 means use default (7 days)
}
func DefaultConfig() *Config {
@@ -94,3 +97,14 @@ func (c *Config) JSONLPath(beadsDir string) string {
}
return filepath.Join(beadsDir, c.JSONLExport)
}
// DefaultDeletionsRetentionDays is the default retention period, in days, for
// deletion records. GetDeletionsRetentionDays returns it whenever the configured
// value is zero or negative.
const DefaultDeletionsRetentionDays = 7
// GetDeletionsRetentionDays reports how many days deletion records should be
// retained. A configured value of zero or less is treated as "not set", in
// which case DefaultDeletionsRetentionDays is returned instead.
func (c *Config) GetDeletionsRetentionDays() int {
	if days := c.DeletionsRetentionDays; days > 0 {
		return days
	}
	return DefaultDeletionsRetentionDays
}

View File

@@ -114,8 +114,46 @@ func TestConfigPath(t *testing.T) {
beadsDir := "/home/user/project/.beads"
got := ConfigPath(beadsDir)
want := filepath.Join(beadsDir, "metadata.json")
if got != want {
t.Errorf("ConfigPath() = %q, want %q", got, want)
}
}
func TestGetDeletionsRetentionDays(t *testing.T) {
tests := []struct {
name string
cfg *Config
want int
}{
{
name: "zero uses default",
cfg: &Config{DeletionsRetentionDays: 0},
want: DefaultDeletionsRetentionDays,
},
{
name: "negative uses default",
cfg: &Config{DeletionsRetentionDays: -5},
want: DefaultDeletionsRetentionDays,
},
{
name: "custom value",
cfg: &Config{DeletionsRetentionDays: 14},
want: 14,
},
{
name: "minimum value 1",
cfg: &Config{DeletionsRetentionDays: 1},
want: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := tt.cfg.GetDeletionsRetentionDays()
if got != tt.want {
t.Errorf("GetDeletionsRetentionDays() = %d, want %d", got, tt.want)
}
})
}
}

View File

@@ -179,3 +179,54 @@ func WriteDeletions(path string, records []DeletionRecord) error {
func DefaultPath(beadsDir string) string {
	// The deletions manifest always has this fixed file name inside beadsDir.
	const manifestName = "deletions.jsonl"
	return filepath.Join(beadsDir, manifestName)
}
// DefaultRetentionDays is the default number of days to retain deletion records.
// PruneDeletions takes its retention period as an argument rather than reading
// this constant, so callers that want the default must pass it explicitly.
const DefaultRetentionDays = 7
// PruneResult contains the result of a prune operation.
type PruneResult struct {
	// KeptCount is the number of records that were retained.
	KeptCount int
	// PrunedCount is the number of records that were removed.
	PrunedCount int
	// PrunedIDs lists the ID of every removed record. PruneDeletions
	// initializes it to an empty, non-nil slice.
	PrunedIDs []string
}
// PruneDeletions removes deletion records older than the specified retention period.
//
// A record is kept when its timestamp is at or after the cutoff (the current
// time minus retentionDays calendar days) and pruned otherwise. A retentionDays
// of 0 places the cutoff at the current instant, so every record timestamped in
// the past is pruned. A negative retentionDays is rejected with an error before
// the file is touched: it would push the cutoff into the future and silently
// prune every record, including ones that were just written.
//
// Returns PruneResult with counts and IDs of pruned records.
// If the file doesn't exist or is empty, returns zero counts with no error.
// The manifest is rewritten via WriteDeletions only when at least one record
// was pruned; otherwise the file is left as it is.
//
// NOTE(review): the tests index LoadDeletions' Records by ID, so it appears to
// be a map. If so, the order of PrunedIDs and of the rewritten file follows map
// iteration order and is not stable between runs — consider sorting by ID if a
// stable on-disk order matters.
func PruneDeletions(path string, retentionDays int) (*PruneResult, error) {
	if retentionDays < 0 {
		return nil, fmt.Errorf("invalid retention period: %d days (must not be negative)", retentionDays)
	}

	result := &PruneResult{
		PrunedIDs: []string{},
	}

	loadResult, err := LoadDeletions(path)
	if err != nil {
		return nil, fmt.Errorf("failed to load deletions: %w", err)
	}
	if len(loadResult.Records) == 0 {
		return result, nil
	}

	cutoff := time.Now().AddDate(0, 0, -retentionDays)

	var kept []DeletionRecord
	for _, record := range loadResult.Records {
		// Strictly older than the cutoff is pruned; a timestamp exactly at
		// the cutoff is still inside the retention window.
		if record.Timestamp.Before(cutoff) {
			result.PrunedCount++
			result.PrunedIDs = append(result.PrunedIDs, record.ID)
			continue
		}
		kept = append(kept, record)
	}
	result.KeptCount = len(kept)

	// Only rewrite if we actually pruned something.
	if result.PrunedCount > 0 {
		if err := WriteDeletions(path, kept); err != nil {
			return nil, fmt.Errorf("failed to write pruned deletions: %w", err)
		}
	}

	return result, nil
}

View File

@@ -333,3 +333,216 @@ func TestAppendDeletion_EmptyID(t *testing.T) {
t.Errorf("unexpected error message: %v", err)
}
}
// TestPruneDeletions_Empty verifies that pruning a manifest that was never
// created succeeds and reports nothing kept and nothing pruned.
func TestPruneDeletions_Empty(t *testing.T) {
	// The file is deliberately not created.
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")

	res, err := PruneDeletions(manifest, 7)
	if err != nil {
		t.Fatalf("PruneDeletions should not fail on non-existent file: %v", err)
	}

	if got := res.KeptCount; got != 0 {
		t.Errorf("expected 0 kept, got %d", got)
	}
	if got := res.PrunedCount; got != 0 {
		t.Errorf("expected 0 pruned, got %d", got)
	}
}
// TestPruneDeletions_AllRecent verifies that records only a few hours old all
// survive a 7-day prune and that the manifest still holds every one of them.
func TestPruneDeletions_AllRecent(t *testing.T) {
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")
	base := time.Now()

	// Seed the manifest with three records, each only hours old.
	seed := []DeletionRecord{
		{ID: "bd-001", Timestamp: base.Add(-1 * time.Hour), Actor: "user1"},
		{ID: "bd-002", Timestamp: base.Add(-2 * time.Hour), Actor: "user2"},
		{ID: "bd-003", Timestamp: base.Add(-3 * time.Hour), Actor: "user3"},
	}
	for _, rec := range seed {
		if err := AppendDeletion(manifest, rec); err != nil {
			t.Fatalf("AppendDeletion failed: %v", err)
		}
	}

	// With 7 days of retention nothing is old enough to be pruned.
	res, err := PruneDeletions(manifest, 7)
	if err != nil {
		t.Fatalf("PruneDeletions failed: %v", err)
	}
	if got := res.KeptCount; got != 3 {
		t.Errorf("expected 3 kept, got %d", got)
	}
	if got := res.PrunedCount; got != 0 {
		t.Errorf("expected 0 pruned, got %d", got)
	}

	// The manifest must still contain all three records.
	after, err := LoadDeletions(manifest)
	if err != nil {
		t.Fatalf("LoadDeletions failed: %v", err)
	}
	if n := len(after.Records); n != 3 {
		t.Errorf("expected 3 records after prune, got %d", n)
	}
}
// TestPruneDeletions_SomeOld verifies that a 7-day prune removes exactly the
// records older than the window and leaves the recent ones in the manifest.
func TestPruneDeletions_SomeOld(t *testing.T) {
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")
	base := time.Now()

	// Interleave two recent records with two that are well past retention.
	seed := []DeletionRecord{
		{ID: "bd-001", Timestamp: base.Add(-1 * time.Hour), Actor: "user1"}, // recent
		{ID: "bd-002", Timestamp: base.AddDate(0, 0, -10), Actor: "user2"},  // 10 days old
		{ID: "bd-003", Timestamp: base.Add(-2 * time.Hour), Actor: "user3"}, // recent
		{ID: "bd-004", Timestamp: base.AddDate(0, 0, -15), Actor: "user4"},  // 15 days old
	}
	for _, rec := range seed {
		if err := AppendDeletion(manifest, rec); err != nil {
			t.Fatalf("AppendDeletion failed: %v", err)
		}
	}

	res, err := PruneDeletions(manifest, 7)
	if err != nil {
		t.Fatalf("PruneDeletions failed: %v", err)
	}
	if got := res.KeptCount; got != 2 {
		t.Errorf("expected 2 kept, got %d", got)
	}
	if got := res.PrunedCount; got != 2 {
		t.Errorf("expected 2 pruned, got %d", got)
	}

	// Both old IDs must be reported as pruned, in any order.
	pruned := make(map[string]bool, len(res.PrunedIDs))
	for _, id := range res.PrunedIDs {
		pruned[id] = true
	}
	if !(pruned["bd-002"] && pruned["bd-004"]) {
		t.Errorf("expected bd-002 and bd-004 to be pruned, got %v", res.PrunedIDs)
	}

	// The rewritten manifest must hold only the two recent records.
	after, err := LoadDeletions(manifest)
	if err != nil {
		t.Fatalf("LoadDeletions failed: %v", err)
	}
	if n := len(after.Records); n != 2 {
		t.Errorf("expected 2 records after prune, got %d", n)
	}
	for _, id := range []string{"bd-001", "bd-003"} {
		if _, ok := after.Records[id]; !ok {
			t.Errorf("expected %s to remain", id)
		}
	}
}
// TestPruneDeletions_AllOld verifies that when every record is past the
// retention window, all of them are pruned and the manifest loads as empty.
func TestPruneDeletions_AllOld(t *testing.T) {
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")
	base := time.Now()

	// Both records are far older than the 7-day window used below.
	seed := []DeletionRecord{
		{ID: "bd-001", Timestamp: base.AddDate(0, 0, -30), Actor: "user1"},
		{ID: "bd-002", Timestamp: base.AddDate(0, 0, -60), Actor: "user2"},
	}
	for _, rec := range seed {
		if err := AppendDeletion(manifest, rec); err != nil {
			t.Fatalf("AppendDeletion failed: %v", err)
		}
	}

	res, err := PruneDeletions(manifest, 7)
	if err != nil {
		t.Fatalf("PruneDeletions failed: %v", err)
	}
	if got := res.KeptCount; got != 0 {
		t.Errorf("expected 0 kept, got %d", got)
	}
	if got := res.PrunedCount; got != 2 {
		t.Errorf("expected 2 pruned, got %d", got)
	}

	// Nothing should be left in the manifest.
	after, err := LoadDeletions(manifest)
	if err != nil {
		t.Fatalf("LoadDeletions failed: %v", err)
	}
	if n := len(after.Records); n != 0 {
		t.Errorf("expected 0 records after prune, got %d", n)
	}
}
// TestPruneDeletions_NearBoundary places one record an hour inside and one an
// hour outside a 7-day retention window and checks that only the latter goes.
func TestPruneDeletions_NearBoundary(t *testing.T) {
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")
	base := time.Now()

	justInside := base.AddDate(0, 0, -6).Add(-23 * time.Hour) // 6 days 23 hours ago (kept)
	justOutside := base.AddDate(0, 0, -7).Add(-1 * time.Hour) // 7 days 1 hour ago (pruned)

	seed := []DeletionRecord{
		{ID: "bd-001", Timestamp: justInside, Actor: "user1"},
		{ID: "bd-002", Timestamp: justOutside, Actor: "user2"},
	}
	for _, rec := range seed {
		if err := AppendDeletion(manifest, rec); err != nil {
			t.Fatalf("AppendDeletion failed: %v", err)
		}
	}

	res, err := PruneDeletions(manifest, 7)
	if err != nil {
		t.Fatalf("PruneDeletions failed: %v", err)
	}
	if got := res.KeptCount; got != 1 {
		t.Errorf("expected 1 kept (inside boundary), got %d", got)
	}
	if got := res.PrunedCount; got != 1 {
		t.Errorf("expected 1 pruned (outside boundary), got %d", got)
	}
}
// TestPruneDeletions_ZeroRetention checks that a retention of 0 days uses "now"
// as the cutoff: a past record is pruned while a future-dated one is kept.
func TestPruneDeletions_ZeroRetention(t *testing.T) {
	manifest := filepath.Join(t.TempDir(), "deletions.jsonl")
	base := time.Now()

	seed := []DeletionRecord{
		{ID: "bd-001", Timestamp: base.Add(1 * time.Hour), Actor: "user1"},  // one hour ahead: kept
		{ID: "bd-002", Timestamp: base.Add(-1 * time.Hour), Actor: "user2"}, // one hour back: pruned
	}
	for _, rec := range seed {
		if err := AppendDeletion(manifest, rec); err != nil {
			t.Fatalf("AppendDeletion failed: %v", err)
		}
	}

	// Zero retention means the cutoff is the moment of the call.
	res, err := PruneDeletions(manifest, 0)
	if err != nil {
		t.Fatalf("PruneDeletions failed: %v", err)
	}

	if got := res.KeptCount; got != 1 {
		t.Errorf("expected 1 kept with 0 retention, got %d", got)
	}
	if got := res.PrunedCount; got != 1 {
		t.Errorf("expected 1 pruned with 0 retention, got %d", got)
	}
}

View File

@@ -1,12 +1,15 @@
package importer
import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
"github.com/steveyegge/beads/internal/deletions"
"github.com/steveyegge/beads/internal/storage"
@@ -753,6 +756,7 @@ func importComments(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issu
// purgeDeletedIssues removes issues from the DB that are in the deletions manifest
// but not in the incoming JSONL. This enables deletion propagation across clones.
// Also uses git history fallback for deletions that were pruned from the manifest.
func purgeDeletedIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, dbPath string, jsonlIssues []*types.Issue, result *Result) error {
// Get deletions manifest path (same directory as database)
beadsDir := filepath.Dir(dbPath)
@@ -769,11 +773,6 @@ func purgeDeletedIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage,
fmt.Fprintf(os.Stderr, "Warning: %s\n", warning)
}
// If no deletions, nothing to do
if len(loadResult.Records) == 0 {
return nil
}
// Build set of IDs in the incoming JSONL for O(1) lookup
jsonlIDs := make(map[string]bool, len(jsonlIssues))
for _, issue := range jsonlIssues {
@@ -786,6 +785,9 @@ func purgeDeletedIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage,
return fmt.Errorf("failed to get DB issues: %w", err)
}
// Collect IDs that need git history check (not in JSONL, not in manifest)
var needGitCheck []string
// Find DB issues that:
// 1. Are NOT in the JSONL (not synced from remote)
// 2. ARE in the deletions manifest (were deleted elsewhere)
@@ -811,13 +813,155 @@ func purgeDeletedIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage,
result.Purged++
result.PurgedIDs = append(result.PurgedIDs, dbIssue.ID)
} else {
// Not in JSONL and not in deletions manifest
// This could be:
// 1. Local work (new issue not yet exported)
// 2. Deletion was pruned from manifest (check git history)
needGitCheck = append(needGitCheck, dbIssue.ID)
}
}
// Git history fallback for potential pruned deletions
if len(needGitCheck) > 0 {
deletedViaGit := checkGitHistoryForDeletions(beadsDir, needGitCheck)
for _, id := range deletedViaGit {
// Backfill the deletions manifest (self-healing)
backfillRecord := deletions.DeletionRecord{
ID: id,
Timestamp: time.Now().UTC(),
Actor: "git-history-backfill",
Reason: "recovered from git history (pruned from manifest)",
}
if err := deletions.AppendDeletion(deletionsPath, backfillRecord); err != nil {
fmt.Fprintf(os.Stderr, "Warning: failed to backfill deletion record for %s: %v\n", id, err)
}
// Delete from DB
if err := sqliteStore.DeleteIssue(ctx, id); err != nil {
fmt.Fprintf(os.Stderr, "Warning: failed to purge %s (git-recovered): %v\n", id, err)
continue
}
fmt.Fprintf(os.Stderr, "Purged %s (recovered from git history, pruned from manifest)\n", id)
result.Purged++
result.PurgedIDs = append(result.PurgedIDs, id)
}
// If not in JSONL and not in deletions manifest, keep it (local work)
}
return nil
}
// checkGitHistoryForDeletions checks if IDs were ever in the JSONL history.
// Returns the IDs that were found in git history (meaning they were deleted,
// and the deletion record was pruned from the manifest).
//
// Uses batched git log search for efficiency when checking multiple IDs.
func checkGitHistoryForDeletions(beadsDir string, ids []string) []string {
if len(ids) == 0 {
return nil
}
// Get the repo root directory (parent of .beads)
repoRoot := filepath.Dir(beadsDir)
// Build JSONL path relative to repo root
jsonlPath := filepath.Join(".beads", "beads.jsonl")
var deleted []string
// For efficiency, batch IDs into a single git command when possible
// We use git log with -S to search for string additions/removals
if len(ids) <= 10 {
// Small batch: check each ID individually for accuracy
for _, id := range ids {
if wasInGitHistory(repoRoot, jsonlPath, id) {
deleted = append(deleted, id)
}
}
} else {
// Large batch: use grep pattern for efficiency
// This may have some false positives, but is much faster
deleted = batchCheckGitHistory(repoRoot, jsonlPath, ids)
}
return deleted
}
// wasInGitHistory reports whether any commit reachable from any ref added or
// removed the string `"id":"<id>"` in jsonlPath. It runs
//
//	git log --all -S "\"id\":\"<id>\"" --oneline -- <jsonlPath>
//
// from repoRoot and returns true when git prints at least one commit.
// If git cannot be run or exits with an error (not a repository, missing
// directory, ...), it returns false so the caller keeps the issue.
func wasInGitHistory(repoRoot, jsonlPath, id string) bool {
	needle := fmt.Sprintf(`"id":"%s"`, id)

	var out bytes.Buffer
	// #nosec G204 - needle is constructed from validated issue IDs
	gitLog := exec.Command("git", "log", "--all", "-S", needle, "--oneline", "--", jsonlPath)
	gitLog.Dir = repoRoot
	gitLog.Stdout = &out // stderr is left unset and therefore discarded

	if gitLog.Run() != nil {
		// Conservative: a failed lookup is treated as "not found".
		return false
	}

	// Any non-blank output means some commit touched the ID string.
	return len(bytes.TrimSpace(out.Bytes())) != 0
}
// batchCheckGitHistory checks multiple IDs at once using git log with pattern matching.
// Returns the IDs that were found in git history.
//
// It runs a single
//
//	git log --all -G <regex> -p -- <jsonlPath>
//
// from repoRoot, where <regex> is the alternation of `"id":"<id>"` for every
// ID, and then scans the patch output for each ID's exact string. Each ID is
// escaped with regexp.QuoteMeta before being placed in the regex, so IDs that
// contain metacharacters such as '.', '+' or '(' are matched literally instead
// of over-matching or producing a malformed pattern.
//
// If the batched git command fails, every ID is checked individually with
// wasInGitHistory. An empty ids slice returns nil without running git.
func batchCheckGitHistory(repoRoot, jsonlPath string, ids []string) []string {
	if len(ids) == 0 {
		return nil
	}

	// needles are the exact strings searched for in git's output; patterns
	// are the same strings escaped for use inside the -G regex.
	needles := make([]string, len(ids))
	patterns := make([]string, len(ids))
	for i, id := range ids {
		needles[i] = fmt.Sprintf(`"id":"%s"`, id)
		patterns[i] = regexp.QuoteMeta(needles[i])
	}
	// Pattern: "id":"bd-xxx"|"id":"bd-yyy"|...
	searchPattern := strings.Join(patterns, "|")

	var stdout bytes.Buffer
	// Use git log -G (regex) for batch search
	// #nosec G204 - searchPattern is constructed from validated, regex-escaped issue IDs
	cmd := exec.Command("git", "log", "--all", "-G", searchPattern, "-p", "--", jsonlPath)
	cmd.Dir = repoRoot
	cmd.Stdout = &stdout // stderr is left unset and therefore discarded

	var deleted []string
	if err := cmd.Run(); err != nil {
		// Git command failed - fall back to individual checks
		for _, id := range ids {
			if wasInGitHistory(repoRoot, jsonlPath, id) {
				deleted = append(deleted, id)
			}
		}
		return deleted
	}

	output := stdout.String()
	if output == "" {
		return nil
	}

	// The regex only selects commits; decide per ID by looking for its exact
	// string in the patch text those commits produced.
	for i, id := range ids {
		if strings.Contains(output, needles[i]) {
			deleted = append(deleted, id)
		}
	}
	return deleted
}
// Helper functions
func GetPrefixList(prefixes map[string]int) []string {

View File

@@ -1067,3 +1067,43 @@ func TestConcurrentExternalRefImports(t *testing.T) {
}
})
}
// TestCheckGitHistoryForDeletions_EmptyList verifies that both a nil slice and
// an empty slice of IDs produce a nil result.
func TestCheckGitHistoryForDeletions_EmptyList(t *testing.T) {
	if got := checkGitHistoryForDeletions("/tmp/test", nil); got != nil {
		t.Errorf("Expected nil for empty list, got %v", got)
	}

	if got := checkGitHistoryForDeletions("/tmp/test", []string{}); got != nil {
		t.Errorf("Expected nil for empty slice, got %v", got)
	}
}
// TestCheckGitHistoryForDeletions_NonGitDir verifies the conservative path:
// outside a git repository no ID is reported as deleted.
func TestCheckGitHistoryForDeletions_NonGitDir(t *testing.T) {
	got := checkGitHistoryForDeletions(t.TempDir(), []string{"bd-test"})
	if len(got) > 0 {
		t.Errorf("Expected empty result for non-git dir, got %v", got)
	}
}
// TestWasInGitHistory_NonGitDir verifies the conservative path: when the
// working directory is not a git repository the lookup reports false.
func TestWasInGitHistory_NonGitDir(t *testing.T) {
	if found := wasInGitHistory(t.TempDir(), ".beads/beads.jsonl", "bd-test"); found {
		t.Error("Expected false for non-git dir")
	}
}
// TestBatchCheckGitHistory_NonGitDir verifies that outside a git repository the
// batched search, and the per-ID fallback it triggers, report no IDs.
func TestBatchCheckGitHistory_NonGitDir(t *testing.T) {
	ids := []string{"bd-test1", "bd-test2"}

	got := batchCheckGitHistory(t.TempDir(), ".beads/beads.jsonl", ids)
	if len(got) > 0 {
		t.Errorf("Expected empty result for non-git dir, got %v", got)
	}
}