Add compaction schema and candidate identification

- Added compaction columns to issues table (compaction_level, compacted_at, original_size)
- Created issue_snapshots table for snapshot storage before compaction
- Added compaction configuration with opt-in flag (compaction_enabled=false by default)
- Implemented GetTier1Candidates and GetTier2Candidates queries
- Added CheckEligibility validation function
- Comprehensive tests for all compaction queries
- Idempotent migrations for existing databases

Closes bd-252, bd-253, bd-254

Amp-Thread-ID: https://ampcode.com/threads/T-c4d7acd1-c161-4b80-9d80-a0691e8fa87b
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-15 22:26:11 -07:00
parent eaf08106c1
commit 1c5a4a9c70
5 changed files with 755 additions and 17 deletions

View File

@@ -0,0 +1,277 @@
package sqlite
import (
"context"
"database/sql"
"fmt"
"time"
)
// CompactionCandidate represents an issue eligible for compaction
type CompactionCandidate struct {
	IssueID        string    // ID of the candidate issue
	ClosedAt       time.Time // when the issue was closed (closed_at column)
	OriginalSize   int       // stored original_size, or a computed text-length fallback in tier 1
	EstimatedSize  int       // currently always 0 in both candidate queries
	DependentCount int       // tier 1: count of dependents found; tier 2: event count for the issue
}
// GetTier1Candidates returns issues eligible for Tier 1 compaction.
// Criteria:
//   - Status = closed
//   - Closed for at least compact_tier1_days (default 30)
//   - No open dependents within compact_tier1_dep_levels depth (default 2)
//   - Not already compacted (compaction_level = 0)
//
// Results are ordered oldest-closed first.
func (s *SQLiteStorage) GetTier1Candidates(ctx context.Context) ([]*CompactionCandidate, error) {
	// Get configuration, falling back to defaults when unset.
	daysStr, err := s.GetConfig(ctx, "compact_tier1_days")
	if err != nil {
		return nil, fmt.Errorf("failed to get compact_tier1_days: %w", err)
	}
	if daysStr == "" {
		daysStr = "30"
	}
	depthStr, err := s.GetConfig(ctx, "compact_tier1_dep_levels")
	if err != nil {
		return nil, fmt.Errorf("failed to get compact_tier1_dep_levels: %w", err)
	}
	if depthStr == "" {
		depthStr = "2"
	}
	// The depth parameter is bound as TEXT, so it must be CAST before being
	// compared to the integer dt.depth column: in SQLite's type ordering every
	// INTEGER sorts before every TEXT, so "dt.depth < ?" without a CAST is
	// always true, silently disabling the recursion cap and depth filter.
	query := `
	WITH RECURSIVE
	-- Find all issues that depend on (are blocked by) other issues
	dependent_tree AS (
		-- Base case: direct dependents
		SELECT
			d.depends_on_id as issue_id,
			i.id as dependent_id,
			i.status as dependent_status,
			0 as depth
		FROM dependencies d
		JOIN issues i ON d.issue_id = i.id
		WHERE d.type = 'blocks'
		UNION ALL
		-- Recursive case: parent-child relationships
		SELECT
			dt.issue_id,
			i.id as dependent_id,
			i.status as dependent_status,
			dt.depth + 1
		FROM dependent_tree dt
		JOIN dependencies d ON d.depends_on_id = dt.dependent_id
		JOIN issues i ON d.issue_id = i.id
		WHERE d.type = 'parent-child'
		AND dt.depth < CAST(? AS INTEGER)
	)
	SELECT
		i.id,
		i.closed_at,
		-- COALESCE each LENGTH individually: if any text column were NULL the
		-- whole sum would be NULL and Scan into an int would fail.
		COALESCE(i.original_size,
			COALESCE(LENGTH(i.description), 0) +
			COALESCE(LENGTH(i.design), 0) +
			COALESCE(LENGTH(i.notes), 0) +
			COALESCE(LENGTH(i.acceptance_criteria), 0)) as original_size,
		0 as estimated_size,
		COUNT(DISTINCT dt.dependent_id) as dependent_count
	FROM issues i
	LEFT JOIN dependent_tree dt ON i.id = dt.issue_id
		AND dt.dependent_status IN ('open', 'in_progress', 'blocked')
		AND dt.depth <= CAST(? AS INTEGER)
	WHERE i.status = 'closed'
	AND i.closed_at IS NOT NULL
	AND i.closed_at <= datetime('now', '-' || CAST(? AS INTEGER) || ' days')
	AND COALESCE(i.compaction_level, 0) = 0
	AND dt.dependent_id IS NULL -- No open dependents
	GROUP BY i.id
	ORDER BY i.closed_at ASC
	`
	rows, err := s.db.QueryContext(ctx, query, depthStr, depthStr, daysStr)
	if err != nil {
		return nil, fmt.Errorf("failed to query tier1 candidates: %w", err)
	}
	defer rows.Close()
	var candidates []*CompactionCandidate
	for rows.Next() {
		var c CompactionCandidate
		if err := rows.Scan(&c.IssueID, &c.ClosedAt, &c.OriginalSize, &c.EstimatedSize, &c.DependentCount); err != nil {
			return nil, fmt.Errorf("failed to scan candidate: %w", err)
		}
		candidates = append(candidates, &c)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("rows iteration error: %w", err)
	}
	return candidates, nil
}
// GetTier2Candidates returns issues eligible for Tier 2 compaction.
// Criteria:
//   - Status = closed
//   - Closed for at least compact_tier2_days (default 90)
//   - Already at compaction_level = 1
//   - Has at least compact_tier2_commits recorded events (default 100)
//   - No open direct dependents (blocking relationships)
//
// NOTE(review): unlike Tier 1, the dependent check here is only one level
// deep; compact_tier2_dep_levels is not consulted by this query yet.
func (s *SQLiteStorage) GetTier2Candidates(ctx context.Context) ([]*CompactionCandidate, error) {
	// Get configuration, falling back to defaults when unset.
	daysStr, err := s.GetConfig(ctx, "compact_tier2_days")
	if err != nil {
		return nil, fmt.Errorf("failed to get compact_tier2_days: %w", err)
	}
	if daysStr == "" {
		daysStr = "90"
	}
	commitsStr, err := s.GetConfig(ctx, "compact_tier2_commits")
	if err != nil {
		return nil, fmt.Errorf("failed to get compact_tier2_commits: %w", err)
	}
	if commitsStr == "" {
		commitsStr = "100"
	}
	// The query reports the issue's event count in the dependent_count column
	// (scanned into CompactionCandidate.DependentCount). original_size is
	// COALESCEd to 0 so Scan into an int cannot fail on a NULL column.
	query := `
	WITH event_counts AS (
		SELECT issue_id, COUNT(*) as event_count
		FROM events
		GROUP BY issue_id
	)
	SELECT
		i.id,
		i.closed_at,
		COALESCE(i.original_size, 0) as original_size,
		0 as estimated_size,
		COALESCE(ec.event_count, 0) as dependent_count
	FROM issues i
	LEFT JOIN event_counts ec ON i.id = ec.issue_id
	WHERE i.status = 'closed'
	AND i.closed_at IS NOT NULL
	AND i.closed_at <= datetime('now', '-' || CAST(? AS INTEGER) || ' days')
	AND i.compaction_level = 1
	AND COALESCE(ec.event_count, 0) >= CAST(? AS INTEGER)
	AND NOT EXISTS (
		-- Check for open dependents
		SELECT 1 FROM dependencies d
		JOIN issues dep ON d.issue_id = dep.id
		WHERE d.depends_on_id = i.id
		AND d.type = 'blocks'
		AND dep.status IN ('open', 'in_progress', 'blocked')
	)
	ORDER BY i.closed_at ASC
	`
	rows, err := s.db.QueryContext(ctx, query, daysStr, commitsStr)
	if err != nil {
		return nil, fmt.Errorf("failed to query tier2 candidates: %w", err)
	}
	defer rows.Close()
	var candidates []*CompactionCandidate
	for rows.Next() {
		var c CompactionCandidate
		if err := rows.Scan(&c.IssueID, &c.ClosedAt, &c.OriginalSize, &c.EstimatedSize, &c.DependentCount); err != nil {
			return nil, fmt.Errorf("failed to scan candidate: %w", err)
		}
		candidates = append(candidates, &c)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("rows iteration error: %w", err)
	}
	return candidates, nil
}
// CheckEligibility checks if a specific issue is eligible for compaction at the given tier.
// Returns (eligible, reason, error).
// If not eligible, reason explains why.
//
// Eligibility delegates to GetTier1Candidates/GetTier2Candidates so the
// answer always agrees with what the batch queries would select; this
// function only adds per-issue precondition checks with specific reasons.
func (s *SQLiteStorage) CheckEligibility(ctx context.Context, issueID string, tier int) (bool, string, error) {
	// Get the issue's status, close time, and current compaction level.
	var status string
	var closedAt sql.NullTime
	var compactionLevel int
	err := s.db.QueryRowContext(ctx, `
	SELECT status, closed_at, COALESCE(compaction_level, 0)
	FROM issues
	WHERE id = ?
	`, issueID).Scan(&status, &closedAt, &compactionLevel)
	if err == sql.ErrNoRows {
		return false, "issue not found", nil
	}
	if err != nil {
		return false, "", fmt.Errorf("failed to get issue: %w", err)
	}
	// Check basic requirements common to all tiers.
	if status != "closed" {
		return false, "issue is not closed", nil
	}
	if !closedAt.Valid {
		return false, "issue has no closed_at timestamp", nil
	}
	switch tier {
	case 1:
		if compactionLevel != 0 {
			return false, "issue is already compacted", nil
		}
		// Age and dependent checks are embedded in the candidate query;
		// eligibility means the issue appears among tier 1 candidates.
		candidates, err := s.GetTier1Candidates(ctx)
		if err != nil {
			return false, "", fmt.Errorf("failed to get tier1 candidates: %w", err)
		}
		for _, c := range candidates {
			if c.IssueID == issueID {
				return true, "", nil
			}
		}
		return false, "issue has open dependents or not closed long enough", nil
	case 2:
		if compactionLevel != 1 {
			return false, "issue must be at compaction level 1 for tier 2", nil
		}
		// Eligibility means the issue appears among tier 2 candidates.
		candidates, err := s.GetTier2Candidates(ctx)
		if err != nil {
			return false, "", fmt.Errorf("failed to get tier2 candidates: %w", err)
		}
		for _, c := range candidates {
			if c.IssueID == issueID {
				return true, "", nil
			}
		}
		return false, "issue has open dependents, not closed long enough, or insufficient events", nil
	default:
		return false, fmt.Sprintf("invalid tier: %d", tier), nil
	}
}

View File

@@ -0,0 +1,318 @@
package sqlite
import (
"context"
"testing"
"time"
"github.com/steveyegge/beads/internal/types"
)
// TestGetTier1Candidates verifies tier 1 candidate selection: only issues
// that are closed, older than the default 30-day threshold, and free of
// open dependents should be returned.
func TestGetTier1Candidates(t *testing.T) {
	store, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	// Create test issues
	// Old closed issue (eligible: closed 40 days ago, no dependents)
	issue1 := &types.Issue{
		ID:          "bd-1",
		Title:       "Old closed issue",
		Description: "This is a test description",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-40 * 24 * time.Hour)),
	}
	if err := store.CreateIssue(ctx, issue1, "test"); err != nil {
		t.Fatalf("Failed to create issue1: %v", err)
	}
	// Recently closed issue (not eligible - 10 days is under the 30-day default)
	issue2 := &types.Issue{
		ID:          "bd-2",
		Title:       "Recent closed issue",
		Description: "Recent",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-10 * 24 * time.Hour)),
	}
	if err := store.CreateIssue(ctx, issue2, "test"); err != nil {
		t.Fatalf("Failed to create issue2: %v", err)
	}
	// Open issue (not eligible - only closed issues are candidates)
	issue3 := &types.Issue{
		ID:        "bd-3",
		Title:     "Open issue",
		Description: "Open",
		Status:    "open",
		Priority:  2,
		IssueType: "task",
	}
	if err := store.CreateIssue(ctx, issue3, "test"); err != nil {
		t.Fatalf("Failed to create issue3: %v", err)
	}
	// Old closed issue with open dependent (not eligible despite its age)
	issue4 := &types.Issue{
		ID:          "bd-4",
		Title:       "Has open dependent",
		Description: "Blocked by open issue",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-40 * 24 * time.Hour)),
	}
	if err := store.CreateIssue(ctx, issue4, "test"); err != nil {
		t.Fatalf("Failed to create issue4: %v", err)
	}
	// Create blocking dependency: open bd-3 is blocked by closed bd-4,
	// which should exclude bd-4 from the candidate list.
	dep := &types.Dependency{
		IssueID:     "bd-3",
		DependsOnID: "bd-4",
		Type:        "blocks",
	}
	if err := store.AddDependency(ctx, dep, "test"); err != nil {
		t.Fatalf("Failed to add dependency: %v", err)
	}
	// Get candidates
	candidates, err := store.GetTier1Candidates(ctx)
	if err != nil {
		t.Fatalf("GetTier1Candidates failed: %v", err)
	}
	// Should only return bd-1 (old and no open dependents)
	if len(candidates) != 1 {
		t.Errorf("Expected 1 candidate, got %d", len(candidates))
	}
	if len(candidates) > 0 && candidates[0].IssueID != "bd-1" {
		t.Errorf("Expected candidate bd-1, got %s", candidates[0].IssueID)
	}
}
// TestGetTier2Candidates verifies tier 2 candidate selection: an issue must
// already be at compaction_level 1, closed longer than the 90-day default,
// and have at least the default 100 recorded events.
func TestGetTier2Candidates(t *testing.T) {
	store, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	// Create old tier1 compacted issue with many events
	issue1 := &types.Issue{
		ID:          "bd-1",
		Title:       "Tier1 compacted with events",
		Description: "Summary",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-100 * 24 * time.Hour)),
	}
	if err := store.CreateIssue(ctx, issue1, "test"); err != nil {
		t.Fatalf("Failed to create issue1: %v", err)
	}
	// Set compaction level to 1 directly in the database, since no
	// compaction API is exercised by this test.
	_, err := store.db.ExecContext(ctx, `
	UPDATE issues
	SET compaction_level = 1,
	    compacted_at = datetime('now', '-95 days'),
	    original_size = 1000
	WHERE id = ?
	`, "bd-1")
	if err != nil {
		t.Fatalf("Failed to set compaction level: %v", err)
	}
	// Add many events (120 comments, above the 100-event default threshold)
	for i := 0; i < 120; i++ {
		if err := store.AddComment(ctx, "bd-1", "test", "comment"); err != nil {
			t.Fatalf("Failed to add event: %v", err)
		}
	}
	// Get tier2 candidates
	candidates, err := store.GetTier2Candidates(ctx)
	if err != nil {
		t.Fatalf("GetTier2Candidates failed: %v", err)
	}
	// Should return bd-1
	if len(candidates) != 1 {
		t.Errorf("Expected 1 candidate, got %d", len(candidates))
	}
	if len(candidates) > 0 && candidates[0].IssueID != "bd-1" {
		t.Errorf("Expected candidate bd-1, got %s", candidates[0].IssueID)
	}
}
// TestCheckEligibilityTier1 verifies that a closed, sufficiently old issue
// with no dependents is reported as eligible for tier 1 compaction.
func TestCheckEligibilityTier1(t *testing.T) {
	store, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()

	// A 40-day-old closed issue clears the 30-day tier 1 default.
	closedAt := time.Now().Add(-40 * 24 * time.Hour)
	candidate := &types.Issue{
		ID:          "bd-1",
		Title:       "Eligible",
		Description: "Test",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(closedAt),
	}
	if err := store.CreateIssue(ctx, candidate, "test"); err != nil {
		t.Fatalf("Failed to create issue: %v", err)
	}

	eligible, reason, err := store.CheckEligibility(ctx, "bd-1", 1)
	if err != nil {
		t.Fatalf("CheckEligibility failed: %v", err)
	}
	if !eligible {
		t.Errorf("Expected eligible, got not eligible: %s", reason)
	}
}
// TestCheckEligibilityOpenIssue verifies that an issue which is still open
// is rejected for tier 1 compaction with the expected reason string.
func TestCheckEligibilityOpenIssue(t *testing.T) {
	store, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()

	openIssue := &types.Issue{
		ID:          "bd-1",
		Title:       "Open",
		Description: "Test",
		Status:      "open",
		Priority:    2,
		IssueType:   "task",
	}
	if err := store.CreateIssue(ctx, openIssue, "test"); err != nil {
		t.Fatalf("Failed to create issue: %v", err)
	}

	eligible, reason, err := store.CheckEligibility(ctx, "bd-1", 1)
	if err != nil {
		t.Fatalf("CheckEligibility failed: %v", err)
	}
	if eligible {
		t.Error("Expected not eligible for open issue")
	}
	const want = "issue is not closed"
	if reason != want {
		t.Errorf("Expected '%s', got '%s'", want, reason)
	}
}
// TestCheckEligibilityAlreadyCompacted verifies that an issue already at
// compaction level 1 is rejected for tier 1 with the expected reason.
func TestCheckEligibilityAlreadyCompacted(t *testing.T) {
	store, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()

	compacted := &types.Issue{
		ID:          "bd-1",
		Title:       "Already compacted",
		Description: "Test",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-40 * 24 * time.Hour)),
	}
	if err := store.CreateIssue(ctx, compacted, "test"); err != nil {
		t.Fatalf("Failed to create issue: %v", err)
	}

	// Simulate a prior tier 1 compaction by bumping the level directly.
	if _, err := store.db.ExecContext(ctx, `
	UPDATE issues SET compaction_level = 1 WHERE id = ?
	`, "bd-1"); err != nil {
		t.Fatalf("Failed to set compaction level: %v", err)
	}

	eligible, reason, err := store.CheckEligibility(ctx, "bd-1", 1)
	if err != nil {
		t.Fatalf("CheckEligibility failed: %v", err)
	}
	if eligible {
		t.Error("Expected not eligible for already compacted issue")
	}
	const want = "issue is already compacted"
	if reason != want {
		t.Errorf("Expected '%s', got '%s'", want, reason)
	}
}
// TestTier1NoCircularDeps verifies that GetTier1Candidates terminates and
// returns correct results when the dependencies table contains a cycle.
// The cycle is inserted with raw SQL because AddDependency's validation
// would normally reject it.
func TestTier1NoCircularDeps(t *testing.T) {
	store, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	// Create three closed issues with circular dependency
	issue1 := &types.Issue{
		ID:          "bd-1",
		Title:       "Issue 1",
		Description: "Test",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-40 * 24 * time.Hour)),
	}
	issue2 := &types.Issue{
		ID:          "bd-2",
		Title:       "Issue 2",
		Description: "Test",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-40 * 24 * time.Hour)),
	}
	issue3 := &types.Issue{
		ID:          "bd-3",
		Title:       "Issue 3",
		Description: "Test",
		Status:      "closed",
		Priority:    2,
		IssueType:   "task",
		ClosedAt:    timePtr(time.Now().Add(-40 * 24 * time.Hour)),
	}
	for _, issue := range []*types.Issue{issue1, issue2, issue3} {
		if err := store.CreateIssue(ctx, issue, "test"); err != nil {
			t.Fatalf("Failed to create issue: %v", err)
		}
	}
	// Create circular dependency: 1->2->3->1
	// Note: the AddDependency validation should prevent this, but let's test the query handles it
	_, err := store.db.ExecContext(ctx, `
	INSERT INTO dependencies (issue_id, depends_on_id, type, created_by) VALUES
	('bd-1', 'bd-2', 'blocks', 'test'),
	('bd-2', 'bd-3', 'blocks', 'test'),
	('bd-3', 'bd-1', 'blocks', 'test')
	`)
	if err != nil {
		t.Fatalf("Failed to create dependencies: %v", err)
	}
	// Should not crash and should return all three as they're all closed
	// (closed dependents do not block tier 1 eligibility).
	candidates, err := store.GetTier1Candidates(ctx)
	if err != nil {
		t.Fatalf("GetTier1Candidates failed with circular deps: %v", err)
	}
	// All should be eligible since all are closed
	if len(candidates) != 3 {
		t.Errorf("Expected 3 candidates, got %d", len(candidates))
	}
}
func timePtr(t time.Time) *time.Time {
return &t
}

View File

@@ -18,6 +18,9 @@ CREATE TABLE IF NOT EXISTS issues (
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
closed_at DATETIME,
external_ref TEXT,
compaction_level INTEGER DEFAULT 0,
compacted_at DATETIME,
original_size INTEGER,
CHECK ((status = 'closed') = (closed_at IS NOT NULL))
);
@@ -74,6 +77,19 @@ CREATE TABLE IF NOT EXISTS config (
value TEXT NOT NULL
);
-- Default compaction configuration.
-- Compaction is opt-in: both enable flags default to 'false'.
-- INSERT OR IGNORE preserves any values the user has already set.
INSERT OR IGNORE INTO config (key, value) VALUES
('compaction_enabled', 'false'),      -- master switch for compaction
('compact_tier1_days', '30'),         -- min days closed before tier 1
('compact_tier1_dep_levels', '2'),    -- dependent-tree depth checked for tier 1
('compact_tier2_days', '90'),         -- min days closed before tier 2
('compact_tier2_dep_levels', '5'),    -- dependent-tree depth for tier 2
('compact_tier2_commits', '100'),     -- min event count required for tier 2
('compact_model', 'claude-3-5-haiku-20241022'),  -- model used to summarize (presumably; confirm against compaction worker)
('compact_batch_size', '50'),         -- issues per compaction batch
('compact_parallel_workers', '5'),    -- concurrent compaction workers
('auto_compact_enabled', 'false');    -- switch for automatic (scheduled) compaction
-- Metadata table (for storing internal state like import hashes)
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
@@ -96,6 +112,22 @@ CREATE TABLE IF NOT EXISTS issue_counters (
last_id INTEGER NOT NULL DEFAULT 0
);
-- Issue snapshots table (for compaction).
-- Stores the full pre-compaction content of an issue so compaction can be
-- audited or reversed.
CREATE TABLE IF NOT EXISTS issue_snapshots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
issue_id TEXT NOT NULL,               -- issue the snapshot belongs to
snapshot_time DATETIME NOT NULL,      -- when the snapshot was taken
compaction_level INTEGER NOT NULL,    -- compaction level this snapshot corresponds to
original_size INTEGER NOT NULL,       -- content size before compaction
compressed_size INTEGER NOT NULL,     -- content size after compaction
original_content TEXT NOT NULL,       -- full pre-compaction content
archived_events TEXT,                 -- events archived at compaction time, if any (format defined by the compaction writer)
FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_snapshots_issue ON issue_snapshots(issue_id);
CREATE INDEX IF NOT EXISTS idx_snapshots_level ON issue_snapshots(compaction_level);
-- Ready work view (with hierarchical blocking)
-- Uses recursive CTE to propagate blocking through parent-child hierarchy
CREATE VIEW IF NOT EXISTS ready_issues AS

View File

@@ -72,6 +72,21 @@ func New(path string) (*SQLiteStorage, error) {
return nil, fmt.Errorf("failed to migrate closed_at constraint: %w", err)
}
// Migrate existing databases to add compaction columns
if err := migrateCompactionColumns(db); err != nil {
return nil, fmt.Errorf("failed to migrate compaction columns: %w", err)
}
// Migrate existing databases to add issue_snapshots table
if err := migrateSnapshotsTable(db); err != nil {
return nil, fmt.Errorf("failed to migrate snapshots table: %w", err)
}
// Migrate existing databases to add compaction config defaults
if err := migrateCompactionConfig(db); err != nil {
return nil, fmt.Errorf("failed to migrate compaction config: %w", err)
}
return &SQLiteStorage{
db: db,
}, nil
@@ -290,6 +305,102 @@ func migrateClosedAtConstraint(db *sql.DB) error {
return nil
}
// migrateCompactionColumns adds compaction_level, compacted_at, and
// original_size columns to the issues table.
//
// Each column is checked and added individually: the previous version only
// checked for compaction_level, so a partially applied migration (e.g. one
// interrupted after the first ALTER in the batch) would never add the
// remaining columns on a later run. This migration is idempotent and safe
// to run multiple times.
func migrateCompactionColumns(db *sql.DB) error {
	columns := []struct {
		name string
		ddl  string
	}{
		{"compaction_level", `ALTER TABLE issues ADD COLUMN compaction_level INTEGER DEFAULT 0`},
		{"compacted_at", `ALTER TABLE issues ADD COLUMN compacted_at DATETIME`},
		{"original_size", `ALTER TABLE issues ADD COLUMN original_size INTEGER`},
	}
	for _, col := range columns {
		// pragma_table_info lists the table's current columns.
		var exists bool
		err := db.QueryRow(`
		SELECT COUNT(*) > 0
		FROM pragma_table_info('issues')
		WHERE name = ?
		`, col.name).Scan(&exists)
		if err != nil {
			return fmt.Errorf("failed to check %s column: %w", col.name, err)
		}
		if exists {
			continue
		}
		if _, err := db.Exec(col.ddl); err != nil {
			return fmt.Errorf("failed to add %s column: %w", col.name, err)
		}
	}
	return nil
}
// migrateSnapshotsTable creates the issue_snapshots table and its indexes if
// they don't exist.
//
// Every statement uses IF NOT EXISTS instead of a single up-front table
// check: with the old check, a run that created the table but failed before
// creating the indexes would leave the database permanently missing them,
// because later runs saw the table and skipped the whole migration. This
// form is idempotent statement-by-statement.
func migrateSnapshotsTable(db *sql.DB) error {
	_, err := db.Exec(`
	CREATE TABLE IF NOT EXISTS issue_snapshots (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		issue_id TEXT NOT NULL,
		snapshot_time DATETIME NOT NULL,
		compaction_level INTEGER NOT NULL,
		original_size INTEGER NOT NULL,
		compressed_size INTEGER NOT NULL,
		original_content TEXT NOT NULL,
		archived_events TEXT,
		FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
	);
	CREATE INDEX IF NOT EXISTS idx_snapshots_issue ON issue_snapshots(issue_id);
	CREATE INDEX IF NOT EXISTS idx_snapshots_level ON issue_snapshots(compaction_level);
	`)
	if err != nil {
		return fmt.Errorf("failed to create issue_snapshots table: %w", err)
	}
	return nil
}
// migrateCompactionConfig seeds the default compaction configuration values.
// INSERT OR IGNORE leaves any existing user-set values untouched, so this
// migration is idempotent and safe to run multiple times.
func migrateCompactionConfig(db *sql.DB) error {
	defaults := [][2]string{
		{"compaction_enabled", "false"},
		{"compact_tier1_days", "30"},
		{"compact_tier1_dep_levels", "2"},
		{"compact_tier2_days", "90"},
		{"compact_tier2_dep_levels", "5"},
		{"compact_tier2_commits", "100"},
		{"compact_model", "claude-3-5-haiku-20241022"},
		{"compact_batch_size", "50"},
		{"compact_parallel_workers", "5"},
		{"auto_compact_enabled", "false"},
	}
	for _, kv := range defaults {
		if _, err := db.Exec(`INSERT OR IGNORE INTO config (key, value) VALUES (?, ?)`, kv[0], kv[1]); err != nil {
			return fmt.Errorf("failed to add compaction config defaults: %w", err)
		}
	}
	return nil
}
// getNextIDForPrefix atomically generates the next ID for a given prefix
// Uses the issue_counters table for atomic, cross-process ID generation
func (s *SQLiteStorage) getNextIDForPrefix(ctx context.Context, prefix string) (int, error) {