Files
beads/internal/storage/sqlite/dirty.go
Steve Yegge a898df6915 WIP: bd-164 timestamp-only export deduplication (~80% complete)
Implemented content hash-based deduplication to skip exports when only
timestamps changed. Core logic complete, needs export_hashes table wiring.

Completed:
- Added computeIssueContentHash() excluding timestamps
- Created shouldSkipExport() logic
- Updated export loop to skip timestamp-only changes
- Added hash.go with content hashing
- Extended Storage interface

Remaining:
- Complete export_hashes table migration
- Add SetExportHash/GetExportHash to interface
- Revert content_hash from dirty_issues approach
- Wire up hash persistence in export
- Testing

See bd-164 notes for details.

Amp-Thread-ID: https://ampcode.com/threads/T-d70657d1-4433-4f7e-b10a-3fccf8bf17fb
Co-authored-by: Amp <amp@ampcode.com>
2025-10-26 20:29:10 -07:00

209 lines
6.2 KiB
Go

// Package sqlite implements dirty issue tracking for incremental JSONL export.
package sqlite
import (
"context"
"database/sql"
"fmt"
"time"
)
// MarkIssueDirty marks an issue as dirty (needs to be exported to JSONL)
// This should be called whenever an issue is created, updated, or has dependencies changed
func (s *SQLiteStorage) MarkIssueDirty(ctx context.Context, issueID string) error {
// Fetch the issue to compute its content hash
issue, err := s.GetIssue(ctx, issueID)
if err != nil {
return err
}
hash, err := computeIssueContentHash(issue)
if err != nil {
return err
}
_, err = s.db.ExecContext(ctx, `
INSERT INTO dirty_issues (issue_id, marked_at, content_hash)
VALUES (?, ?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash
`, issueID, time.Now(), hash)
return err
}
// MarkIssuesDirty marks multiple issues as dirty in a single transaction
// More efficient when marking multiple issues (e.g., both sides of a dependency)
func (s *SQLiteStorage) MarkIssuesDirty(ctx context.Context, issueIDs []string) error {
if len(issueIDs) == 0 {
return nil
}
tx, err := s.db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("failed to begin transaction: %w", err)
}
defer func() { _ = tx.Rollback() }()
now := time.Now()
stmt, err := tx.PrepareContext(ctx, `
INSERT INTO dirty_issues (issue_id, marked_at, content_hash)
VALUES (?, ?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash
`)
if err != nil {
return fmt.Errorf("failed to prepare statement: %w", err)
}
defer func() { _ = stmt.Close() }()
for _, issueID := range issueIDs {
// Fetch issue to compute content hash
issue, err := s.GetIssue(ctx, issueID)
if err != nil {
return fmt.Errorf("failed to get issue %s: %w", issueID, err)
}
hash, err := computeIssueContentHash(issue)
if err != nil {
return fmt.Errorf("failed to compute hash for issue %s: %w", issueID, err)
}
if _, err := stmt.ExecContext(ctx, issueID, now, hash); err != nil {
return fmt.Errorf("failed to mark issue %s dirty: %w", issueID, err)
}
}
return tx.Commit()
}
// GetDirtyIssues returns the list of issue IDs that need to be exported
func (s *SQLiteStorage) GetDirtyIssues(ctx context.Context) ([]string, error) {
rows, err := s.db.QueryContext(ctx, `
SELECT issue_id FROM dirty_issues
ORDER BY marked_at ASC
`)
if err != nil {
return nil, fmt.Errorf("failed to get dirty issues: %w", err)
}
defer func() { _ = rows.Close() }()
var issueIDs []string
for rows.Next() {
var issueID string
if err := rows.Scan(&issueID); err != nil {
return nil, fmt.Errorf("failed to scan issue ID: %w", err)
}
issueIDs = append(issueIDs, issueID)
}
return issueIDs, rows.Err()
}
// GetDirtyIssueHash returns the stored content hash for a dirty issue, if it exists
func (s *SQLiteStorage) GetDirtyIssueHash(ctx context.Context, issueID string) (string, error) {
var hash sql.NullString
err := s.db.QueryRowContext(ctx, `
SELECT content_hash FROM dirty_issues WHERE issue_id = ?
`, issueID).Scan(&hash)
if err == sql.ErrNoRows {
return "", nil // Issue not dirty
}
if err != nil {
return "", fmt.Errorf("failed to get dirty issue hash: %w", err)
}
if !hash.Valid {
return "", nil // No hash stored yet
}
return hash.String, nil
}
// ClearDirtyIssues removes all entries from the dirty_issues table
// This should be called after a successful JSONL export
//
// WARNING: This has a race condition (bd-52). Use ClearDirtyIssuesByID instead
// to only clear specific issues that were actually exported.
func (s *SQLiteStorage) ClearDirtyIssues(ctx context.Context) error {
_, err := s.db.ExecContext(ctx, `DELETE FROM dirty_issues`)
if err != nil {
return fmt.Errorf("failed to clear dirty issues: %w", err)
}
return nil
}
// ClearDirtyIssuesByID removes specific issue IDs from the dirty_issues table
// This avoids race conditions by only clearing issues that were actually exported
func (s *SQLiteStorage) ClearDirtyIssuesByID(ctx context.Context, issueIDs []string) error {
if len(issueIDs) == 0 {
return nil
}
tx, err := s.db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("failed to begin transaction: %w", err)
}
defer func() { _ = tx.Rollback() }()
stmt, err := tx.PrepareContext(ctx, `DELETE FROM dirty_issues WHERE issue_id = ?`)
if err != nil {
return fmt.Errorf("failed to prepare statement: %w", err)
}
defer func() { _ = stmt.Close() }()
for _, issueID := range issueIDs {
if _, err := stmt.ExecContext(ctx, issueID); err != nil {
return fmt.Errorf("failed to clear dirty issue %s: %w", issueID, err)
}
}
return tx.Commit()
}
// GetDirtyIssueCount returns the count of dirty issues (for monitoring/debugging)
func (s *SQLiteStorage) GetDirtyIssueCount(ctx context.Context) (int, error) {
var count int
err := s.db.QueryRowContext(ctx, `SELECT COUNT(*) FROM dirty_issues`).Scan(&count)
if err != nil && err != sql.ErrNoRows {
return 0, fmt.Errorf("failed to count dirty issues: %w", err)
}
return count, nil
}
// markIssuesDirtyTx marks multiple issues as dirty within an existing transaction
// This is a helper for operations that need to mark issues dirty as part of a larger transaction
func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, store *SQLiteStorage, issueIDs []string) error {
if len(issueIDs) == 0 {
return nil
}
now := time.Now()
stmt, err := tx.PrepareContext(ctx, `
INSERT INTO dirty_issues (issue_id, marked_at, content_hash)
VALUES (?, ?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash
`)
if err != nil {
return fmt.Errorf("failed to prepare dirty statement: %w", err)
}
defer func() { _ = stmt.Close() }()
for _, issueID := range issueIDs {
// Fetch issue to compute content hash
issue, err := store.GetIssue(ctx, issueID)
if err != nil {
return fmt.Errorf("failed to get issue %s: %w", issueID, err)
}
hash, err := computeIssueContentHash(issue)
if err != nil {
return fmt.Errorf("failed to compute hash for issue %s: %w", issueID, err)
}
if _, err := stmt.ExecContext(ctx, issueID, now, hash); err != nil {
return fmt.Errorf("failed to mark issue %s dirty: %w", issueID, err)
}
}
return nil
}