Complete bd-164: Fix timestamp-only export deduplication

- Add export_hashes table to track last exported content state
- Implement GetExportHash/SetExportHash in storage interface
- Update shouldSkipExport to use export_hashes instead of dirty_issues
- Call SetExportHash after successful export
- Clean up dirty_issues table (remove content_hash column)
- Simplify MarkIssueDirty functions (no longer compute hashes)
- Update markIssuesDirtyTx signature (remove store parameter)

Testing:
- Timestamp-only updates are skipped during export ✓
- Real content changes trigger export ✓
- export_hashes table populated correctly ✓

Fixes bd-159, bd-164
This commit is contained in:
Steve Yegge
2025-10-26 20:35:37 -07:00
parent a898df6915
commit a02729ea57
7 changed files with 109 additions and 56 deletions

View File

@@ -151,7 +151,7 @@ func (s *SQLiteStorage) AddDependency(ctx context.Context, dep *types.Dependency
// Mark both issues as dirty for incremental export
// (dependencies are exported with each issue, so both need updating)
if err := markIssuesDirtyTx(ctx, tx, s, []string{dep.IssueID, dep.DependsOnID}); err != nil {
if err := markIssuesDirtyTx(ctx, tx, []string{dep.IssueID, dep.DependsOnID}); err != nil {
return err
}
@@ -264,7 +264,7 @@ func (s *SQLiteStorage) addDependencyUnchecked(ctx context.Context, dep *types.D
}
// Mark both issues as dirty
if err := markIssuesDirtyTx(ctx, tx, s, []string{dep.IssueID, dep.DependsOnID}); err != nil {
if err := markIssuesDirtyTx(ctx, tx, []string{dep.IssueID, dep.DependsOnID}); err != nil {
return err
}
@@ -305,7 +305,7 @@ func (s *SQLiteStorage) RemoveDependency(ctx context.Context, issueID, dependsOn
}
// Mark both issues as dirty for incremental export
if err := markIssuesDirtyTx(ctx, tx, s, []string{issueID, dependsOnID}); err != nil {
if err := markIssuesDirtyTx(ctx, tx, []string{issueID, dependsOnID}); err != nil {
return err
}
@@ -348,7 +348,7 @@ func (s *SQLiteStorage) removeDependencyIfExists(ctx context.Context, issueID, d
}
// Mark both issues as dirty for incremental export
if err := markIssuesDirtyTx(ctx, tx, s, []string{issueID, dependsOnID}); err != nil {
if err := markIssuesDirtyTx(ctx, tx, []string{issueID, dependsOnID}); err != nil {
return err
}

View File

@@ -11,22 +11,11 @@ import (
// MarkIssueDirty marks an issue as dirty (needs to be exported to JSONL).
// This should be called whenever an issue is created, updated, or has
// dependencies changed.
//
// Since bd-164 this no longer computes a content hash: timestamp-only
// export deduplication is handled by the export_hashes table (see
// GetExportHash/SetExportHash), so marking dirty is a single upsert.
func (s *SQLiteStorage) MarkIssueDirty(ctx context.Context, issueID string) error {
	// Upsert: re-marking an already-dirty issue just refreshes marked_at.
	_, err := s.db.ExecContext(ctx, `
		INSERT INTO dirty_issues (issue_id, marked_at)
		VALUES (?, ?)
		ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at
	`, issueID, time.Now())
	return err
}
@@ -45,9 +34,9 @@ func (s *SQLiteStorage) MarkIssuesDirty(ctx context.Context, issueIDs []string)
now := time.Now()
stmt, err := tx.PrepareContext(ctx, `
INSERT INTO dirty_issues (issue_id, marked_at, content_hash)
VALUES (?, ?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash
INSERT INTO dirty_issues (issue_id, marked_at)
VALUES (?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at
`)
if err != nil {
return fmt.Errorf("failed to prepare statement: %w", err)
@@ -55,18 +44,7 @@ func (s *SQLiteStorage) MarkIssuesDirty(ctx context.Context, issueIDs []string)
defer func() { _ = stmt.Close() }()
for _, issueID := range issueIDs {
// Fetch issue to compute content hash
issue, err := s.GetIssue(ctx, issueID)
if err != nil {
return fmt.Errorf("failed to get issue %s: %w", issueID, err)
}
hash, err := computeIssueContentHash(issue)
if err != nil {
return fmt.Errorf("failed to compute hash for issue %s: %w", issueID, err)
}
if _, err := stmt.ExecContext(ctx, issueID, now, hash); err != nil {
if _, err := stmt.ExecContext(ctx, issueID, now); err != nil {
return fmt.Errorf("failed to mark issue %s dirty: %w", issueID, err)
}
}
@@ -171,16 +149,16 @@ func (s *SQLiteStorage) GetDirtyIssueCount(ctx context.Context) (int, error) {
// markIssuesDirtyTx marks multiple issues as dirty within an existing transaction
// This is a helper for operations that need to mark issues dirty as part of a larger transaction
func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, store *SQLiteStorage, issueIDs []string) error {
func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, issueIDs []string) error {
if len(issueIDs) == 0 {
return nil
}
now := time.Now()
stmt, err := tx.PrepareContext(ctx, `
INSERT INTO dirty_issues (issue_id, marked_at, content_hash)
VALUES (?, ?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash
INSERT INTO dirty_issues (issue_id, marked_at)
VALUES (?, ?)
ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at
`)
if err != nil {
return fmt.Errorf("failed to prepare dirty statement: %w", err)
@@ -188,18 +166,7 @@ func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, store *SQLiteStorage, is
defer func() { _ = stmt.Close() }()
for _, issueID := range issueIDs {
// Fetch issue to compute content hash
issue, err := store.GetIssue(ctx, issueID)
if err != nil {
return fmt.Errorf("failed to get issue %s: %w", issueID, err)
}
hash, err := computeIssueContentHash(issue)
if err != nil {
return fmt.Errorf("failed to compute hash for issue %s: %w", issueID, err)
}
if _, err := stmt.ExecContext(ctx, issueID, now, hash); err != nil {
if _, err := stmt.ExecContext(ctx, issueID, now); err != nil {
return fmt.Errorf("failed to mark issue %s dirty: %w", issueID, err)
}
}

View File

@@ -1,9 +1,12 @@
package sqlite
import (
	"context"
	"crypto/sha256"
	"database/sql"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"time"

	"github.com/steveyegge/beads/internal/types"
@@ -33,3 +36,38 @@ func computeIssueContentHash(issue *types.Issue) (string, error) {
hash := sha256.Sum256(data)
return hex.EncodeToString(hash[:]), nil
}
// GetExportHash retrieves the content hash of the last export for an issue.
// Returns "" (with a nil error) if no hash is stored — i.e. the issue has
// never been exported.
func (s *SQLiteStorage) GetExportHash(ctx context.Context, issueID string) (string, error) {
	var hash string
	err := s.db.QueryRowContext(ctx, `
		SELECT content_hash FROM export_hashes WHERE issue_id = ?
	`, issueID).Scan(&hash)
	// errors.Is (not ==) per database/sql docs: it also matches ErrNoRows
	// when wrapped by a driver or an intermediate layer.
	if errors.Is(err, sql.ErrNoRows) {
		return "", nil // no hash stored yet (first export)
	}
	if err != nil {
		return "", fmt.Errorf("failed to get export hash for %s: %w", issueID, err)
	}
	return hash, nil
}
// SetExportHash records the content hash of an issue after a successful
// export. One row is kept per issue: an upsert overwrites any previous
// hash and refreshes exported_at.
func (s *SQLiteStorage) SetExportHash(ctx context.Context, issueID, contentHash string) error {
	const upsert = `
		INSERT INTO export_hashes (issue_id, content_hash, exported_at)
		VALUES (?, ?, CURRENT_TIMESTAMP)
		ON CONFLICT(issue_id) DO UPDATE SET
			content_hash = excluded.content_hash,
			exported_at = CURRENT_TIMESTAMP
	`
	if _, err := s.db.ExecContext(ctx, upsert, issueID, contentHash); err != nil {
		return fmt.Errorf("failed to set export hash for %s: %w", issueID, err)
	}
	return nil
}

View File

@@ -116,6 +116,11 @@ func New(path string) (*SQLiteStorage, error) {
return nil, fmt.Errorf("failed to migrate compacted_at_commit column: %w", err)
}
// Migrate existing databases to add export_hashes table (bd-164)
if err := migrateExportHashesTable(db); err != nil {
return nil, fmt.Errorf("failed to migrate export_hashes table: %w", err)
}
// Convert to absolute path for consistency
absPath, err := filepath.Abs(path)
if err != nil {
@@ -144,7 +149,6 @@ func migrateDirtyIssuesTable(db *sql.DB) error {
CREATE TABLE dirty_issues (
issue_id TEXT PRIMARY KEY,
marked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
content_hash TEXT,
FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
);
CREATE INDEX idx_dirty_issues_marked_at ON dirty_issues(marked_at);
@@ -481,7 +485,38 @@ func migrateCompactedAtCommitColumn(db *sql.DB) error {
return nil
}
// migrateExportHashesTable ensures the export_hashes table exists for
// timestamp-only export deduplication (bd-164).
//
// Uses CREATE TABLE IF NOT EXISTS so the check-and-create is one atomic
// statement, instead of probing sqlite_master and then creating — which
// left a window where a concurrent connection could create the table
// between the check and the CREATE.
func migrateExportHashesTable(db *sql.DB) error {
	_, err := db.Exec(`
		CREATE TABLE IF NOT EXISTS export_hashes (
			issue_id TEXT PRIMARY KEY,
			content_hash TEXT NOT NULL,
			exported_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
			FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
		)
	`)
	if err != nil {
		return fmt.Errorf("failed to create export_hashes table: %w", err)
	}
	return nil
}
// getNextIDForPrefix atomically generates the next ID for a given prefix
// Uses the issue_counters table for atomic, cross-process ID generation