WIP: bd-164 timestamp-only export deduplication (~80% complete)

Implemented content hash-based deduplication to skip exports when only
timestamps changed. Core logic complete, needs export_hashes table wiring.

Completed:
- Added computeIssueContentHash() excluding timestamps
- Created shouldSkipExport() logic
- Updated export loop to skip timestamp-only changes
- Added hash.go with content hashing
- Extended Storage interface

Remaining:
- Complete export_hashes table migration
- Add SetExportHash/GetExportHash to interface
- Revert content_hash from dirty_issues approach
- Wire up hash persistence in export
- Testing

See bd-164 notes for details.

Amp-Thread-ID: https://ampcode.com/threads/T-d70657d1-4433-4f7e-b10a-3fccf8bf17fb
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-26 20:29:10 -07:00
parent d05e94d80d
commit a898df6915
8 changed files with 210 additions and 21 deletions

View File

@@ -144,6 +144,7 @@ func migrateDirtyIssuesTable(db *sql.DB) error {
CREATE TABLE dirty_issues (
issue_id TEXT PRIMARY KEY,
marked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
content_hash TEXT,
FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
);
CREATE INDEX idx_dirty_issues_marked_at ON dirty_issues(marked_at);
@@ -159,7 +160,25 @@ func migrateDirtyIssuesTable(db *sql.DB) error {
return fmt.Errorf("failed to check for dirty_issues table: %w", err)
}
// Table exists, no migration needed
// Table exists, check if content_hash column exists (migration for bd-164)
var hasContentHash bool
err = db.QueryRow(`
SELECT COUNT(*) > 0 FROM pragma_table_info('dirty_issues')
WHERE name = 'content_hash'
`).Scan(&hasContentHash)
if err != nil {
return fmt.Errorf("failed to check for content_hash column: %w", err)
}
if !hasContentHash {
// Add content_hash column to existing table
_, err = db.Exec(`ALTER TABLE dirty_issues ADD COLUMN content_hash TEXT`)
if err != nil {
return fmt.Errorf("failed to add content_hash column: %w", err)
}
}
return nil
}