Files
beads/internal/storage/sqlite/hash.go
Steve Yegge c34b93fa1a Fix bd-160: Implement JSONL integrity validation and prevent export deduplication data loss
## Problem
Export deduplication feature broke when JSONL and export_hashes diverged
(e.g., after git pull/reset). This caused exports to skip issues that
weren't actually in the file, leading to silent data loss.

## Solution
1. JSONL integrity validation before every export
   - Store JSONL file hash after export
   - Validate hash before export, clear export_hashes if mismatch
   - Automatically recovers from git operations changing JSONL

2. Clear export_hashes on all imports
   - Prevents stale hashes from causing future export failures
   - Import operations invalidate export_hashes state

3. Add Storage interface methods:
   - GetJSONLFileHash/SetJSONLFileHash for integrity tracking
   - ClearAllExportHashes for recovery

## Tests Added
- TestJSONLIntegrityValidation: Unit tests for validation logic
- TestImportClearsExportHashes: Verifies imports clear hashes
- TestExportIntegrityAfterJSONLTruncation: Simulates git reset (would have caught bd-160)
- TestExportIntegrityAfterJSONLDeletion: Tests recovery from file deletion
- TestMultipleExportsStayConsistent: Tests repeated export integrity

## Follow-up
Created bd-179 epic for remaining integration test gaps (multi-repo sync,
daemon auto-sync, corruption recovery tests).

Closes bd-160
2025-10-29 21:57:15 -07:00

86 lines
2.4 KiB
Go

package sqlite
import (
"context"
"database/sql"
"fmt"
)
// GetExportHash retrieves the content hash of the last export for an issue.
// Returns empty string if no hash is stored (first export).
func (s *SQLiteStorage) GetExportHash(ctx context.Context, issueID string) (string, error) {
var hash string
err := s.db.QueryRowContext(ctx, `
SELECT content_hash FROM export_hashes WHERE issue_id = ?
`, issueID).Scan(&hash)
if err == sql.ErrNoRows {
return "", nil // No hash stored yet
}
if err != nil {
return "", fmt.Errorf("failed to get export hash for %s: %w", issueID, err)
}
return hash, nil
}
// SetExportHash stores the content hash of an issue after successful export.
func (s *SQLiteStorage) SetExportHash(ctx context.Context, issueID, contentHash string) error {
_, err := s.db.ExecContext(ctx, `
INSERT INTO export_hashes (issue_id, content_hash, exported_at)
VALUES (?, ?, CURRENT_TIMESTAMP)
ON CONFLICT(issue_id) DO UPDATE SET
content_hash = excluded.content_hash,
exported_at = CURRENT_TIMESTAMP
`, issueID, contentHash)
if err != nil {
return fmt.Errorf("failed to set export hash for %s: %w", issueID, err)
}
return nil
}
// ClearAllExportHashes removes all export hashes from the database.
// This is primarily used for test isolation to force re-export of issues.
func (s *SQLiteStorage) ClearAllExportHashes(ctx context.Context) error {
_, err := s.db.ExecContext(ctx, `DELETE FROM export_hashes`)
if err != nil {
return fmt.Errorf("failed to clear export hashes: %w", err)
}
return nil
}
// GetJSONLFileHash retrieves the stored hash of the JSONL file.
// Returns empty string if no hash is stored (bd-160).
func (s *SQLiteStorage) GetJSONLFileHash(ctx context.Context) (string, error) {
var hash string
err := s.db.QueryRowContext(ctx, `
SELECT value FROM metadata WHERE key = 'jsonl_file_hash'
`).Scan(&hash)
if err == sql.ErrNoRows {
return "", nil // No hash stored yet
}
if err != nil {
return "", fmt.Errorf("failed to get jsonl_file_hash: %w", err)
}
return hash, nil
}
// SetJSONLFileHash stores the hash of the JSONL file after export (bd-160).
func (s *SQLiteStorage) SetJSONLFileHash(ctx context.Context, fileHash string) error {
_, err := s.db.ExecContext(ctx, `
INSERT INTO metadata (key, value)
VALUES ('jsonl_file_hash', ?)
ON CONFLICT(key) DO UPDATE SET value = excluded.value
`, fileHash)
if err != nil {
return fmt.Errorf("failed to set jsonl_file_hash: %w", err)
}
return nil
}