From d0e70473ccd0054cb211b044e5a168e7a5b2033b Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Thu, 20 Nov 2025 20:54:57 -0500 Subject: [PATCH] Fix bd-lm2q: Use content-based comparison to prevent false-positive timestamp skew This change fixes the issue where 'bd sync' would fail with a false-positive "JSONL is newer than database" error after the daemon auto-exports. Root Cause: - Daemon exports local changes to JSONL, updating its timestamp - bd sync sees JSONL.mtime > DB.mtime and incorrectly assumes external changes - This blocks export even though content is identical Solution: - Modified isJSONLNewer() to use SHA256 content hash comparison - Only triggers auto-import when JSONL is newer AND content differs - Prevents false positives from daemon auto-export timestamp updates - Maintains conservative fallback if hashes can't be computed Changes: - Added computeJSONLHash() and computeDBHash() helper functions - Created isJSONLNewerWithStore() to support testing with explicit store - Added comprehensive tests for content-based comparison logic - All existing tests pass, including export_mtime tests Fixes: bd-lm2q --- cmd/bd/integrity.go | 114 +++++++++++++- cmd/bd/integrity_content_test.go | 259 +++++++++++++++++++++++++++++++ 2 files changed, 371 insertions(+), 2 deletions(-) create mode 100644 cmd/bd/integrity_content_test.go diff --git a/cmd/bd/integrity.go b/cmd/bd/integrity.go index 77ecd4c1..54a45441 100644 --- a/cmd/bd/integrity.go +++ b/cmd/bd/integrity.go @@ -1,20 +1,32 @@ package main import ( + "bytes" "context" + "crypto/sha256" "database/sql" + "encoding/hex" + "encoding/json" "fmt" "math" "os" "path/filepath" + "sort" "github.com/steveyegge/beads/internal/storage" "github.com/steveyegge/beads/internal/types" ) // isJSONLNewer checks if JSONL file is newer than database file. -// Returns true if JSONL is newer, false otherwise. +// Returns true if JSONL is newer AND has different content, false otherwise. +// This prevents false positives from daemon auto-export timestamp skew (bd-lm2q). func isJSONLNewer(jsonlPath string) bool { + return isJSONLNewerWithStore(jsonlPath, nil) +} + +// isJSONLNewerWithStore is like isJSONLNewer but accepts an optional store parameter. +// If st is nil, it will try to use the global store. +func isJSONLNewerWithStore(jsonlPath string, st storage.Storage) bool { jsonlInfo, jsonlStatErr := os.Stat(jsonlPath) if jsonlStatErr != nil { return false @@ -27,7 +39,36 @@ func isJSONLNewer(jsonlPath string) bool { return false } - return jsonlInfo.ModTime().After(dbInfo.ModTime()) + // Quick path: if DB is newer, JSONL is definitely not newer + if !jsonlInfo.ModTime().After(dbInfo.ModTime()) { + return false + } + + // JSONL is newer by timestamp - but this could be due to daemon auto-export + // or clock skew. Use content-based comparison to determine if import is needed. + // If we can't determine content hash (e.g., store not available), conservatively + // assume JSONL is newer to trigger auto-import. + if st == nil { + if ensureStoreActive() != nil || store == nil { + return true // Conservative: can't check content, assume different + } + st = store + } + + ctx := context.Background() + jsonlHash, err := computeJSONLHash(jsonlPath) + if err != nil { + return true // Conservative: can't read JSONL, assume different + } + + dbHash, err := computeDBHash(ctx, st) + if err != nil { + return true // Conservative: can't read DB, assume different + } + + // Compare hashes: if they match, JSONL and DB have same content + // despite timestamp difference (daemon auto-export case) + return jsonlHash != dbHash } // validatePreExport performs integrity checks before exporting database to JSONL. @@ -280,3 +321,72 @@ func dbNeedsExport(ctx context.Context, store storage.Storage, jsonlPath string) // DB and JSONL appear to be in sync return false, nil } + +// computeJSONLHash computes a content hash of the JSONL file. +// Returns the SHA256 hash of the file contents. +func computeJSONLHash(jsonlPath string) (string, error) { + data, err := os.ReadFile(jsonlPath) // #nosec G304 - controlled path from config + if err != nil { + return "", fmt.Errorf("failed to read JSONL: %w", err) + } + + // Use sha256 for consistency with autoimport package + hasher := sha256.New() + hasher.Write(data) + return hex.EncodeToString(hasher.Sum(nil)), nil +} + +// computeDBHash computes a content hash of the database by exporting to memory. +// This is used to compare DB content with JSONL content without relying on timestamps. +func computeDBHash(ctx context.Context, store storage.Storage) (string, error) { + // Get all issues from DB + issues, err := store.SearchIssues(ctx, "", types.IssueFilter{}) + if err != nil { + return "", fmt.Errorf("failed to get issues: %w", err) + } + + // Sort by ID for consistent hash + sort.Slice(issues, func(i, j int) bool { + return issues[i].ID < issues[j].ID + }) + + // Populate dependencies + allDeps, err := store.GetAllDependencyRecords(ctx) + if err != nil { + return "", fmt.Errorf("failed to get dependencies: %w", err) + } + for _, issue := range issues { + issue.Dependencies = allDeps[issue.ID] + } + + // Populate labels + for _, issue := range issues { + labels, err := store.GetLabels(ctx, issue.ID) + if err != nil { + return "", fmt.Errorf("failed to get labels for %s: %w", issue.ID, err) + } + issue.Labels = labels + } + + // Populate comments + for _, issue := range issues { + comments, err := store.GetIssueComments(ctx, issue.ID) + if err != nil { + return "", fmt.Errorf("failed to get comments for %s: %w", issue.ID, err) + } + issue.Comments = comments + } + + // Serialize to JSON and hash + var buf bytes.Buffer + encoder := json.NewEncoder(&buf) + for _, issue := range issues { + if err := encoder.Encode(issue); err != nil { + return "", fmt.Errorf("failed to encode issue %s: %w", issue.ID, err) + } + } + + hasher := sha256.New() + hasher.Write(buf.Bytes()) + return hex.EncodeToString(hasher.Sum(nil)), nil +} diff --git a/cmd/bd/integrity_content_test.go b/cmd/bd/integrity_content_test.go new file mode 100644 index 00000000..4376124d --- /dev/null +++ b/cmd/bd/integrity_content_test.go @@ -0,0 +1,259 @@ +package main + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/steveyegge/beads/internal/storage/sqlite" + "github.com/steveyegge/beads/internal/types" +) + +// TestContentBasedComparison verifies that isJSONLNewer uses content comparison +// instead of just timestamp comparison to prevent false positives (bd-lm2q) +func TestContentBasedComparison(t *testing.T) { + if testing.Short() { + t.Skip("skipping slow test in short mode") + } + + tmpDir := t.TempDir() + beadsDir := filepath.Join(tmpDir, ".beads") + if err := os.Mkdir(beadsDir, 0750); err != nil { + t.Fatal(err) + } + + dbPath := filepath.Join(beadsDir, "beads.db") + jsonlPath := filepath.Join(beadsDir, "issues.jsonl") + + // Create and populate database + localStore, err := sqlite.New(dbPath) + if err != nil { + t.Fatalf("Failed to create store: %v", err) + } + defer localStore.Close() + + ctx := context.Background() + + // Initialize database with issue_prefix + if err := localStore.SetConfig(ctx, "issue_prefix", "test"); err != nil { + t.Fatalf("Failed to set issue_prefix: %v", err) + } + + // Create a test issue + issue := &types.Issue{ + ID: "test-1", + Title: "Test Issue", + Status: types.StatusOpen, + Priority: 2, + IssueType: types.TypeTask, + } + if err := localStore.CreateIssue(ctx, issue, "test-actor"); err != nil { + t.Fatalf("Failed to create issue: %v", err) + } + + // Export to JSONL + if err := exportToJSONLWithStore(ctx, localStore, jsonlPath); err != nil { + t.Fatalf("Export failed: %v", err) + } + + // Touch database to match JSONL + if err := TouchDatabaseFile(dbPath, jsonlPath); err != nil { + t.Fatalf("TouchDatabaseFile failed: %v", err) + } + + // Verify they're in sync (content matches, timestamps match) + if isJSONLNewerWithStore(jsonlPath, localStore) { + t.Error("isJSONLNewer should return false when content matches (before timestamp manipulation)") + } + + // Wait to ensure timestamp difference + time.Sleep(100 * time.Millisecond) + + // Simulate daemon auto-export: touch JSONL to make it newer + // This simulates clock skew or filesystem timestamp quirks + futureTime := time.Now().Add(2 * time.Second) + if err := os.Chtimes(jsonlPath, futureTime, futureTime); err != nil { + t.Fatalf("Failed to update JSONL timestamp: %v", err) + } + + // Verify JSONL is newer by timestamp + jsonlInfo, _ := os.Stat(jsonlPath) + dbInfo, _ := os.Stat(dbPath) + if !jsonlInfo.ModTime().After(dbInfo.ModTime()) { + t.Fatal("Test setup failed: JSONL should be newer than DB by timestamp") + } + + // KEY TEST: isJSONLNewer should return FALSE because content is identical + // despite timestamp difference (this is the bd-lm2q fix) + + // Compute hashes for debugging + jsonlHash1, err := computeJSONLHash(jsonlPath) + if err != nil { + t.Fatalf("Failed to compute JSONL hash: %v", err) + } + dbHash1, err := computeDBHash(ctx, localStore) + if err != nil { + t.Fatalf("Failed to compute DB hash: %v", err) + } + t.Logf("JSONL hash: %s", jsonlHash1) + t.Logf("DB hash: %s", dbHash1) + + if isJSONLNewerWithStore(jsonlPath, localStore) { + t.Error("isJSONLNewer should return false when content matches despite timestamp difference (bd-lm2q)") + t.Logf("Hashes: JSONL=%s, DB=%s", jsonlHash1, dbHash1) + } + + // Now modify the database (add a new issue) + issue2 := &types.Issue{ + ID: "test-2", + Title: "New Issue", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeBug, + } + if err := localStore.CreateIssue(ctx, issue2, "test-actor"); err != nil { + t.Fatalf("Failed to create second issue: %v", err) + } + + // Re-export to JSONL to reflect new content + if err := exportToJSONLWithStore(ctx, localStore, jsonlPath); err != nil { + t.Fatalf("Second export failed: %v", err) + } + + // Make JSONL appear older than DB by timestamp + pastTime := time.Now().Add(-2 * time.Second) + if err := os.Chtimes(jsonlPath, pastTime, pastTime); err != nil { + t.Fatalf("Failed to update JSONL timestamp to past: %v", err) + } + + // Verify JSONL is older by timestamp + jsonlInfo, _ = os.Stat(jsonlPath) + dbInfo, _ = os.Stat(dbPath) + if jsonlInfo.ModTime().After(dbInfo.ModTime()) { + t.Fatal("Test setup failed: JSONL should be older than DB by timestamp") + } + + // isJSONLNewer should return false because JSONL is older by timestamp + if isJSONLNewerWithStore(jsonlPath, localStore) { + t.Error("isJSONLNewer should return false when JSONL is older by timestamp") + } + + // Final test: Make JSONL newer AND different content + // First, manually edit JSONL to have different content + originalJSONL, err := os.ReadFile(jsonlPath) // #nosec G304 - test code + if err != nil { + t.Fatalf("Failed to read JSONL: %v", err) + } + + // Remove the second issue from database + if err := localStore.DeleteIssue(ctx, "test-2"); err != nil { + t.Fatalf("Failed to delete issue: %v", err) + } + + // Restore the JSONL with 2 issues (making it different from DB which has 1) + if err := os.WriteFile(jsonlPath, originalJSONL, 0600); err != nil { // #nosec G306 - test code + t.Fatalf("Failed to write JSONL: %v", err) + } + + // Make JSONL newer by timestamp + if err := os.Chtimes(jsonlPath, futureTime, futureTime); err != nil { + t.Fatalf("Failed to update JSONL timestamp: %v", err) + } + + // Now isJSONLNewer should return TRUE (newer timestamp AND different content) + if !isJSONLNewerWithStore(jsonlPath, localStore) { + t.Error("isJSONLNewer should return true when JSONL is newer AND has different content") + } +} + +// TestContentHashComputation verifies the hash computation functions +func TestContentHashComputation(t *testing.T) { + tmpDir := t.TempDir() + beadsDir := filepath.Join(tmpDir, ".beads") + if err := os.Mkdir(beadsDir, 0750); err != nil { + t.Fatal(err) + } + + dbPath := filepath.Join(beadsDir, "beads.db") + jsonlPath := filepath.Join(beadsDir, "issues.jsonl") + + // Create and populate database + localStore, err := sqlite.New(dbPath) + if err != nil { + t.Fatalf("Failed to create store: %v", err) + } + defer localStore.Close() + + ctx := context.Background() + + if err := localStore.SetConfig(ctx, "issue_prefix", "test"); err != nil { + t.Fatalf("Failed to set issue_prefix: %v", err) + } + + issue := &types.Issue{ + ID: "test-1", + Title: "Test Issue", + Status: types.StatusOpen, + Priority: 2, + IssueType: types.TypeTask, + } + if err := localStore.CreateIssue(ctx, issue, "test-actor"); err != nil { + t.Fatalf("Failed to create issue: %v", err) + } + + // Export to JSONL + if err := exportToJSONLWithStore(ctx, localStore, jsonlPath); err != nil { + t.Fatalf("Export failed: %v", err) + } + + // Compute hashes + jsonlHash, err := computeJSONLHash(jsonlPath) + if err != nil { + t.Fatalf("Failed to compute JSONL hash: %v", err) + } + + dbHash, err := computeDBHash(ctx, localStore) + if err != nil { + t.Fatalf("Failed to compute DB hash: %v", err) + } + + // Hashes should match since we just exported + if jsonlHash != dbHash { + t.Errorf("Hash mismatch after export:\nJSONL: %s\nDB: %s", jsonlHash, dbHash) + } + + // Modify database by adding a new issue + issue2 := &types.Issue{ + ID: "test-2", + Title: "Second Issue", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeBug, + } + if err := localStore.CreateIssue(ctx, issue2, "test-actor"); err != nil { + t.Fatalf("Failed to create second issue: %v", err) + } + + // Compute new DB hash + newDBHash, err := computeDBHash(ctx, localStore) + if err != nil { + t.Fatalf("Failed to compute new DB hash: %v", err) + } + + // Hashes should differ now + if jsonlHash == newDBHash { + t.Error("Hash should differ after DB modification") + } + + // Hash should be consistent across multiple calls + dbHash2, err := computeDBHash(ctx, localStore) + if err != nil { + t.Fatalf("Failed to compute DB hash second time: %v", err) + } + + if newDBHash != dbHash2 { + t.Error("DB hash should be consistent across calls") + } +}