Fix bd-c6cf: Force full export when export_hashes is cleared

When validateJSONLIntegrity() clears export_hashes due to hash mismatch
or missing JSONL, the subsequent export now correctly exports ALL issues
instead of only dirty ones, preventing permanent database divergence.

Changes:
- validateJSONLIntegrity() returns (needsFullExport, error) to signal when
  export_hashes was cleared
- flushToJSONL() moved integrity check BEFORE isDirty gate so integrity
  issues trigger export even when nothing is dirty
- Missing JSONL treated as non-fatal force-full-export case
- Increased scanner buffer from 64KB to 2MB to handle large JSON lines
- Added scanner.Err() check to catch buffer overflow errors
- Updated all tests to verify needsFullExport flag

Fixes a database divergence issue where clearing export_hashes didn't
trigger a re-export, causing 5 issues to disappear from the JSONL file in the fred clone.

Amp-Thread-ID: https://ampcode.com/threads/T-bf2fdcd6-7bbd-4c30-b1db-746b928c93b8
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-11-01 20:29:13 -07:00
parent 0c1738c976
commit 5fabb5fdcc
3 changed files with 117 additions and 40 deletions

View File

@@ -392,16 +392,17 @@ func clearAutoFlushState() {
// Thread-safe: No shared state access. Safe to call from multiple goroutines.
// validateJSONLIntegrity checks if JSONL file hash matches stored hash.
// If mismatch detected, clears export_hashes and logs warning (bd-160).
func validateJSONLIntegrity(ctx context.Context, jsonlPath string) error {
// Returns (needsFullExport, error) where needsFullExport=true if export_hashes was cleared.
func validateJSONLIntegrity(ctx context.Context, jsonlPath string) (bool, error) {
// Get stored JSONL file hash
storedHash, err := store.GetJSONLFileHash(ctx)
if err != nil {
return fmt.Errorf("failed to get stored JSONL hash: %w", err)
return false, fmt.Errorf("failed to get stored JSONL hash: %w", err)
}
// If no hash stored, this is first export - skip validation
if storedHash == "" {
return nil
return false, nil
}
// Read current JSONL file
@@ -411,11 +412,11 @@ func validateJSONLIntegrity(ctx context.Context, jsonlPath string) error {
// JSONL doesn't exist but we have a stored hash - clear export_hashes
fmt.Fprintf(os.Stderr, "⚠️ WARNING: JSONL file missing but export_hashes exist. Clearing export_hashes.\n")
if err := store.ClearAllExportHashes(ctx); err != nil {
return fmt.Errorf("failed to clear export_hashes: %w", err)
return false, fmt.Errorf("failed to clear export_hashes: %w", err)
}
return nil
return true, nil // Signal full export needed
}
return fmt.Errorf("failed to read JSONL file: %w", err)
return false, fmt.Errorf("failed to read JSONL file: %w", err)
}
// Compute current JSONL hash
@@ -431,11 +432,12 @@ func validateJSONLIntegrity(ctx context.Context, jsonlPath string) error {
// Clear export_hashes to force full re-export
if err := store.ClearAllExportHashes(ctx); err != nil {
return fmt.Errorf("failed to clear export_hashes: %w", err)
return false, fmt.Errorf("failed to clear export_hashes: %w", err)
}
return true, nil // Signal full export needed
}
return nil
return false, nil
}
func writeJSONLAtomic(jsonlPath string, issues []*types.Issue) ([]string, error) {
@@ -556,16 +558,6 @@ func flushToJSONL() {
}
storeMutex.Unlock()
flushMutex.Lock()
if !isDirty {
flushMutex.Unlock()
return
}
isDirty = false
fullExport := needsFullExport
needsFullExport = false // Reset flag
flushMutex.Unlock()
jsonlPath := findJSONLPath()
// Double-check store is still active before accessing
@@ -576,6 +568,52 @@ func flushToJSONL() {
}
storeMutex.Unlock()
ctx := context.Background()
// Validate JSONL integrity BEFORE checking isDirty (bd-c6cf)
// This detects if JSONL and export_hashes are out of sync (e.g., after git operations)
// If export_hashes was cleared, we need to do a full export even if nothing is dirty
integrityNeedsFullExport, err := validateJSONLIntegrity(ctx, jsonlPath)
if err != nil {
// Special case: missing JSONL is not fatal, just forces full export (bd-c6cf)
if !os.IsNotExist(err) {
// Record failure without clearing isDirty (we didn't do any work yet)
flushMutex.Lock()
flushFailureCount++
lastFlushError = err
failCount := flushFailureCount
flushMutex.Unlock()
// Always show the immediate warning
fmt.Fprintf(os.Stderr, "Warning: auto-flush failed: %v\n", err)
// Show prominent warning after 3+ consecutive failures
if failCount >= 3 {
red := color.New(color.FgRed, color.Bold).SprintFunc()
fmt.Fprintf(os.Stderr, "\n%s\n", red("⚠️ CRITICAL: Auto-flush has failed "+fmt.Sprint(failCount)+" times consecutively!"))
fmt.Fprintf(os.Stderr, "%s\n", red("⚠️ Your JSONL file may be out of sync with the database."))
fmt.Fprintf(os.Stderr, "%s\n\n", red("⚠️ Run 'bd export -o .beads/issues.jsonl' manually to fix."))
}
return
}
// Missing JSONL: treat as "force full export" case
integrityNeedsFullExport = true
}
// Now check if we should proceed with export
flushMutex.Lock()
if !isDirty && !integrityNeedsFullExport {
// Nothing to do: no dirty issues and no integrity issue
flushMutex.Unlock()
return
}
// We're proceeding with export - capture state and clear flags
isDirty = false
fullExport := needsFullExport || integrityNeedsFullExport
needsFullExport = false // Reset flag
flushMutex.Unlock()
// Helper to record failure
recordFailure := func(err error) {
flushMutex.Lock()
@@ -604,24 +642,14 @@ func flushToJSONL() {
flushMutex.Unlock()
}
ctx := context.Background()
// Validate JSONL integrity before export (bd-160)
// This detects if JSONL and export_hashes are out of sync (e.g., after git operations)
if err := validateJSONLIntegrity(ctx, jsonlPath); err != nil {
recordFailure(fmt.Errorf("JSONL integrity check failed: %w", err))
return
}
// Determine which issues to export
var dirtyIDs []string
var err error
if fullExport {
// Full export: get ALL issues (needed after ID-changing operations like renumber)
allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
if err != nil {
recordFailure(fmt.Errorf("failed to get all issues: %w", err))
allIssues, err2 := store.SearchIssues(ctx, "", types.IssueFilter{})
if err2 != nil {
recordFailure(fmt.Errorf("failed to get all issues: %w", err2))
return
}
dirtyIDs = make([]string, len(allIssues))
@@ -630,9 +658,10 @@ func flushToJSONL() {
}
} else {
// Incremental export: get only dirty issue IDs (bd-39 optimization)
dirtyIDs, err = store.GetDirtyIssues(ctx)
if err != nil {
recordFailure(fmt.Errorf("failed to get dirty issues: %w", err))
var err2 error
dirtyIDs, err2 = store.GetDirtyIssues(ctx)
if err2 != nil {
recordFailure(fmt.Errorf("failed to get dirty issues: %w", err2))
return
}
@@ -648,6 +677,9 @@ func flushToJSONL() {
if !fullExport {
if existingFile, err := os.Open(jsonlPath); err == nil {
scanner := bufio.NewScanner(existingFile)
// Increase buffer to handle large JSON lines (bd-c6cf)
// Default scanner limit is 64KB which can cause silent truncation
scanner.Buffer(make([]byte, 0, 1024), 2*1024*1024) // 2MB max line size
lineNum := 0
for scanner.Scan() {
lineNum++
@@ -663,6 +695,12 @@ func flushToJSONL() {
fmt.Fprintf(os.Stderr, "Warning: skipping malformed JSONL line %d: %v\n", lineNum, err)
}
}
// Check for scanner errors (bd-c6cf)
if err := scanner.Err(); err != nil {
_ = existingFile.Close()
recordFailure(fmt.Errorf("failed to read existing JSONL: %w", err))
return
}
_ = existingFile.Close()
}
}

View File

@@ -2,6 +2,8 @@ package main
import (
"context"
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"testing"
@@ -70,6 +72,14 @@ func TestExportIntegrityAfterJSONLTruncation(t *testing.T) {
t.Fatalf("failed to read JSONL: %v", err)
}
// Compute and store the JSONL file hash
hasher := sha256.New()
hasher.Write(jsonlData)
fileHash := hex.EncodeToString(hasher.Sum(nil))
if err := testStore.SetJSONLFileHash(ctx, fileHash); err != nil {
t.Fatalf("failed to set JSONL file hash: %v", err)
}
initialSize := len(jsonlData)
// Step 2: Simulate git operation that truncates JSONL (the bd-160 scenario)
@@ -92,9 +102,13 @@ func TestExportIntegrityAfterJSONLTruncation(t *testing.T) {
defer func() { store = oldStore }()
// This should detect the mismatch and clear export_hashes
if err := validateJSONLIntegrity(ctx, jsonlPath); err != nil {
needsFullExport, err := validateJSONLIntegrity(ctx, jsonlPath)
if err != nil {
t.Fatalf("integrity validation failed: %v", err)
}
if !needsFullExport {
t.Fatalf("expected needsFullExport=true after truncation")
}
// Step 4: Export all issues again
exportedIDs2, err := writeJSONLAtomic(jsonlPath, allIssues)
@@ -180,7 +194,16 @@ func TestExportIntegrityAfterJSONLDeletion(t *testing.T) {
}
// Store JSONL hash (would happen in real export)
_ , _ = os.ReadFile(jsonlPath)
jsonlData, err := os.ReadFile(jsonlPath)
if err != nil {
t.Fatalf("failed to read JSONL: %v", err)
}
hasher := sha256.New()
hasher.Write(jsonlData)
fileHash := hex.EncodeToString(hasher.Sum(nil))
if err := testStore.SetJSONLFileHash(ctx, fileHash); err != nil {
t.Fatalf("failed to set JSONL file hash: %v", err)
}
// Set global store
oldStore := store
@@ -194,12 +217,16 @@ func TestExportIntegrityAfterJSONLDeletion(t *testing.T) {
// Integrity validation should detect missing file
// (In real system, this happens before next export)
if err := validateJSONLIntegrity(ctx, jsonlPath); err != nil {
needsFullExport, err := validateJSONLIntegrity(ctx, jsonlPath)
if err != nil {
// Error is OK if file doesn't exist
if !os.IsNotExist(err) {
t.Fatalf("unexpected error: %v", err)
}
}
if !needsFullExport {
t.Fatalf("expected needsFullExport=true after JSONL deletion")
}
// Export again should recreate JSONL
_, err = writeJSONLAtomic(jsonlPath, []*types.Issue{issue})

View File

@@ -85,9 +85,13 @@ func TestJSONLIntegrityValidation(t *testing.T) {
// Test 1: Validate with matching hash (should succeed)
t.Run("MatchingHash", func(t *testing.T) {
if err := validateJSONLIntegrity(ctx, jsonlPath); err != nil {
needsFullExport, err := validateJSONLIntegrity(ctx, jsonlPath)
if err != nil {
t.Fatalf("validation failed with matching hash: %v", err)
}
if needsFullExport {
t.Fatalf("expected needsFullExport=false for matching hash")
}
})
// Test 2: Modify JSONL file (simulating git pull) and validate
@@ -103,9 +107,13 @@ func TestJSONLIntegrityValidation(t *testing.T) {
}
// Validate should detect mismatch and clear export_hashes
if err := validateJSONLIntegrity(ctx, jsonlPath); err != nil {
needsFullExport, err := validateJSONLIntegrity(ctx, jsonlPath)
if err != nil {
t.Fatalf("validation failed: %v", err)
}
if !needsFullExport {
t.Fatalf("expected needsFullExport=true after clearing export_hashes")
}
// Verify export_hashes were cleared
hash, err := testStore.GetExportHash(ctx, "bd-1")
@@ -135,9 +143,13 @@ func TestJSONLIntegrityValidation(t *testing.T) {
}
// Validate should detect missing file and clear export_hashes
if err := validateJSONLIntegrity(ctx, jsonlPath); err != nil {
needsFullExport, err := validateJSONLIntegrity(ctx, jsonlPath)
if err != nil {
t.Fatalf("validation failed: %v", err)
}
if !needsFullExport {
t.Fatalf("expected needsFullExport=true after clearing export_hashes")
}
// Verify export_hashes were cleared
hash, err := testStore.GetExportHash(ctx, "bd-1")