bd-162: Add database integrity checks with oracle review fixes

- Added validatePreExport to prevent data loss - Added checkDuplicateIDs to detect corruption - Added checkOrphanedDeps to find orphaned dependencies (both sides) - Added validatePostImport to ensure imports don't lose data - CRITICAL FIX: Removed post-pull export that clobbered fresh JSONL - Conservative checks when JSONL is unreadable - Efficient COUNT(*) SQL path instead of loading all issues - Comprehensive test coverage including edge cases
2025-10-26 20:17:48 -07:00
parent 3cbd913f31
commit 6271b521b4
5 changed files with 616 additions and 9 deletions
@@ -988,6 +988,25 @@ func createSyncFunc(ctx context.Context, store storage.Storage, autoCommit, auto
 			log.log("Removed stale lock (%s), proceeding with sync", holder)
 		}

+		// Integrity check: validate before export
+		if err := validatePreExport(syncCtx, store, jsonlPath); err != nil {
+			log.log("Pre-export validation failed: %v", err)
+			return
+		}
+
+		// Check for duplicate IDs (database corruption)
+		if err := checkDuplicateIDs(syncCtx, store); err != nil {
+			log.log("Duplicate ID check failed: %v", err)
+			return
+		}
+
+		// Check for orphaned dependencies (warns but doesn't fail)
+		if orphaned, err := checkOrphanedDeps(syncCtx, store); err != nil {
+			log.log("Orphaned dependency check failed: %v", err)
+		} else if len(orphaned) > 0 {
+			log.log("Found %d orphaned dependencies: %v", len(orphaned), orphaned)
+		}
+
 		if err := exportToJSONLWithStore(syncCtx, store, jsonlPath); err != nil {
 			log.log("Export failed: %v", err)
 			return
@@ -1017,13 +1036,12 @@ func createSyncFunc(ctx context.Context, store storage.Storage, autoCommit, auto
 		}
 		log.log("Pulled from remote")

-		// Flush any pending dirty issues to JSONL BEFORE importing
-		// This prevents the race condition where deletions get re-imported (bd-155)
-		if err := exportToJSONLWithStore(syncCtx, store, jsonlPath); err != nil {
-		 log.log("Pre-import flush failed: %v", err)
-		 return
+		// Count issues before import for validation
+	beforeCount, err := countDBIssues(syncCtx, store)
+	if err != nil {
+		log.log("Failed to count issues before import: %v", err)
+		return
 	}
-	log.log("Flushed pending changes before import")

 	if err := importToJSONLWithStore(syncCtx, store, jsonlPath); err != nil {
 		log.log("Import failed: %v", err)
@@ -1031,6 +1049,18 @@ func createSyncFunc(ctx context.Context, store storage.Storage, autoCommit, auto
 	}
 	log.log("Imported from JSONL")

+	// Validate import didn't cause data loss
+	afterCount, err := countDBIssues(syncCtx, store)
+	if err != nil {
+		log.log("Failed to count issues after import: %v", err)
+		return
+	}
+
+	if err := validatePostImport(beforeCount, afterCount); err != nil {
+		log.log("Post-import validation failed: %v", err)
+		return
+	}
+
 		if autoPush && autoCommit {
 			if err := gitPush(syncCtx); err != nil {
 				log.log("Push failed: %v", err)
@@ -34,9 +34,8 @@ func countIssuesInJSONL(path string) (int, error) {
 			if err.Error() == "EOF" {
 				break
 			}
-			// If we hit a decode error, stop counting but return what we have
-			// This handles partially corrupt files
-			break
+			// Return error for corrupt/invalid JSON
+			return count, fmt.Errorf("invalid JSON at issue %d: %w", count+1, err)
 		}
 		count++
 	}
@@ -0,0 +1,211 @@
+package main
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"math"
+	"os"
+
+	"github.com/steveyegge/beads/internal/storage"
+	"github.com/steveyegge/beads/internal/types"
+)
+
+// validatePreExport performs integrity checks before exporting database to JSONL.
+// Returns error if critical issues found that would cause data loss.
+func validatePreExport(ctx context.Context, store storage.Storage, jsonlPath string) error {
+	// Get database issue count (fast path with COUNT(*) if available)
+	dbCount, err := countDBIssuesFast(ctx, store)
+	if err != nil {
+		return fmt.Errorf("failed to count database issues: %w", err)
+	}
+
+	// Get JSONL issue count
+	jsonlCount := 0
+	fileInfo, statErr := os.Stat(jsonlPath)
+	if statErr == nil {
+		jsonlCount, err = countIssuesInJSONL(jsonlPath)
+		if err != nil {
+			// Conservative: if JSONL exists with content but we can't count it,
+			// and DB is empty, refuse to export (potential data loss)
+			if dbCount == 0 && fileInfo.Size() > 0 {
+				return fmt.Errorf("refusing to export empty DB over existing JSONL whose contents couldn't be verified: %w", err)
+			}
+			// Warning for other cases
+			fmt.Fprintf(os.Stderr, "WARNING: Failed to count issues in JSONL: %v\n", err)
+		}
+	}
+
+	// Critical: refuse to export empty DB over non-empty JSONL
+	if dbCount == 0 && jsonlCount > 0 {
+		return fmt.Errorf("refusing to export empty DB over %d issues in JSONL (would cause data loss)", jsonlCount)
+	}
+
+	// Warning: large divergence suggests sync failure
+	if jsonlCount > 0 {
+		divergencePercent := math.Abs(float64(dbCount-jsonlCount)) / float64(jsonlCount) * 100
+		if divergencePercent > 50 {
+			fmt.Fprintf(os.Stderr, "WARNING: DB has %d issues, JSONL has %d (%.1f%% divergence)\n",
+				dbCount, jsonlCount, divergencePercent)
+			fmt.Fprintf(os.Stderr, "This suggests sync failure - investigate before proceeding\n")
+		}
+	}
+
+	return nil
+}
+
+// checkDuplicateIDs detects duplicate issue IDs in the database.
+// Returns error if duplicates are found (indicates database corruption).
+func checkDuplicateIDs(ctx context.Context, store storage.Storage) error {
+	// Get access to underlying database
+	// This is a hack - we need to add a proper interface method for this
+	// For now, we'll use a type assertion to access the underlying *sql.DB
+	type dbGetter interface {
+		GetDB() interface{}
+	}
+
+	getter, ok := store.(dbGetter)
+	if !ok {
+		// If store doesn't expose GetDB, skip this check
+		// This is acceptable since duplicate IDs are prevented by UNIQUE constraint
+		return nil
+	}
+
+	db, ok := getter.GetDB().(*sql.DB)
+	if !ok || db == nil {
+		return nil
+	}
+
+	rows, err := db.QueryContext(ctx, `
+		SELECT id, COUNT(*) as cnt 
+		FROM issues 
+		GROUP BY id 
+		HAVING cnt > 1
+	`)
+	if err != nil {
+		return fmt.Errorf("failed to check for duplicate IDs: %w", err)
+	}
+	defer rows.Close()
+
+	var duplicates []string
+	for rows.Next() {
+		var id string
+		var count int
+		if err := rows.Scan(&id, &count); err != nil {
+			return fmt.Errorf("failed to scan duplicate ID row: %w", err)
+		}
+		duplicates = append(duplicates, fmt.Sprintf("%s (x%d)", id, count))
+	}
+
+	if err := rows.Err(); err != nil {
+		return fmt.Errorf("error iterating duplicate IDs: %w", err)
+	}
+
+	if len(duplicates) > 0 {
+		return fmt.Errorf("database corruption: duplicate IDs: %v", duplicates)
+	}
+
+	return nil
+}
+
+// checkOrphanedDeps finds dependencies pointing to or from non-existent issues.
+// Returns list of orphaned dependency IDs and any error encountered.
+func checkOrphanedDeps(ctx context.Context, store storage.Storage) ([]string, error) {
+	// Get access to underlying database
+	type dbGetter interface {
+		GetDB() interface{}
+	}
+
+	getter, ok := store.(dbGetter)
+	if !ok {
+		return nil, nil
+	}
+
+	db, ok := getter.GetDB().(*sql.DB)
+	if !ok || db == nil {
+		return nil, nil
+	}
+
+	// Check both sides: dependencies where either issue_id or depends_on_id doesn't exist
+	rows, err := db.QueryContext(ctx, `
+		SELECT DISTINCT d.issue_id 
+		FROM dependencies d 
+		LEFT JOIN issues i ON d.issue_id = i.id 
+		WHERE i.id IS NULL
+		UNION
+		SELECT DISTINCT d.depends_on_id 
+		FROM dependencies d 
+		LEFT JOIN issues i ON d.depends_on_id = i.id 
+		WHERE i.id IS NULL
+	`)
+	if err != nil {
+		return nil, fmt.Errorf("failed to check for orphaned dependencies: %w", err)
+	}
+	defer rows.Close()
+
+	var orphaned []string
+	for rows.Next() {
+		var id string
+		if err := rows.Scan(&id); err != nil {
+			return nil, fmt.Errorf("failed to scan orphaned dependency: %w", err)
+		}
+		orphaned = append(orphaned, id)
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("error iterating orphaned dependencies: %w", err)
+	}
+
+	if len(orphaned) > 0 {
+		fmt.Fprintf(os.Stderr, "WARNING: Found %d orphaned dependency references: %v\n", len(orphaned), orphaned)
+	}
+
+	return orphaned, nil
+}
+
+// validatePostImport checks that import didn't cause data loss.
+// Returns error if issue count decreased (data loss) or nil if OK.
+func validatePostImport(before, after int) error {
+	if after < before {
+		return fmt.Errorf("import reduced issue count: %d → %d (data loss detected!)", before, after)
+	}
+	if after == before {
+		fmt.Fprintf(os.Stderr, "Import complete: no changes\n")
+	} else {
+		fmt.Fprintf(os.Stderr, "Import complete: %d → %d issues (+%d)\n", before, after, after-before)
+	}
+	return nil
+}
+
+// countDBIssues returns the total number of issues in the database.
+// This is the legacy interface kept for compatibility.
+func countDBIssues(ctx context.Context, store storage.Storage) (int, error) {
+	return countDBIssuesFast(ctx, store)
+}
+
+// countDBIssuesFast uses COUNT(*) if possible, falls back to SearchIssues.
+func countDBIssuesFast(ctx context.Context, store storage.Storage) (int, error) {
+	// Try fast path with COUNT(*) using direct SQL
+	// This is a hack until we add a proper CountIssues method to storage.Storage
+	type dbGetter interface {
+		GetDB() interface{}
+	}
+
+	if getter, ok := store.(dbGetter); ok {
+		if db, ok := getter.GetDB().(*sql.DB); ok && db != nil {
+			var count int
+			err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM issues").Scan(&count)
+			if err == nil {
+				return count, nil
+			}
+			// Fall through to slow path on error
+		}
+	}
+
+	// Fallback: load all issues and count them (slow but always works)
+	issues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
+	if err != nil {
+		return 0, fmt.Errorf("failed to count database issues: %w", err)
+	}
+	return len(issues), nil
+}
@@ -0,0 +1,326 @@
+package main
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/steveyegge/beads/internal/storage/sqlite"
+	"github.com/steveyegge/beads/internal/types"
+)
+
+func TestValidatePreExport(t *testing.T) {
+	ctx := context.Background()
+
+	t.Run("empty DB over non-empty JSONL fails", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+		jsonlPath := filepath.Join(tmpDir, "issues.jsonl")
+
+		// Create empty database
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// Create non-empty JSONL file
+		jsonlContent := `{"id":"bd-1","title":"Test","status":"open","priority":1}
+`
+		if err := os.WriteFile(jsonlPath, []byte(jsonlContent), 0600); err != nil {
+			t.Fatalf("Failed to write JSONL: %v", err)
+		}
+
+		// Should fail validation
+		err = validatePreExport(ctx, store, jsonlPath)
+		if err == nil {
+			t.Error("Expected error for empty DB over non-empty JSONL, got nil")
+		}
+	})
+
+	t.Run("non-empty DB over non-empty JSONL succeeds", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+		jsonlPath := filepath.Join(tmpDir, "issues.jsonl")
+
+		// Create database with issues
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// Add an issue
+		issue := &types.Issue{
+			ID:          "bd-1",
+			Title:       "Test",
+			Status:      types.StatusOpen,
+			Priority:    1,
+			IssueType:   types.TypeTask,
+			Description: "Test issue",
+		}
+		if err := store.CreateIssue(ctx, issue, "test"); err != nil {
+			t.Fatalf("Failed to create issue: %v", err)
+		}
+
+		// Create JSONL file
+		jsonlContent := `{"id":"bd-1","title":"Test","status":"open","priority":1}
+`
+		if err := os.WriteFile(jsonlPath, []byte(jsonlContent), 0600); err != nil {
+			t.Fatalf("Failed to write JSONL: %v", err)
+		}
+
+		// Should pass validation
+		err = validatePreExport(ctx, store, jsonlPath)
+		if err != nil {
+			t.Errorf("Expected no error, got: %v", err)
+		}
+	})
+
+	t.Run("empty DB over missing JSONL succeeds", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+		jsonlPath := filepath.Join(tmpDir, "issues.jsonl")
+
+		// Create empty database
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// JSONL doesn't exist
+
+		// Should pass validation (new repo scenario)
+		err = validatePreExport(ctx, store, jsonlPath)
+		if err != nil {
+			t.Errorf("Expected no error for empty DB with no JSONL, got: %v", err)
+		}
+	})
+
+	t.Run("empty DB over unreadable JSONL fails", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+		jsonlPath := filepath.Join(tmpDir, "issues.jsonl")
+
+		// Create empty database
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// Create corrupt/unreadable JSONL file with content
+		corruptContent := `{"id":"bd-1","title":INVALID JSON`
+		if err := os.WriteFile(jsonlPath, []byte(corruptContent), 0600); err != nil {
+			t.Fatalf("Failed to write corrupt JSONL: %v", err)
+		}
+
+		// Should fail validation (can't verify JSONL content, DB is empty, file has content)
+		err = validatePreExport(ctx, store, jsonlPath)
+		if err == nil {
+			t.Error("Expected error for empty DB over unreadable non-empty JSONL, got nil")
+		}
+	})
+}
+
+func TestValidatePostImport(t *testing.T) {
+	t.Run("issue count decreased fails", func(t *testing.T) {
+		err := validatePostImport(10, 5)
+		if err == nil {
+			t.Error("Expected error for decreased issue count, got nil")
+		}
+	})
+
+	t.Run("issue count same succeeds", func(t *testing.T) {
+		err := validatePostImport(10, 10)
+		if err != nil {
+			t.Errorf("Expected no error for same count, got: %v", err)
+		}
+	})
+
+	t.Run("issue count increased succeeds", func(t *testing.T) {
+		err := validatePostImport(10, 15)
+		if err != nil {
+			t.Errorf("Expected no error for increased count, got: %v", err)
+		}
+	})
+}
+
+func TestCountDBIssues(t *testing.T) {
+	ctx := context.Background()
+
+	t.Run("count issues in database", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+
+		// Create database
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// Initially 0
+		count, err := countDBIssues(ctx, store)
+		if err != nil {
+			t.Fatalf("Failed to count issues: %v", err)
+		}
+		if count != 0 {
+			t.Errorf("Expected 0 issues, got %d", count)
+		}
+
+		// Add issues
+		for i := 1; i <= 3; i++ {
+			issue := &types.Issue{
+				ID:          "bd-" + string(rune('0'+i)),
+				Title:       "Test",
+				Status:      types.StatusOpen,
+				Priority:    1,
+				IssueType:   types.TypeTask,
+				Description: "Test issue",
+			}
+			if err := store.CreateIssue(ctx, issue, "test"); err != nil {
+				t.Fatalf("Failed to create issue: %v", err)
+			}
+		}
+
+		// Should be 3
+		count, err = countDBIssues(ctx, store)
+		if err != nil {
+			t.Fatalf("Failed to count issues: %v", err)
+		}
+		if count != 3 {
+			t.Errorf("Expected 3 issues, got %d", count)
+		}
+	})
+}
+
+func TestCheckOrphanedDeps(t *testing.T) {
+	ctx := context.Background()
+
+	t.Run("function executes without error", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+
+		// Create database
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// Create two issues
+		issue1 := &types.Issue{
+			ID:          "bd-1",
+			Title:       "Test 1",
+			Status:      types.StatusOpen,
+			Priority:    1,
+			IssueType:   types.TypeTask,
+			Description: "Test issue 1",
+		}
+		if err := store.CreateIssue(ctx, issue1, "test"); err != nil {
+			t.Fatalf("Failed to create issue 1: %v", err)
+		}
+
+		issue2 := &types.Issue{
+			ID:          "bd-2",
+			Title:       "Test 2",
+			Status:      types.StatusOpen,
+			Priority:    1,
+			IssueType:   types.TypeTask,
+			Description: "Test issue 2",
+		}
+		if err := store.CreateIssue(ctx, issue2, "test"); err != nil {
+			t.Fatalf("Failed to create issue 2: %v", err)
+		}
+
+		// Add dependency
+		dep := &types.Dependency{
+			IssueID:     "bd-1",
+			DependsOnID: "bd-2",
+			Type:        types.DepBlocks,
+		}
+		if err := store.AddDependency(ctx, dep, "test"); err != nil {
+			t.Fatalf("Failed to add dependency: %v", err)
+		}
+
+		// Check for orphaned deps - should succeed without error
+		// Note: Database maintains referential integrity, so we can't easily create orphaned deps in tests
+		// This test verifies the function executes correctly
+		orphaned, err := checkOrphanedDeps(ctx, store)
+		if err != nil {
+			t.Fatalf("Failed to check orphaned deps: %v", err)
+		}
+
+		// With proper foreign keys, there should be no orphaned dependencies
+		if len(orphaned) != 0 {
+			t.Logf("Note: Found %d orphaned dependencies (unexpected with FK constraints): %v", len(orphaned), orphaned)
+		}
+	})
+
+	t.Run("no orphaned dependencies", func(t *testing.T) {
+		// Create temp directory
+		tmpDir := t.TempDir()
+		dbPath := filepath.Join(tmpDir, "test.db")
+
+		// Create database
+		store, err := sqlite.New(dbPath)
+		if err != nil {
+			t.Fatalf("Failed to create store: %v", err)
+		}
+		defer store.Close()
+
+		// Create two issues
+		issue1 := &types.Issue{
+			ID:          "bd-1",
+			Title:       "Test 1",
+			Status:      types.StatusOpen,
+			Priority:    1,
+			IssueType:   types.TypeTask,
+			Description: "Test issue 1",
+		}
+		if err := store.CreateIssue(ctx, issue1, "test"); err != nil {
+			t.Fatalf("Failed to create issue 1: %v", err)
+		}
+
+		issue2 := &types.Issue{
+			ID:          "bd-2",
+			Title:       "Test 2",
+			Status:      types.StatusOpen,
+			Priority:    1,
+			IssueType:   types.TypeTask,
+			Description: "Test issue 2",
+		}
+		if err := store.CreateIssue(ctx, issue2, "test"); err != nil {
+			t.Fatalf("Failed to create issue 2: %v", err)
+		}
+
+		// Add valid dependency
+		dep := &types.Dependency{
+			IssueID:     "bd-1",
+			DependsOnID: "bd-2",
+			Type:        types.DepBlocks,
+		}
+		if err := store.AddDependency(ctx, dep, "test"); err != nil {
+			t.Fatalf("Failed to add dependency: %v", err)
+		}
+
+		// Check for orphaned deps
+		orphaned, err := checkOrphanedDeps(ctx, store)
+		if err != nil {
+			t.Fatalf("Failed to check orphaned deps: %v", err)
+		}
+
+		if len(orphaned) != 0 {
+			t.Errorf("Expected 0 orphaned dependencies, got %d: %v", len(orphaned), orphaned)
+		}
+	})
+}
@@ -104,6 +104,23 @@ Use --import-only to just import from JSONL (useful after git pull).`,
 		if dryRun {
 			fmt.Println("→ [DRY RUN] Would export pending changes to JSONL")
 		} else {
+			// Pre-export integrity checks
+			if err := ensureStoreActive(); err == nil && store != nil {
+				if err := validatePreExport(ctx, store, jsonlPath); err != nil {
+					fmt.Fprintf(os.Stderr, "Pre-export validation failed: %v\n", err)
+					os.Exit(1)
+				}
+				if err := checkDuplicateIDs(ctx, store); err != nil {
+					fmt.Fprintf(os.Stderr, "Database corruption detected: %v\n", err)
+					os.Exit(1)
+				}
+				if orphaned, err := checkOrphanedDeps(ctx, store); err != nil {
+					fmt.Fprintf(os.Stderr, "Warning: orphaned dependency check failed: %v\n", err)
+				} else if len(orphaned) > 0 {
+					fmt.Fprintf(os.Stderr, "Warning: found %d orphaned dependencies: %v\n", len(orphaned), orphaned)
+				}
+			}
+
 			fmt.Println("→ Exporting pending changes to JSONL...")
 			if err := exportToJSONL(ctx, jsonlPath); err != nil {
 				fmt.Fprintf(os.Stderr, "Error exporting: %v\n", err)
@@ -144,12 +161,36 @@ Use --import-only to just import from JSONL (useful after git pull).`,
 					os.Exit(1)
 				}

+				// Count issues before import for validation
+				var beforeCount int
+				if err := ensureStoreActive(); err == nil && store != nil {
+					beforeCount, err = countDBIssues(ctx, store)
+					if err != nil {
+						fmt.Fprintf(os.Stderr, "Warning: failed to count issues before import: %v\n", err)
+					}
+				}
+
 				// Step 4: Import updated JSONL after pull
 				fmt.Println("→ Importing updated JSONL...")
 				if err := importFromJSONL(ctx, jsonlPath, renameOnImport); err != nil {
 					fmt.Fprintf(os.Stderr, "Error importing: %v\n", err)
 					os.Exit(1)
 				}
+
+				// Validate import didn't cause data loss
+				if beforeCount > 0 {
+					if err := ensureStoreActive(); err == nil && store != nil {
+						afterCount, err := countDBIssues(ctx, store)
+						if err != nil {
+							fmt.Fprintf(os.Stderr, "Warning: failed to count issues after import: %v\n", err)
+						} else {
+							if err := validatePostImport(beforeCount, afterCount); err != nil {
+								fmt.Fprintf(os.Stderr, "Post-import validation failed: %v\n", err)
+								os.Exit(1)
+							}
+						}
+					}
+				}
 			}
 		}