bd-162: Add database integrity checks with oracle review fixes
- Added validatePreExport to prevent data loss - Added checkDuplicateIDs to detect corruption - Added checkOrphanedDeps to find orphaned dependencies (both sides) - Added validatePostImport to ensure imports don't lose data - CRITICAL FIX: Removed post-pull export that clobbered fresh JSONL - Conservative checks when JSONL is unreadable - Efficient COUNT(*) SQL path instead of loading all issues - Comprehensive test coverage including edge cases
This commit is contained in:
211
cmd/bd/integrity.go
Normal file
211
cmd/bd/integrity.go
Normal file
@@ -0,0 +1,211 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"github.com/steveyegge/beads/internal/storage"
|
||||
"github.com/steveyegge/beads/internal/types"
|
||||
)
|
||||
|
||||
// validatePreExport performs integrity checks before exporting database to JSONL.
|
||||
// Returns error if critical issues found that would cause data loss.
|
||||
func validatePreExport(ctx context.Context, store storage.Storage, jsonlPath string) error {
|
||||
// Get database issue count (fast path with COUNT(*) if available)
|
||||
dbCount, err := countDBIssuesFast(ctx, store)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to count database issues: %w", err)
|
||||
}
|
||||
|
||||
// Get JSONL issue count
|
||||
jsonlCount := 0
|
||||
fileInfo, statErr := os.Stat(jsonlPath)
|
||||
if statErr == nil {
|
||||
jsonlCount, err = countIssuesInJSONL(jsonlPath)
|
||||
if err != nil {
|
||||
// Conservative: if JSONL exists with content but we can't count it,
|
||||
// and DB is empty, refuse to export (potential data loss)
|
||||
if dbCount == 0 && fileInfo.Size() > 0 {
|
||||
return fmt.Errorf("refusing to export empty DB over existing JSONL whose contents couldn't be verified: %w", err)
|
||||
}
|
||||
// Warning for other cases
|
||||
fmt.Fprintf(os.Stderr, "WARNING: Failed to count issues in JSONL: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Critical: refuse to export empty DB over non-empty JSONL
|
||||
if dbCount == 0 && jsonlCount > 0 {
|
||||
return fmt.Errorf("refusing to export empty DB over %d issues in JSONL (would cause data loss)", jsonlCount)
|
||||
}
|
||||
|
||||
// Warning: large divergence suggests sync failure
|
||||
if jsonlCount > 0 {
|
||||
divergencePercent := math.Abs(float64(dbCount-jsonlCount)) / float64(jsonlCount) * 100
|
||||
if divergencePercent > 50 {
|
||||
fmt.Fprintf(os.Stderr, "WARNING: DB has %d issues, JSONL has %d (%.1f%% divergence)\n",
|
||||
dbCount, jsonlCount, divergencePercent)
|
||||
fmt.Fprintf(os.Stderr, "This suggests sync failure - investigate before proceeding\n")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkDuplicateIDs detects duplicate issue IDs in the database.
|
||||
// Returns error if duplicates are found (indicates database corruption).
|
||||
func checkDuplicateIDs(ctx context.Context, store storage.Storage) error {
|
||||
// Get access to underlying database
|
||||
// This is a hack - we need to add a proper interface method for this
|
||||
// For now, we'll use a type assertion to access the underlying *sql.DB
|
||||
type dbGetter interface {
|
||||
GetDB() interface{}
|
||||
}
|
||||
|
||||
getter, ok := store.(dbGetter)
|
||||
if !ok {
|
||||
// If store doesn't expose GetDB, skip this check
|
||||
// This is acceptable since duplicate IDs are prevented by UNIQUE constraint
|
||||
return nil
|
||||
}
|
||||
|
||||
db, ok := getter.GetDB().(*sql.DB)
|
||||
if !ok || db == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
rows, err := db.QueryContext(ctx, `
|
||||
SELECT id, COUNT(*) as cnt
|
||||
FROM issues
|
||||
GROUP BY id
|
||||
HAVING cnt > 1
|
||||
`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check for duplicate IDs: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var duplicates []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
var count int
|
||||
if err := rows.Scan(&id, &count); err != nil {
|
||||
return fmt.Errorf("failed to scan duplicate ID row: %w", err)
|
||||
}
|
||||
duplicates = append(duplicates, fmt.Sprintf("%s (x%d)", id, count))
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return fmt.Errorf("error iterating duplicate IDs: %w", err)
|
||||
}
|
||||
|
||||
if len(duplicates) > 0 {
|
||||
return fmt.Errorf("database corruption: duplicate IDs: %v", duplicates)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkOrphanedDeps finds dependencies pointing to or from non-existent issues.
|
||||
// Returns list of orphaned dependency IDs and any error encountered.
|
||||
func checkOrphanedDeps(ctx context.Context, store storage.Storage) ([]string, error) {
|
||||
// Get access to underlying database
|
||||
type dbGetter interface {
|
||||
GetDB() interface{}
|
||||
}
|
||||
|
||||
getter, ok := store.(dbGetter)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
db, ok := getter.GetDB().(*sql.DB)
|
||||
if !ok || db == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Check both sides: dependencies where either issue_id or depends_on_id doesn't exist
|
||||
rows, err := db.QueryContext(ctx, `
|
||||
SELECT DISTINCT d.issue_id
|
||||
FROM dependencies d
|
||||
LEFT JOIN issues i ON d.issue_id = i.id
|
||||
WHERE i.id IS NULL
|
||||
UNION
|
||||
SELECT DISTINCT d.depends_on_id
|
||||
FROM dependencies d
|
||||
LEFT JOIN issues i ON d.depends_on_id = i.id
|
||||
WHERE i.id IS NULL
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to check for orphaned dependencies: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var orphaned []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
if err := rows.Scan(&id); err != nil {
|
||||
return nil, fmt.Errorf("failed to scan orphaned dependency: %w", err)
|
||||
}
|
||||
orphaned = append(orphaned, id)
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error iterating orphaned dependencies: %w", err)
|
||||
}
|
||||
|
||||
if len(orphaned) > 0 {
|
||||
fmt.Fprintf(os.Stderr, "WARNING: Found %d orphaned dependency references: %v\n", len(orphaned), orphaned)
|
||||
}
|
||||
|
||||
return orphaned, nil
|
||||
}
|
||||
|
||||
// validatePostImport checks that import didn't cause data loss.
|
||||
// Returns error if issue count decreased (data loss) or nil if OK.
|
||||
func validatePostImport(before, after int) error {
|
||||
if after < before {
|
||||
return fmt.Errorf("import reduced issue count: %d → %d (data loss detected!)", before, after)
|
||||
}
|
||||
if after == before {
|
||||
fmt.Fprintf(os.Stderr, "Import complete: no changes\n")
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "Import complete: %d → %d issues (+%d)\n", before, after, after-before)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// countDBIssues returns the total number of issues in the database.
|
||||
// This is the legacy interface kept for compatibility.
|
||||
func countDBIssues(ctx context.Context, store storage.Storage) (int, error) {
|
||||
return countDBIssuesFast(ctx, store)
|
||||
}
|
||||
|
||||
// countDBIssuesFast uses COUNT(*) if possible, falls back to SearchIssues.
|
||||
func countDBIssuesFast(ctx context.Context, store storage.Storage) (int, error) {
|
||||
// Try fast path with COUNT(*) using direct SQL
|
||||
// This is a hack until we add a proper CountIssues method to storage.Storage
|
||||
type dbGetter interface {
|
||||
GetDB() interface{}
|
||||
}
|
||||
|
||||
if getter, ok := store.(dbGetter); ok {
|
||||
if db, ok := getter.GetDB().(*sql.DB); ok && db != nil {
|
||||
var count int
|
||||
err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM issues").Scan(&count)
|
||||
if err == nil {
|
||||
return count, nil
|
||||
}
|
||||
// Fall through to slow path on error
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: load all issues and count them (slow but always works)
|
||||
issues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to count database issues: %w", err)
|
||||
}
|
||||
return len(issues), nil
|
||||
}
|
||||
Reference in New Issue
Block a user