fix: prevent parallel execution migration race conditions (GH#720)

When multiple bd commands run in parallel, they can race during database
migrations, causing "duplicate column name" errors. Each migration's
check-then-alter sequence is not atomic across processes, so this interleaving
is possible:

1. Process A checks if column exists → false
2. Process B checks if column exists → false
3. Process A adds column → succeeds
4. Process B adds column → FAILS (duplicate column)

Changes:
- Wrap RunMigrations in a BEGIN EXCLUSIVE transaction to serialize migrations
- Disable foreign keys BEFORE the transaction (PRAGMA foreign_keys is a silent
  no-op when issued inside a transaction)
- Convert nested BEGIN/COMMIT in migrations 010, 022, 025 to use SAVEPOINTs
  (SQLite does not support nested transactions)
- Remove redundant PRAGMA foreign_keys calls from individual migrations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Steve Yegge
2025-12-23 00:55:05 -08:00
parent e79558a972
commit d677554ef3
4 changed files with 102 additions and 54 deletions

View File

@@ -101,8 +101,36 @@ func getMigrationDescription(name string) string {
return "Unknown migration"
}
// RunMigrations executes all registered migrations in order with invariant checking
// RunMigrations executes all registered migrations in order with invariant checking.
// Uses EXCLUSIVE transaction to prevent race conditions when multiple processes
// open the database simultaneously (GH#720).
func RunMigrations(db *sql.DB) error {
// Disable foreign keys BEFORE starting the transaction.
// PRAGMA foreign_keys must be called when no transaction is active (SQLite limitation).
// Some migrations (022, 025) drop/recreate tables and need foreign keys off
// to prevent ON DELETE CASCADE from deleting related data.
_, err := db.Exec("PRAGMA foreign_keys = OFF")
if err != nil {
return fmt.Errorf("failed to disable foreign keys for migrations: %w", err)
}
defer func() { _, _ = db.Exec("PRAGMA foreign_keys = ON") }()
// Acquire EXCLUSIVE lock to serialize migrations across processes.
// Without this, parallel processes can race on check-then-modify operations
// (e.g., checking if a column exists then adding it), causing "duplicate column" errors.
_, err = db.Exec("BEGIN EXCLUSIVE")
if err != nil {
return fmt.Errorf("failed to acquire exclusive lock for migrations: %w", err)
}
// Ensure we release the lock on any exit path
committed := false
defer func() {
if !committed {
_, _ = db.Exec("ROLLBACK")
}
}()
snapshot, err := captureSnapshot(db)
if err != nil {
return fmt.Errorf("failed to capture pre-migration snapshot: %w", err)
@@ -118,5 +146,11 @@ func RunMigrations(db *sql.DB) error {
return fmt.Errorf("post-migration validation failed: %w", err)
}
// Commit the transaction
if _, err := db.Exec("COMMIT"); err != nil {
return fmt.Errorf("failed to commit migrations: %w", err)
}
committed = true
return nil
}