fix(sqlite): use BEGIN IMMEDIATE without retry loop (GH#1272)

The original PR added retry logic on top of BEGIN IMMEDIATE, but this caused
multi-minute hangs because:

1. Connection has busy_timeout=30s set via pragma
2. Each BEGIN IMMEDIATE waits up to 30s before returning SQLITE_BUSY
3. With 5 retries, worst case was 5 × 30s = 150+ seconds

The fix removes the retry loop since SQLite's busy_timeout already handles
retries internally. BEGIN IMMEDIATE still acquires the write lock early,
preventing deadlocks - we just let busy_timeout handle contention.

Root cause analysis in bd-9ldm.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
ruby
2026-01-22 18:55:30 -08:00
committed by Aaron Leiby
parent 718fc49776
commit 35bd93b443
5 changed files with 32 additions and 99 deletions

View File

@@ -4,6 +4,7 @@ import (
"context"
"database/sql"
"errors"
"strings"
"testing"
)
@@ -257,21 +258,24 @@ func TestIsBusyError(t *testing.T) {
}
}
func TestBeginImmediateWithRetry(t *testing.T) {
// TestBeginImmediate tests that BEGIN IMMEDIATE transactions work correctly.
// Note: The retry logic was removed because SQLite's busy_timeout pragma (30s)
// already handles retries internally. See GH#1272 for details.
func TestBeginImmediate(t *testing.T) {
ctx := context.Background()
store := newTestStore(t, t.TempDir()+"/test.db")
defer store.Close()
t.Run("successful on first try", func(t *testing.T) {
t.Run("successful BEGIN IMMEDIATE", func(t *testing.T) {
conn, err := store.db.Conn(ctx)
if err != nil {
t.Fatalf("Failed to acquire connection: %v", err)
}
defer conn.Close()
err = beginImmediateWithRetry(ctx, conn, 5, 10)
_, err = conn.ExecContext(ctx, "BEGIN IMMEDIATE")
if err != nil {
t.Errorf("beginImmediateWithRetry failed: %v", err)
t.Errorf("BEGIN IMMEDIATE failed: %v", err)
}
// Rollback to clean up
@@ -288,30 +292,14 @@ func TestBeginImmediateWithRetry(t *testing.T) {
cancelCtx, cancel := context.WithCancel(ctx)
cancel() // Cancel immediately
err = beginImmediateWithRetry(cancelCtx, conn, 5, 10)
_, err = conn.ExecContext(cancelCtx, "BEGIN IMMEDIATE")
if err == nil {
t.Error("Expected context cancellation error, got nil")
t.Error("Expected error due to canceled context, got nil")
_, _ = conn.ExecContext(context.Background(), "ROLLBACK")
}
if !errors.Is(err, context.Canceled) {
t.Errorf("Expected context.Canceled, got %v", err)
// sqlite3 driver returns "interrupted" error rather than wrapping context.Canceled
if err != nil && !errors.Is(err, context.Canceled) && !strings.Contains(err.Error(), "interrupt") {
t.Errorf("Expected context cancellation or interrupt error, got %v", err)
}
})
t.Run("defaults for invalid parameters", func(t *testing.T) {
conn, err := store.db.Conn(ctx)
if err != nil {
t.Fatalf("Failed to acquire connection: %v", err)
}
defer conn.Close()
// Should use defaults (5 retries, 10ms delay) when passed invalid values
err = beginImmediateWithRetry(ctx, conn, 0, 0)
if err != nil {
t.Errorf("beginImmediateWithRetry with invalid params failed: %v", err)
}
// Rollback to clean up
_, _ = conn.ExecContext(context.Background(), "ROLLBACK")
})
}