fix(sqlite): use BEGIN IMMEDIATE without retry loop (GH#1272)
The original PR added retry logic on top of BEGIN IMMEDIATE, but this caused multi-minute hangs because:

1. The connection has busy_timeout=30s set via pragma.
2. Each BEGIN IMMEDIATE waits up to 30s before returning SQLITE_BUSY.
3. With 5 retries, the worst case was 5 × 30s = 150+ seconds.

The fix removes the retry loop, since SQLite's busy_timeout already handles retries internally. BEGIN IMMEDIATE still acquires the write lock early, preventing deadlocks; we just let busy_timeout handle contention.

Root cause analysis in bd-9ldm.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -273,8 +273,9 @@ func (s *SQLiteStorage) CreateIssuesWithFullOptions(ctx context.Context, issues
|
|||||||
}
|
}
|
||||||
defer func() { _ = conn.Close() }()
|
defer func() { _ = conn.Close() }()
|
||||||
|
|
||||||
// Use retry logic with exponential backoff to handle SQLITE_BUSY under concurrent load
|
// Start IMMEDIATE transaction to acquire write lock early.
|
||||||
if err := beginImmediateWithRetry(ctx, conn, 5, 10*time.Millisecond); err != nil {
|
// The connection's busy_timeout pragma (30s) handles retries if locked.
|
||||||
|
if _, err := conn.ExecContext(ctx, "BEGIN IMMEDIATE"); err != nil {
|
||||||
return fmt.Errorf("failed to begin immediate transaction: %w", err)
|
return fmt.Errorf("failed to begin immediate transaction: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -158,8 +158,8 @@ func (s *SQLiteStorage) CreateIssue(ctx context.Context, issue *types.Issue, act
|
|||||||
// We use raw Exec instead of BeginTx because database/sql doesn't support transaction
|
// We use raw Exec instead of BeginTx because database/sql doesn't support transaction
|
||||||
// modes in BeginTx, and modernc.org/sqlite's BeginTx always uses DEFERRED mode.
|
// modes in BeginTx, and modernc.org/sqlite's BeginTx always uses DEFERRED mode.
|
||||||
//
|
//
|
||||||
// Use retry logic with exponential backoff to handle SQLITE_BUSY under concurrent load
|
// The connection's busy_timeout pragma (30s) handles retries if locked.
|
||||||
if err := beginImmediateWithRetry(ctx, conn, 5, 10*time.Millisecond); err != nil {
|
if _, err := conn.ExecContext(ctx, "BEGIN IMMEDIATE"); err != nil {
|
||||||
return fmt.Errorf("failed to begin immediate transaction: %w", err)
|
return fmt.Errorf("failed to begin immediate transaction: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -48,8 +48,9 @@ func (s *SQLiteStorage) RunInTransaction(ctx context.Context, fn func(tx storage
|
|||||||
defer func() { _ = conn.Close() }()
|
defer func() { _ = conn.Close() }()
|
||||||
|
|
||||||
// Start IMMEDIATE transaction to acquire write lock early.
|
// Start IMMEDIATE transaction to acquire write lock early.
|
||||||
// Use retry logic with exponential backoff to handle SQLITE_BUSY
|
// BEGIN IMMEDIATE prevents deadlocks by acquiring the write lock upfront.
|
||||||
if err := beginImmediateWithRetry(ctx, conn, 5, 10*time.Millisecond); err != nil {
|
// The connection's busy_timeout pragma (30s) handles retries if locked.
|
||||||
|
if _, err := conn.ExecContext(ctx, "BEGIN IMMEDIATE"); err != nil {
|
||||||
return fmt.Errorf("failed to begin transaction: %w", err)
|
return fmt.Errorf("failed to begin transaction: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// QueryContext exposes the underlying database QueryContext method for advanced queries
|
// QueryContext exposes the underlying database QueryContext method for advanced queries
|
||||||
@@ -18,10 +17,13 @@ func (s *SQLiteStorage) BeginTx(ctx context.Context) (*sql.Tx, error) {
|
|||||||
return s.db.BeginTx(ctx, nil)
|
return s.db.BeginTx(ctx, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// withTx executes a function within a database transaction with retry logic.
|
// withTx executes a function within a database transaction.
|
||||||
// Uses BEGIN IMMEDIATE with exponential backoff retry on SQLITE_BUSY errors.
|
// Uses BEGIN IMMEDIATE to acquire the write lock early, preventing deadlocks
|
||||||
// If the function returns an error, the transaction is rolled back.
|
// in concurrent scenarios. If the function returns an error, the transaction
|
||||||
// Otherwise, the transaction is committed.
|
// is rolled back. Otherwise, the transaction is committed.
|
||||||
|
//
|
||||||
|
// The connection's busy_timeout pragma (30s by default) handles SQLITE_BUSY
|
||||||
|
// retries internally - no additional retry logic is needed here.
|
||||||
//
|
//
|
||||||
// This fixes GH#1272: database lock errors during concurrent operations.
|
// This fixes GH#1272: database lock errors during concurrent operations.
|
||||||
func (s *SQLiteStorage) withTx(ctx context.Context, fn func(*sql.Conn) error) error {
|
func (s *SQLiteStorage) withTx(ctx context.Context, fn func(*sql.Conn) error) error {
|
||||||
@@ -34,8 +36,10 @@ func (s *SQLiteStorage) withTx(ctx context.Context, fn func(*sql.Conn) error) er
|
|||||||
defer func() { _ = conn.Close() }()
|
defer func() { _ = conn.Close() }()
|
||||||
|
|
||||||
// Start IMMEDIATE transaction to acquire write lock early.
|
// Start IMMEDIATE transaction to acquire write lock early.
|
||||||
// Use retry logic with exponential backoff to handle SQLITE_BUSY
|
// BEGIN IMMEDIATE prevents deadlocks by acquiring the write lock upfront
|
||||||
if err := beginImmediateWithRetry(ctx, conn, 5, 10*time.Millisecond); err != nil {
|
// rather than upgrading from a read lock later. The connection's
|
||||||
|
// busy_timeout pragma (30s) handles retries if another writer holds the lock.
|
||||||
|
if _, err := conn.ExecContext(ctx, "BEGIN IMMEDIATE"); err != nil {
|
||||||
return wrapDBError("begin transaction", err)
|
return wrapDBError("begin transaction", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,64 +107,3 @@ func IsBusyError(err error) bool {
|
|||||||
return strings.Contains(errStr, "database is locked") ||
|
return strings.Contains(errStr, "database is locked") ||
|
||||||
strings.Contains(errStr, "SQLITE_BUSY")
|
strings.Contains(errStr, "SQLITE_BUSY")
|
||||||
}
|
}
|
||||||
|
|
||||||
// beginImmediateWithRetry starts an IMMEDIATE transaction with exponential backoff retry
|
|
||||||
// on SQLITE_BUSY errors. This addresses bd-ola6: under concurrent write load, BEGIN IMMEDIATE
|
|
||||||
// can fail with SQLITE_BUSY, so we retry with exponential backoff instead of failing immediately.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - ctx: context for cancellation checking
|
|
||||||
// - conn: dedicated database connection (must use same connection for entire transaction)
|
|
||||||
// - maxRetries: maximum number of retry attempts (default: 5)
|
|
||||||
// - initialDelay: initial backoff delay (default: 10ms)
|
|
||||||
//
|
|
||||||
// Returns error if:
|
|
||||||
// - Context is canceled
|
|
||||||
// - BEGIN IMMEDIATE fails with non-busy error
|
|
||||||
// - All retries exhausted with SQLITE_BUSY
|
|
||||||
func beginImmediateWithRetry(ctx context.Context, conn *sql.Conn, maxRetries int, initialDelay time.Duration) error {
|
|
||||||
if maxRetries <= 0 {
|
|
||||||
maxRetries = 5
|
|
||||||
}
|
|
||||||
if initialDelay <= 0 {
|
|
||||||
initialDelay = 10 * time.Millisecond
|
|
||||||
}
|
|
||||||
|
|
||||||
var lastErr error
|
|
||||||
delay := initialDelay
|
|
||||||
|
|
||||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
|
||||||
// Check context cancellation before each attempt
|
|
||||||
if err := ctx.Err(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attempt to begin transaction
|
|
||||||
_, err := conn.ExecContext(ctx, "BEGIN IMMEDIATE")
|
|
||||||
if err == nil {
|
|
||||||
return nil // Success
|
|
||||||
}
|
|
||||||
|
|
||||||
lastErr = err
|
|
||||||
|
|
||||||
// If not a busy error, fail immediately
|
|
||||||
if !IsBusyError(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// On last attempt, don't sleep
|
|
||||||
if attempt == maxRetries {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Exponential backoff: sleep before retry
|
|
||||||
select {
|
|
||||||
case <-time.After(delay):
|
|
||||||
delay *= 2 // Double the delay for next attempt
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return lastErr // Return the last SQLITE_BUSY error after exhausting retries
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"errors"
|
"errors"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -257,21 +258,24 @@ func TestIsBusyError(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBeginImmediateWithRetry(t *testing.T) {
|
// TestBeginImmediate tests that BEGIN IMMEDIATE transactions work correctly.
|
||||||
|
// Note: The retry logic was removed because SQLite's busy_timeout pragma (30s)
|
||||||
|
// already handles retries internally. See GH#1272 for details.
|
||||||
|
func TestBeginImmediate(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
store := newTestStore(t, t.TempDir()+"/test.db")
|
store := newTestStore(t, t.TempDir()+"/test.db")
|
||||||
defer store.Close()
|
defer store.Close()
|
||||||
|
|
||||||
t.Run("successful on first try", func(t *testing.T) {
|
t.Run("successful BEGIN IMMEDIATE", func(t *testing.T) {
|
||||||
conn, err := store.db.Conn(ctx)
|
conn, err := store.db.Conn(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to acquire connection: %v", err)
|
t.Fatalf("Failed to acquire connection: %v", err)
|
||||||
}
|
}
|
||||||
defer conn.Close()
|
defer conn.Close()
|
||||||
|
|
||||||
err = beginImmediateWithRetry(ctx, conn, 5, 10)
|
_, err = conn.ExecContext(ctx, "BEGIN IMMEDIATE")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("beginImmediateWithRetry failed: %v", err)
|
t.Errorf("BEGIN IMMEDIATE failed: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rollback to clean up
|
// Rollback to clean up
|
||||||
@@ -288,30 +292,14 @@ func TestBeginImmediateWithRetry(t *testing.T) {
|
|||||||
cancelCtx, cancel := context.WithCancel(ctx)
|
cancelCtx, cancel := context.WithCancel(ctx)
|
||||||
cancel() // Cancel immediately
|
cancel() // Cancel immediately
|
||||||
|
|
||||||
err = beginImmediateWithRetry(cancelCtx, conn, 5, 10)
|
_, err = conn.ExecContext(cancelCtx, "BEGIN IMMEDIATE")
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Error("Expected context cancellation error, got nil")
|
t.Error("Expected error due to canceled context, got nil")
|
||||||
_, _ = conn.ExecContext(context.Background(), "ROLLBACK")
|
_, _ = conn.ExecContext(context.Background(), "ROLLBACK")
|
||||||
}
|
}
|
||||||
if !errors.Is(err, context.Canceled) {
|
// sqlite3 driver returns "interrupted" error rather than wrapping context.Canceled
|
||||||
t.Errorf("Expected context.Canceled, got %v", err)
|
if err != nil && !errors.Is(err, context.Canceled) && !strings.Contains(err.Error(), "interrupt") {
|
||||||
|
t.Errorf("Expected context cancellation or interrupt error, got %v", err)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("defaults for invalid parameters", func(t *testing.T) {
|
|
||||||
conn, err := store.db.Conn(ctx)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to acquire connection: %v", err)
|
|
||||||
}
|
|
||||||
defer conn.Close()
|
|
||||||
|
|
||||||
// Should use defaults (5 retries, 10ms delay) when passed invalid values
|
|
||||||
err = beginImmediateWithRetry(ctx, conn, 0, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("beginImmediateWithRetry with invalid params failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rollback to clean up
|
|
||||||
_, _ = conn.ExecContext(context.Background(), "ROLLBACK")
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user