doctor: add fs fault injection and lock contention coverage

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
This commit is contained in:
Jordan Hubbard
2025-12-26 09:22:45 -04:00
parent 8166207eb4
commit 7af3106610
15 changed files with 357 additions and 68 deletions

View File

@@ -65,10 +65,10 @@ func DatabaseIntegrity(path string) error {
// Back up corrupt DB and its sidecar files.
ts := time.Now().UTC().Format("20060102T150405Z")
backupDB := dbPath + "." + ts + ".corrupt.backup.db"
if err := os.Rename(dbPath, backupDB); err != nil {
if err := moveFile(dbPath, backupDB); err != nil {
// Retry once after attempting to kill daemons again (helps on platforms with strict file locks).
_ = Daemon(absPath)
if err2 := os.Rename(dbPath, backupDB); err2 != nil {
if err2 := moveFile(dbPath, backupDB); err2 != nil {
// Prefer the original error (more likely root cause).
return fmt.Errorf("failed to back up database: %w", err)
}
@@ -76,7 +76,7 @@ func DatabaseIntegrity(path string) error {
for _, suffix := range []string{"-wal", "-shm", "-journal"} {
sidecar := dbPath + suffix
if _, err := os.Stat(sidecar); err == nil {
_ = os.Rename(sidecar, backupDB+suffix) // best effort
_ = moveFile(sidecar, backupDB+suffix) // best effort
}
}
@@ -98,9 +98,9 @@ func DatabaseIntegrity(path string) error {
failedTS := time.Now().UTC().Format("20060102T150405Z")
if _, statErr := os.Stat(dbPath); statErr == nil {
failedDB := dbPath + "." + failedTS + ".failed.init.db"
_ = os.Rename(dbPath, failedDB)
_ = moveFile(dbPath, failedDB)
for _, suffix := range []string{"-wal", "-shm", "-journal"} {
_ = os.Rename(dbPath+suffix, failedDB+suffix)
_ = moveFile(dbPath+suffix, failedDB+suffix)
}
}
_ = copyFile(backupDB, dbPath)

57
cmd/bd/doctor/fix/fs.go Normal file
View File

@@ -0,0 +1,57 @@
package fix
import (
"errors"
"fmt"
"io"
"os"
"syscall"
)
// Filesystem primitives used by moveFile and copyFile. They are held in
// package-level variables rather than called directly so that tests can swap
// in failing implementations (for example a rename that reports EXDEV) and
// restore the originals afterwards.
var (
// renameFile is the fast path tried first by moveFile.
renameFile = os.Rename
// removeFile deletes the source once moveFile's copy fallback has succeeded.
removeFile = os.Remove
// openFileRO opens the source file for copyFile.
openFileRO = os.Open
// openFileRW creates or truncates the destination file for copyFile.
openFileRW = os.OpenFile
)
// moveFile relocates src to dst.
//
// A plain rename is attempted first. Only when the rename fails with EXDEV
// (source and destination on different devices) does it fall back to copying
// the contents to dst and then deleting src. Any other rename error is
// returned unchanged. If the copy fails the source is left in place; if the
// copy succeeds but the source cannot be removed, the removal error is
// returned wrapped.
func moveFile(src, dst string) error {
	renameErr := renameFile(src, dst)
	if renameErr == nil {
		return nil
	}
	if !isEXDEV(renameErr) {
		return renameErr
	}

	// Cross-device move: emulate the rename with copy + delete.
	if copyErr := copyFile(src, dst); copyErr != nil {
		return copyErr
	}
	if rmErr := removeFile(src); rmErr != nil {
		return fmt.Errorf("failed to remove source after copy: %w", rmErr)
	}
	return nil
}
// copyFile copies the contents of src into dst.
//
// dst is created with mode 0644 if it does not exist and truncated if it
// does. The source is opened before the destination, so a missing or
// unreadable source never touches an existing dst.
//
// If writing or closing dst fails after it has been opened, the partially
// written dst is removed (best effort) so that a failed copy cannot leave
// behind a truncated file that looks like a valid backup or restored file.
// The original I/O error is the one returned.
func copyFile(src, dst string) error {
	in, err := openFileRO(src) // #nosec G304 -- src is within the workspace
	if err != nil {
		return err
	}
	// Read-only handle: a close error carries no information worth reporting.
	defer func() { _ = in.Close() }()

	out, err := openFileRW(dst, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, in); err != nil {
		// Close before removing: some platforms refuse to delete open files.
		_ = out.Close()
		_ = os.Remove(dst) // best effort; the copy error is the root cause
		return err
	}
	// Close exactly once and report its error: buffered data may only be
	// flushed (and fail) at close time.
	if err := out.Close(); err != nil {
		_ = os.Remove(dst) // best effort; contents cannot be trusted
		return err
	}
	return nil
}
// isEXDEV reports whether err indicates a cross-device link failure
// (syscall.EXDEV), the error a rename produces when source and destination
// live on different devices.
//
// When err's chain contains an *os.LinkError, the decision is based on that
// LinkError's underlying Err; otherwise the whole chain of err is searched.
// A nil err yields false.
func isEXDEV(err error) bool {
	candidate := err
	var le *os.LinkError
	if errors.As(err, &le) {
		candidate = le.Err
	}
	return errors.Is(candidate, syscall.EXDEV)
}

View File

@@ -0,0 +1,71 @@
package fix
import (
"errors"
"os"
"path/filepath"
"syscall"
"testing"
)
// TestMoveFile_EXDEV_FallsBackToCopy forces the rename seam to report EXDEV
// and checks that moveFile still succeeds: the source must be gone and the
// destination must hold the original contents.
func TestMoveFile_EXDEV_FallsBackToCopy(t *testing.T) {
	dir := t.TempDir()
	src, dst := filepath.Join(dir, "src.txt"), filepath.Join(dir, "dst.txt")
	if err := os.WriteFile(src, []byte("hello"), 0644); err != nil {
		t.Fatal(err)
	}

	// Simulate a cross-device rename; restore the real rename afterwards.
	realRename := renameFile
	t.Cleanup(func() { renameFile = realRename })
	renameFile = func(from, to string) error {
		return &os.LinkError{Op: "rename", Old: from, New: to, Err: syscall.EXDEV}
	}

	if err := moveFile(src, dst); err != nil {
		t.Fatalf("moveFile failed: %v", err)
	}

	if _, err := os.Stat(src); !os.IsNotExist(err) {
		t.Fatalf("expected src to be removed, stat err=%v", err)
	}
	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read dst: %v", err)
	}
	if string(got) != "hello" {
		t.Fatalf("dst contents=%q", string(got))
	}
}
// TestMoveFile_EXDEV_CopyFails_LeavesSource forces the rename seam to report
// EXDEV and the destination-open seam to report ENOSPC, then checks that
// moveFile surfaces ENOSPC and does not delete the source.
func TestMoveFile_EXDEV_CopyFails_LeavesSource(t *testing.T) {
	dir := t.TempDir()
	src, dst := filepath.Join(dir, "src.txt"), filepath.Join(dir, "dst.txt")
	if err := os.WriteFile(src, []byte("hello"), 0644); err != nil {
		t.Fatal(err)
	}

	// Swap in failing seams; restore the real ones afterwards.
	realRename, realOpenRW := renameFile, openFileRW
	t.Cleanup(func() {
		renameFile = realRename
		openFileRW = realOpenRW
	})
	renameFile = func(from, to string) error {
		return &os.LinkError{Op: "rename", Old: from, New: to, Err: syscall.EXDEV}
	}
	openFileRW = func(name string, _ int, _ os.FileMode) (*os.File, error) {
		return nil, &os.PathError{Op: "open", Path: name, Err: syscall.ENOSPC}
	}

	moveErr := moveFile(src, dst)
	if moveErr == nil {
		t.Fatalf("expected error")
	}
	if !errors.Is(moveErr, syscall.ENOSPC) {
		t.Fatalf("expected ENOSPC, got %v", moveErr)
	}

	if _, err := os.Stat(src); err != nil {
		t.Fatalf("expected src to remain, stat err=%v", err)
	}
}

View File

@@ -2,7 +2,6 @@ package fix
import (
"fmt"
"io"
"os"
"path/filepath"
"time"
@@ -58,13 +57,13 @@ func JSONLIntegrity(path string) error {
// Back up the JSONL.
ts := time.Now().UTC().Format("20060102T150405Z")
backup := jsonlPath + "." + ts + ".corrupt.backup.jsonl"
if err := os.Rename(jsonlPath, backup); err != nil {
if err := moveFile(jsonlPath, backup); err != nil {
return fmt.Errorf("failed to back up JSONL: %w", err)
}
binary, err := getBdBinary()
if err != nil {
_ = os.Rename(backup, jsonlPath)
_ = moveFile(backup, jsonlPath)
return err
}
@@ -78,7 +77,7 @@ func JSONLIntegrity(path string) error {
failedTS := time.Now().UTC().Format("20060102T150405Z")
if _, statErr := os.Stat(jsonlPath); statErr == nil {
failed := jsonlPath + "." + failedTS + ".failed.regen.jsonl"
_ = os.Rename(jsonlPath, failed)
_ = moveFile(jsonlPath, failed)
}
_ = copyFile(backup, jsonlPath)
return fmt.Errorf("failed to regenerate JSONL from database: %w (backup: %s)", err, backup)
@@ -86,20 +85,3 @@ func JSONLIntegrity(path string) error {
return nil
}
// copyFile copies the contents of src into dst.
//
// dst is created with mode 0644 if it does not exist and truncated if it
// does. The source is opened before the destination, so a missing or
// unreadable source never touches an existing dst.
//
// If writing or closing dst fails after it has been opened, the partially
// written dst is removed (best effort) so that a failed copy cannot leave
// behind a truncated file that looks like a valid backup or restored file.
// The original I/O error is the one returned.
func copyFile(src, dst string) error {
	in, err := os.Open(src) // #nosec G304 -- src is within the workspace
	if err != nil {
		return err
	}
	// Read-only handle: a close error carries no information worth reporting.
	defer func() { _ = in.Close() }()

	out, err := os.OpenFile(dst, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, in); err != nil {
		// Close before removing: some platforms refuse to delete open files.
		_ = out.Close()
		_ = os.Remove(dst) // best effort; the copy error is the root cause
		return err
	}
	// Close exactly once and report its error: buffered data may only be
	// flushed (and fail) at close time.
	if err := out.Close(); err != nil {
		_ = os.Remove(dst) // best effort; contents cannot be trusted
		return err
	}
	return nil
}

View File

@@ -0,0 +1,52 @@
package fix
import (
"fmt"
"os"
"strings"
"time"
)
// sqliteConnString builds the connection string used to open the SQLite
// database at path.
//
// Surrounding whitespace is trimmed from path; an empty result yields "".
// The busy timeout defaults to 30 seconds and can be overridden with the
// BD_LOCK_TIMEOUT environment variable (a time.ParseDuration value; values
// that fail to parse are ignored). It is emitted in milliseconds.
//
// A path that already starts with "file:" is kept as given, and only the
// parameters it does not already mention are appended: mode=ro (when readOnly
// is set), the busy_timeout and foreign_keys pragmas, and _time_format.
// Presence is detected by substring match on the string built so far. Any
// other path is wrapped as "file:<path>?..." with the full parameter set.
func sqliteConnString(path string, readOnly bool) string {
	dsn := strings.TrimSpace(path)
	if dsn == "" {
		return ""
	}

	timeout := 30 * time.Second
	if raw := strings.TrimSpace(os.Getenv("BD_LOCK_TIMEOUT")); raw != "" {
		if parsed, err := time.ParseDuration(raw); err == nil {
			timeout = parsed
		}
	}
	busyPragma := fmt.Sprintf("_pragma=busy_timeout(%d)", timeout.Milliseconds())

	// Bare filesystem path: emit the complete, fixed parameter list.
	if !strings.HasPrefix(dsn, "file:") {
		params := make([]string, 0, 4)
		if readOnly {
			params = append(params, "mode=ro")
		}
		params = append(params, "_pragma=foreign_keys(ON)", busyPragma, "_time_format=sqlite")
		return "file:" + dsn + "?" + strings.Join(params, "&")
	}

	// Existing file: URI: append only what is missing, using "?" for the
	// first parameter of a query-less URI and "&" thereafter.
	hasQuery := strings.Contains(dsn, "?")
	addParam := func(param string) {
		if hasQuery {
			dsn += "&" + param
			return
		}
		dsn += "?" + param
		hasQuery = true
	}

	if readOnly && !strings.Contains(dsn, "mode=") {
		addParam("mode=ro")
	}
	if !strings.Contains(dsn, "_pragma=busy_timeout") {
		addParam(busyPragma)
	}
	if !strings.Contains(dsn, "_pragma=foreign_keys") {
		addParam("_pragma=foreign_keys(ON)")
	}
	if !strings.Contains(dsn, "_time_format=") {
		addParam("_time_format=sqlite")
	}
	return dsn
}

View File

@@ -149,7 +149,7 @@ func DBJSONLSync(path string) error {
// countDatabaseIssues counts the number of issues in the database.
func countDatabaseIssues(dbPath string) (int, error) {
db, err := sql.Open("sqlite3", dbPath)
db, err := sql.Open("sqlite3", sqliteConnString(dbPath, true))
if err != nil {
return 0, fmt.Errorf("failed to open database: %w", err)
}

View File

@@ -229,5 +229,5 @@ func ChildParentDependencies(path string) error {
// openDB opens a SQLite database for read-write access
func openDB(dbPath string) (*sql.DB, error) {
return sql.Open("sqlite3", dbPath)
return sql.Open("sqlite3", sqliteConnString(dbPath, false))
}