From 3a2e9d58526618ecf9b5a1e767ecc09086eeffcf Mon Sep 17 00:00:00 2001 From: Shaun Cutts Date: Sun, 30 Nov 2025 01:07:52 -0500 Subject: [PATCH] fix(multirepo): handle out-of-order dependencies during JSONL import (#414) * bd sync: 2025-11-29 00:08:58 * fix(multirepo): handle out-of-order dependencies during JSONL import Fixes #413. When importing issues from multi-repo JSONL files, if issue A (line 1) has a dependency on issue B (line 5), the import would fail with FK constraint error because B doesn't exist yet. Solution: - Disable FK checks at start of importJSONLFile() - Re-enable FK checks before commit - Run PRAGMA foreign_key_check to validate data integrity - Fail with clear error if orphaned dependencies are detected This allows out-of-order dependencies while still catching corrupted data. --------- Co-authored-by: Shaun Cutts --- .beads/issues.jsonl | 1 + internal/storage/sqlite/multirepo.go | 41 +++++- internal/storage/sqlite/multirepo_test.go | 158 +++++++++++++++++++--- 3 files changed, 182 insertions(+), 18 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 70937269..ebfd5634 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -27,6 +27,7 @@ {"id":"bd-bhd","title":"Git history fallback assumes .beads is direct child of repo root","description":"## Problem\n\n`checkGitHistoryForDeletions` assumes the repo structure:\n\n```go\nrepoRoot := filepath.Dir(beadsDir) // Assumes .beads is in repo root\njsonlPath := filepath.Join(\".beads\", \"beads.jsonl\")\n```\n\nBut `.beads` could be in a subdirectory (monorepo, nested project), and the actual JSONL filename could be different (configured via `metadata.json`).\n\n## Location\n`internal/importer/importer.go:865-869`\n\n## Impact\n- Git search will fail silently for repos with non-standard structure\n- Monorepo users won't get deletion propagation\n\n## Fix\n1. Use `git rev-parse --show-toplevel` to find actual repo root\n2. Compute relative path from repo root to JSONL\n3. Or use `git -C \u003cdir\u003e` to run from beadsDir directly","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-11-25T12:51:03.46856-08:00","updated_at":"2025-11-25T15:05:40.754716-08:00","closed_at":"2025-11-25T15:05:40.754716-08:00"} {"id":"bd-bok","title":"bd doctor --fix needs non-interactive mode (-y/--yes flag)","description":"When running `bd doctor --fix` in non-interactive mode (scripts, CI, Claude Code), it prompts 'Continue? (Y/n):' and fails with EOF.\n\n**Expected**: A `-y` or `--yes` flag to auto-confirm fixes.\n\n**Workaround**: Currently have to run `bd init` instead, but that's not discoverable from the doctor output.","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-11-27T20:21:10.290649-08:00","updated_at":"2025-11-28T22:17:12.607642-08:00","closed_at":"2025-11-28T21:56:14.708313-08:00"} {"id":"bd-bt6y","title":"Improve compact/daemon/merge documentation and UX","description":"Multiple documentation and UX issues encountered:\n1. \"bd compact --analyze\" fails with misleading \"requires SQLite storage\" error when daemon is running. Needs --no-daemon or better error.\n2. \"bd merge\" help text is outdated (refers to 3-way merge instead of issue merging).\n3. Daemon mode purpose isn't clear to local-only users.\n4. Compact/cleanup commands are hard to discover.\n\nProposed fixes:\n- Fix compact+daemon interaction or error message.\n- Update \"bd merge\" help text.\n- Add \"when to use daemon\" section to docs.\n- Add maintenance section to quickstart.\n","status":"open","priority":2,"issue_type":"task","created_at":"2025-11-20T18:55:43.637047-05:00","updated_at":"2025-11-20T18:55:43.637047-05:00"} +{"id":"bd-bx9","title":"bd init --contributor should configure sync.remote=upstream for fork workflows","description":"When running `bd init --contributor` in a fork workflow (where `upstream` remote points to the original repo), the wizard should configure beads to sync from `upstream/main` rather than `origin/main`.\n\n**Current behavior:**\n- Contributor mode detects the fork setup (upstream remote exists)\n- Sets up planning repo and auto-routing\n- Does NOT configure sync remote\n- `bd sync` on feature branches shows \"No upstream configured, using --from-main mode\" and syncs from `origin/main`\n\n**Expected behavior:**\n- Contributor mode should also set `sync.remote = upstream` (or similar config)\n- `bd sync` should pull beads from `upstream/main` (source of truth)\n\n**Why this matters:**\n- The fork's `origin/main` may be behind `upstream/main`\n- Contributors want the latest issues from the source repo\n- Code PRs go: local -\u003e origin -\u003e upstream, but beads should come FROM upstream\n\n**Suggested fix:**\nAdd to `runContributorWizard()` after detecting fork:\n```go\nif isFork {\n store.SetConfig(ctx, \"sync.remote\", \"upstream\")\n}\n```","status":"open","priority":2,"issue_type":"feature","created_at":"2025-11-29T00:39:05.137488727-05:00","updated_at":"2025-11-29T00:39:05.137488727-05:00","labels":["contributor","sync"]} {"id":"bd-c362","title":"Extract database search logic into helper function","description":"The logic for finding a database in a beads directory is duplicated:\n- FindDatabasePath() BEADS_DIR section (beads.go:141-169)\n- findDatabaseInTree() (beads.go:248-280)\n\nBoth implement the same search order:\n1. Check config.json first (single source of truth)\n2. Fall back to canonical beads.db\n3. Search for *.db files, filtering backups and vc.db\n\nRefactoring suggestion:\nExtract to a helper function like:\n func findDatabaseInBeadsDir(beadsDir string) string\n\nBenefits:\n- Single source of truth for database search logic\n- Easier to maintain and update search order\n- Reduces code duplication\n\nRelated to [deleted:bd-e16b] implementation.","status":"closed","priority":3,"issue_type":"chore","created_at":"2025-11-02T18:34:02.831543-08:00","updated_at":"2025-11-25T22:27:33.794656-08:00","closed_at":"2025-11-25T22:27:33.794656-08:00"} {"id":"bd-c4rq","title":"Refactor: Move staleness check inside daemon branch","description":"## Problem\n\nCurrently ensureDatabaseFresh() is called before the daemon mode check, but it checks daemonClient != nil internally and returns early. This is redundant.\n\n**Location:** All read commands (list.go:196, show.go:27, ready.go:102, status.go:80, etc.)\n\n## Current Pattern\n\nCall happens before daemon check, function checks daemonClient internally.\n\n## Better Pattern\n\nMove staleness check to direct mode branch only, after daemon check.\n\n## Impact\nLow - minor performance improvement (avoids one function call per command in daemon mode)\n\n## Effort\nMedium - requires refactoring 8 command files\n\n## Priority\nLow - can defer to future cleanup PR","status":"open","priority":3,"issue_type":"chore","created_at":"2025-11-20T20:17:45.119583-05:00","updated_at":"2025-11-20T20:17:45.119583-05:00"} {"id":"bd-c8x","title":"Don't search parent directories for .beads databases","description":"bd currently walks up the directory tree looking for .beads directories, which can find unrelated databases (e.g., ~/.beads). This causes confusing warnings and potential data pollution.\n\nShould either:\n1. Stop at git root (don't search above it)\n2. Only use explicit BEADS_DB env var or local .beads\n3. At minimum, don't search in home directory","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-11-27T22:10:41.992686-08:00","updated_at":"2025-11-28T22:17:12.607956-08:00","closed_at":"2025-11-28T22:15:55.878353-08:00"} diff --git a/internal/storage/sqlite/multirepo.go b/internal/storage/sqlite/multirepo.go index 80d09e32..adbf529d 100644 --- a/internal/storage/sqlite/multirepo.go +++ b/internal/storage/sqlite/multirepo.go @@ -117,6 +117,7 @@ func (s *SQLiteStorage) hydrateFromRepo(ctx context.Context, repoPath, sourceRep } // importJSONLFile imports issues from a JSONL file, setting the source_repo field. +// Disables FK checks during import to handle out-of-order dependencies. func (s *SQLiteStorage) importJSONLFile(ctx context.Context, jsonlPath, sourceRepo string) (int, error) { file, err := os.Open(jsonlPath) // #nosec G304 -- jsonlPath is from trusted source if err != nil { @@ -138,8 +139,22 @@ func (s *SQLiteStorage) importJSONLFile(ctx context.Context, jsonlPath, sourceRe count := 0 lineNum := 0 + // Get exclusive connection to ensure PRAGMA applies + conn, err := s.db.Conn(ctx) + if err != nil { + return 0, fmt.Errorf("failed to get connection: %w", err) + } + defer func() { _ = conn.Close() }() + + // Disable foreign keys on this connection to handle out-of-order deps + // (issue A may depend on issue B that appears later in the file) + _, err = conn.ExecContext(ctx, `PRAGMA foreign_keys = OFF`) + if err != nil { + return 0, fmt.Errorf("failed to disable foreign keys: %w", err) + } + // Begin transaction for bulk import - tx, err := s.db.BeginTx(ctx, nil) + tx, err := conn.BeginTx(ctx, nil) if err != nil { return 0, fmt.Errorf("failed to begin transaction: %w", err) } @@ -179,6 +194,28 @@ func (s *SQLiteStorage) importJSONLFile(ctx context.Context, jsonlPath, sourceRe return 0, fmt.Errorf("failed to read JSONL file: %w", err) } + // Re-enable foreign keys before commit to validate data integrity + _, err = conn.ExecContext(ctx, `PRAGMA foreign_keys = ON`) + if err != nil { + return 0, fmt.Errorf("failed to re-enable foreign keys: %w", err) + } + + // Validate FK constraints on imported data + rows, err := conn.QueryContext(ctx, `PRAGMA foreign_key_check`) + if err != nil { + return 0, fmt.Errorf("failed to check foreign keys: %w", err) + } + defer rows.Close() + + if rows.Next() { + var table, rowid, parent, fkid string + _ = rows.Scan(&table, &rowid, &parent, &fkid) + return 0, fmt.Errorf( + "foreign key violation in imported data: table=%s rowid=%s parent=%s", + table, rowid, parent, + ) + } + if err := tx.Commit(); err != nil { return 0, fmt.Errorf("failed to commit transaction: %w", err) } @@ -197,7 +234,7 @@ func (s *SQLiteStorage) upsertIssueInTx(ctx context.Context, tx *sql.Tx, issue * // Check if issue exists var existingID string err := tx.QueryRowContext(ctx, `SELECT id FROM issues WHERE id = ?`, issue.ID).Scan(&existingID) - + if err == sql.ErrNoRows { // Issue doesn't exist - insert it _, err = tx.ExecContext(ctx, ` diff --git a/internal/storage/sqlite/multirepo_test.go b/internal/storage/sqlite/multirepo_test.go index e00d993a..106ecf69 100644 --- a/internal/storage/sqlite/multirepo_test.go +++ b/internal/storage/sqlite/multirepo_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" "time" @@ -72,14 +73,14 @@ func TestHydrateFromMultiRepo(t *testing.T) { // Create test issue issue := types.Issue{ - ID: "test-1", - Title: "Test Issue", - Status: types.StatusOpen, - Priority: 1, - IssueType: types.TypeTask, - CreatedAt: time.Now(), - UpdatedAt: time.Now(), - SourceRepo: ".", + ID: "test-1", + Title: "Test Issue", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + SourceRepo: ".", } issue.ContentHash = issue.ComputeContentHash() @@ -143,14 +144,14 @@ func TestHydrateFromMultiRepo(t *testing.T) { // Create test issue issue := types.Issue{ - ID: "test-2", - Title: "Test Issue 2", - Status: types.StatusOpen, - Priority: 1, - IssueType: types.TypeTask, - CreatedAt: time.Now(), - UpdatedAt: time.Now(), - SourceRepo: ".", + ID: "test-2", + Title: "Test Issue 2", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + SourceRepo: ".", } issue.ContentHash = issue.ComputeContentHash() @@ -374,6 +375,131 @@ func TestImportJSONLFile(t *testing.T) { }) } +func TestImportJSONLFileOutOfOrderDeps(t *testing.T) { + t.Run("handles out-of-order dependencies", func(t *testing.T) { + store, cleanup := setupTestDB(t) + defer cleanup() + + // Create test JSONL file with dependency BEFORE its target + tmpDir := t.TempDir() + jsonlPath := filepath.Join(tmpDir, "test.jsonl") + f, err := os.Create(jsonlPath) + if err != nil { + t.Fatalf("failed to create JSONL file: %v", err) + } + + // Issue 1 depends on Issue 2, but Issue 1 comes FIRST in the file + // This would fail with FK constraint if not handled properly + issue1 := types.Issue{ + ID: "test-1", + Title: "Issue 1 (depends on Issue 2)", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + Dependencies: []*types.Dependency{ + { + IssueID: "test-1", + DependsOnID: "test-2", // test-2 doesn't exist yet! + Type: types.DepBlocks, + CreatedAt: time.Now(), + CreatedBy: "test", + }, + }, + SourceRepo: "test", + } + issue1.ContentHash = issue1.ComputeContentHash() + + issue2 := types.Issue{ + ID: "test-2", + Title: "Issue 2 (dependency target)", + Status: types.StatusOpen, + Priority: 2, + IssueType: types.TypeTask, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + SourceRepo: "test", + } + issue2.ContentHash = issue2.ComputeContentHash() + + enc := json.NewEncoder(f) + enc.Encode(issue1) // Dependent first + enc.Encode(issue2) // Dependency target second + f.Close() + + // Import should succeed despite out-of-order dependencies + ctx := context.Background() + count, err := store.importJSONLFile(ctx, jsonlPath, "test") + if err != nil { + t.Fatalf("importJSONLFile() error = %v", err) + } + if count != 2 { + t.Errorf("expected 2 issues imported, got %d", count) + } + + // Verify dependency was created + deps, err := store.GetDependencies(ctx, "test-1") + if err != nil { + t.Fatalf("failed to get dependencies: %v", err) + } + if len(deps) != 1 { + t.Errorf("expected 1 dependency, got %d", len(deps)) + } + if len(deps) > 0 && deps[0].ID != "test-2" { + t.Errorf("expected dependency on test-2, got %s", deps[0].ID) + } + }) + + t.Run("detects orphaned dependencies in corrupted data", func(t *testing.T) { + store, cleanup := setupTestDB(t) + defer cleanup() + + // Create test JSONL with orphaned dependency (target doesn't exist) + tmpDir := t.TempDir() + jsonlPath := filepath.Join(tmpDir, "test.jsonl") + f, err := os.Create(jsonlPath) + if err != nil { + t.Fatalf("failed to create JSONL file: %v", err) + } + + issue := types.Issue{ + ID: "test-orphan", + Title: "Issue with orphaned dependency", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + Dependencies: []*types.Dependency{ + { + IssueID: "test-orphan", + DependsOnID: "nonexistent-issue", // This issue doesn't exist + Type: types.DepBlocks, + CreatedAt: time.Now(), + CreatedBy: "test", + }, + }, + SourceRepo: "test", + } + issue.ContentHash = issue.ComputeContentHash() + + enc := json.NewEncoder(f) + enc.Encode(issue) + f.Close() + + // Import should fail due to FK violation + ctx := context.Background() + _, err = store.importJSONLFile(ctx, jsonlPath, "test") + if err == nil { + t.Error("expected error for orphaned dependency, got nil") + } + if err != nil && !strings.Contains(err.Error(), "foreign key violation") { + t.Errorf("expected foreign key violation error, got: %v", err) + } + }) +} + func TestExportToMultiRepo(t *testing.T) { t.Run("returns nil in single-repo mode", func(t *testing.T) { store, cleanup := setupTestDB(t)