Merge pull request #625 from marcodelpin/fix/import-dedupe-by-id

fix(import): handle duplicate issue IDs in JSONL files gracefully
This commit is contained in:
Steve Yegge
2025-12-18 11:08:32 -08:00
committed by GitHub
6 changed files with 60 additions and 9 deletions

View File

@@ -217,6 +217,7 @@ The form uses keyboard navigation:
} }
func runCreateForm(cmd *cobra.Command) { func runCreateForm(cmd *cobra.Command) {
_ = cmd // cmd parameter required by cobra.Command.Run signature
// Raw form input - will be populated by the form // Raw form input - will be populated by the form
raw := &createFormRawInput{} raw := &createFormRawInput{}
@@ -329,7 +330,7 @@ func runCreateForm(cmd *cobra.Command) {
err := form.Run() err := form.Run()
if err != nil { if err != nil {
if err == huh.ErrUserAborted { if err == huh.ErrUserAborted {
fmt.Fprintln(os.Stderr, "Issue creation cancelled.") fmt.Fprintln(os.Stderr, "Issue creation canceled.")
os.Exit(0) os.Exit(0)
} }
FatalError("form error: %v", err) FatalError("form error: %v", err)

View File

@@ -94,6 +94,7 @@ func readBodyFile(filePath string) (string, error) {
if filePath == "-" { if filePath == "-" {
reader = os.Stdin reader = os.Stdin
} else { } else {
// #nosec G304 - filePath comes from user flag, validated by caller
file, err := os.Open(filePath) file, err := os.Open(filePath)
if err != nil { if err != nil {
return "", fmt.Errorf("failed to open file: %w", err) return "", fmt.Errorf("failed to open file: %w", err)

View File

@@ -67,8 +67,10 @@ func (r *Runner) Run(event string, issue *types.Issue) {
return // Not executable, skip return // Not executable, skip
} }
// Run asynchronously // Run asynchronously (ignore error as this is fire-and-forget)
go r.runHook(hookPath, event, issue) go func() {
_ = r.runHook(hookPath, event, issue)
}()
} }
// RunSync executes a hook synchronously and returns any error. // RunSync executes a hook synchronously and returns any error.

View File

@@ -477,6 +477,7 @@ func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues
// Track what we need to create // Track what we need to create
var newIssues []*types.Issue var newIssues []*types.Issue
seenHashes := make(map[string]bool) seenHashes := make(map[string]bool)
seenIDs := make(map[string]bool) // Track IDs to prevent UNIQUE constraint errors
for _, incoming := range issues { for _, incoming := range issues {
hash := incoming.ContentHash hash := incoming.ContentHash
@@ -486,13 +487,21 @@ func upsertIssues(ctx context.Context, sqliteStore *sqlite.SQLiteStorage, issues
incoming.ContentHash = hash incoming.ContentHash = hash
} }
// Skip duplicates within incoming batch // Skip duplicates within incoming batch (by content hash)
if seenHashes[hash] { if seenHashes[hash] {
result.Skipped++ result.Skipped++
continue continue
} }
seenHashes[hash] = true seenHashes[hash] = true
// Skip duplicates by ID to prevent UNIQUE constraint violations
// This handles JSONL files with multiple versions of the same issue
if seenIDs[incoming.ID] {
result.Skipped++
continue
}
seenIDs[incoming.ID] = true
// CRITICAL: Check for tombstone FIRST, before any other matching (bd-4q8 fix) // CRITICAL: Check for tombstone FIRST, before any other matching (bd-4q8 fix)
// This prevents ghost resurrection regardless of which phase would normally match. // This prevents ghost resurrection regardless of which phase would normally match.
// If this ID has a tombstone in the DB, skip importing it entirely. // If this ID has a tombstone in the DB, skip importing it entirely.
@@ -701,6 +710,20 @@ if len(newIssues) > 0 {
return newIssues[i].ID < newIssues[j].ID // Stable sort return newIssues[i].ID < newIssues[j].ID // Stable sort
}) })
// Deduplicate by ID to prevent UNIQUE constraint errors during batch insert
// This handles cases where JSONL contains multiple versions of the same issue
seenNewIDs := make(map[string]bool)
var dedupedNewIssues []*types.Issue
for _, issue := range newIssues {
if !seenNewIDs[issue.ID] {
seenNewIDs[issue.ID] = true
dedupedNewIssues = append(dedupedNewIssues, issue)
} else {
result.Skipped++ // Count duplicates that were skipped
}
}
newIssues = dedupedNewIssues
// Create in batches by depth level (max depth 3) // Create in batches by depth level (max depth 3)
for depth := 0; depth <= 3; depth++ { for depth := 0; depth <= 3; depth++ {
var batchForDepth []*types.Issue var batchForDepth []*types.Issue

View File

@@ -4,10 +4,22 @@ import (
"context" "context"
"database/sql" "database/sql"
"fmt" "fmt"
"strings"
"github.com/steveyegge/beads/internal/types" "github.com/steveyegge/beads/internal/types"
) )
// isUniqueConstraintError checks if error is a UNIQUE constraint violation
// Used to detect and handle duplicate IDs in JSONL imports gracefully
func isUniqueConstraintError(err error) bool {
if err == nil {
return false
}
errMsg := err.Error()
return strings.Contains(errMsg, "UNIQUE constraint failed") ||
strings.Contains(errMsg, "constraint failed: UNIQUE")
}
// insertIssue inserts a single issue into the database // insertIssue inserts a single issue into the database
func insertIssue(ctx context.Context, conn *sql.Conn, issue *types.Issue) error { func insertIssue(ctx context.Context, conn *sql.Conn, issue *types.Issue) error {
sourceRepo := issue.SourceRepo sourceRepo := issue.SourceRepo
@@ -21,7 +33,7 @@ func insertIssue(ctx context.Context, conn *sql.Conn, issue *types.Issue) error
} }
_, err := conn.ExecContext(ctx, ` _, err := conn.ExecContext(ctx, `
INSERT INTO issues ( INSERT OR IGNORE INTO issues (
id, content_hash, title, description, design, acceptance_criteria, notes, id, content_hash, title, description, design, acceptance_criteria, notes,
status, priority, issue_type, assignee, estimated_minutes, status, priority, issue_type, assignee, estimated_minutes,
created_at, updated_at, closed_at, external_ref, source_repo, close_reason, created_at, updated_at, closed_at, external_ref, source_repo, close_reason,
@@ -38,7 +50,12 @@ func insertIssue(ctx context.Context, conn *sql.Conn, issue *types.Issue) error
issue.Sender, ephemeral, issue.Sender, ephemeral,
) )
if err != nil { if err != nil {
return fmt.Errorf("failed to insert issue: %w", err) // INSERT OR IGNORE should handle duplicates, but driver may still return error
// Explicitly ignore UNIQUE constraint errors (expected for duplicate IDs in JSONL)
if !isUniqueConstraintError(err) {
return fmt.Errorf("failed to insert issue: %w", err)
}
// Duplicate ID detected and ignored (INSERT OR IGNORE succeeded)
} }
return nil return nil
} }
@@ -46,7 +63,7 @@ func insertIssue(ctx context.Context, conn *sql.Conn, issue *types.Issue) error
// insertIssues bulk inserts multiple issues using a prepared statement // insertIssues bulk inserts multiple issues using a prepared statement
func insertIssues(ctx context.Context, conn *sql.Conn, issues []*types.Issue) error { func insertIssues(ctx context.Context, conn *sql.Conn, issues []*types.Issue) error {
stmt, err := conn.PrepareContext(ctx, ` stmt, err := conn.PrepareContext(ctx, `
INSERT INTO issues ( INSERT OR IGNORE INTO issues (
id, content_hash, title, description, design, acceptance_criteria, notes, id, content_hash, title, description, design, acceptance_criteria, notes,
status, priority, issue_type, assignee, estimated_minutes, status, priority, issue_type, assignee, estimated_minutes,
created_at, updated_at, closed_at, external_ref, source_repo, close_reason, created_at, updated_at, closed_at, external_ref, source_repo, close_reason,
@@ -80,7 +97,12 @@ func insertIssues(ctx context.Context, conn *sql.Conn, issues []*types.Issue) er
issue.Sender, ephemeral, issue.Sender, ephemeral,
) )
if err != nil { if err != nil {
return fmt.Errorf("failed to insert issue %s: %w", issue.ID, err) // INSERT OR IGNORE should handle duplicates, but driver may still return error
// Explicitly ignore UNIQUE constraint errors (expected for duplicate IDs in JSONL)
if !isUniqueConstraintError(err) {
return fmt.Errorf("failed to insert issue %s: %w", issue.ID, err)
}
// Duplicate ID detected and ignored (INSERT OR IGNORE succeeded)
} }
} }
return nil return nil

View File

@@ -68,7 +68,9 @@ func MigrateDropEdgeColumns(db *sql.DB) error {
return fmt.Errorf("failed to disable foreign keys: %w", err) return fmt.Errorf("failed to disable foreign keys: %w", err)
} }
// Re-enable foreign keys at the end (deferred to ensure it runs) // Re-enable foreign keys at the end (deferred to ensure it runs)
defer db.Exec(`PRAGMA foreign_keys = ON`) defer func() {
_, _ = db.Exec(`PRAGMA foreign_keys = ON`)
}()
// Drop views that depend on the issues table BEFORE starting transaction // Drop views that depend on the issues table BEFORE starting transaction
// This is necessary because SQLite validates views during table operations // This is necessary because SQLite validates views during table operations