Add --clear-duplicate-external-refs flag to bd import

Fixes GH-234 by providing automatic resolution for duplicate external_ref
values instead of forcing manual JSONL editing.

Changes:
- Add ClearDuplicateExternalRefs option to importer.Options
- Modify validateNoDuplicateExternalRefs to clear duplicates when enabled
- Keep first occurrence, clear rest when flag is set
- Enhanced error message to suggest the flag
- Add comprehensive tests for the new behavior

Usage: bd import -i issues.jsonl --clear-duplicate-external-refs
Amp-Thread-ID: https://ampcode.com/threads/T-932dcf45-76f2-4994-9b5c-a6eb20a86036
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-11-06 13:01:44 -08:00
parent b5e2ef4a59
commit 9de98cf1cb
4 changed files with 80 additions and 31 deletions

View File

@@ -33,6 +33,7 @@ Behavior:
dryRun, _ := cmd.Flags().GetBool("dry-run") dryRun, _ := cmd.Flags().GetBool("dry-run")
renameOnImport, _ := cmd.Flags().GetBool("rename-on-import") renameOnImport, _ := cmd.Flags().GetBool("rename-on-import")
dedupeAfter, _ := cmd.Flags().GetBool("dedupe-after") dedupeAfter, _ := cmd.Flags().GetBool("dedupe-after")
clearDuplicateExternalRefs, _ := cmd.Flags().GetBool("clear-duplicate-external-refs")
orphanHandling, _ := cmd.Flags().GetString("orphan-handling") orphanHandling, _ := cmd.Flags().GetString("orphan-handling")
// Open input // Open input
@@ -95,11 +96,12 @@ Behavior:
// Phase 2: Use shared import logic // Phase 2: Use shared import logic
opts := ImportOptions{ opts := ImportOptions{
DryRun: dryRun, DryRun: dryRun,
SkipUpdate: skipUpdate, SkipUpdate: skipUpdate,
Strict: strict, Strict: strict,
RenameOnImport: renameOnImport, RenameOnImport: renameOnImport,
OrphanHandling: orphanHandling, ClearDuplicateExternalRefs: clearDuplicateExternalRefs,
OrphanHandling: orphanHandling,
} }
result, err := importIssuesCore(ctx, dbPath, store, allIssues, opts) result, err := importIssuesCore(ctx, dbPath, store, allIssues, opts)
@@ -265,6 +267,7 @@ func init() {
importCmd.Flags().Bool("dedupe-after", false, "Detect and report content duplicates after import") importCmd.Flags().Bool("dedupe-after", false, "Detect and report content duplicates after import")
importCmd.Flags().Bool("dry-run", false, "Preview collision detection without making changes") importCmd.Flags().Bool("dry-run", false, "Preview collision detection without making changes")
importCmd.Flags().Bool("rename-on-import", false, "Rename imported issues to match database prefix (updates all references)") importCmd.Flags().Bool("rename-on-import", false, "Rename imported issues to match database prefix (updates all references)")
importCmd.Flags().Bool("clear-duplicate-external-refs", false, "Clear duplicate external_ref values (keeps first occurrence)")
importCmd.Flags().String("orphan-handling", "", "How to handle missing parent issues: strict/resurrect/skip/allow (default: use config or 'allow')") importCmd.Flags().String("orphan-handling", "", "How to handle missing parent issues: strict/resurrect/skip/allow (default: use config or 'allow')")
importCmd.Flags().BoolVar(&jsonOutput, "json", false, "Output import statistics in JSON format") importCmd.Flags().BoolVar(&jsonOutput, "json", false, "Output import statistics in JSON format")
rootCmd.AddCommand(importCmd) rootCmd.AddCommand(importCmd)

View File

@@ -158,12 +158,13 @@ func issueDataChanged(existing *types.Issue, updates map[string]interface{}) boo
// ImportOptions configures how the import behaves // ImportOptions configures how the import behaves
type ImportOptions struct { type ImportOptions struct {
DryRun bool // Preview changes without applying them DryRun bool // Preview changes without applying them
SkipUpdate bool // Skip updating existing issues (create-only mode) SkipUpdate bool // Skip updating existing issues (create-only mode)
Strict bool // Fail on any error (dependencies, labels, etc.) Strict bool // Fail on any error (dependencies, labels, etc.)
RenameOnImport bool // Rename imported issues to match database prefix RenameOnImport bool // Rename imported issues to match database prefix
SkipPrefixValidation bool // Skip prefix validation (for auto-import) SkipPrefixValidation bool // Skip prefix validation (for auto-import)
OrphanHandling string // Orphan handling mode: strict/resurrect/skip/allow (empty = use config) ClearDuplicateExternalRefs bool // Clear duplicate external_ref values instead of erroring
OrphanHandling string // Orphan handling mode: strict/resurrect/skip/allow (empty = use config)
} }
// ImportResult contains statistics about the import operation // ImportResult contains statistics about the import operation
@@ -210,12 +211,13 @@ func importIssuesCore(ctx context.Context, dbPath string, store storage.Storage,
// Convert ImportOptions to importer.Options // Convert ImportOptions to importer.Options
importerOpts := importer.Options{ importerOpts := importer.Options{
DryRun: opts.DryRun, DryRun: opts.DryRun,
SkipUpdate: opts.SkipUpdate, SkipUpdate: opts.SkipUpdate,
Strict: opts.Strict, Strict: opts.Strict,
RenameOnImport: opts.RenameOnImport, RenameOnImport: opts.RenameOnImport,
SkipPrefixValidation: opts.SkipPrefixValidation, SkipPrefixValidation: opts.SkipPrefixValidation,
OrphanHandling: importer.OrphanHandling(orphanHandling), ClearDuplicateExternalRefs: opts.ClearDuplicateExternalRefs,
OrphanHandling: importer.OrphanHandling(orphanHandling),
} }
// Delegate to the importer package // Delegate to the importer package

View File

@@ -29,12 +29,13 @@ const (
// Options contains import configuration // Options contains import configuration
type Options struct { type Options struct {
DryRun bool // Preview changes without applying them DryRun bool // Preview changes without applying them
SkipUpdate bool // Skip updating existing issues (create-only mode) SkipUpdate bool // Skip updating existing issues (create-only mode)
Strict bool // Fail on any error (dependencies, labels, etc.) Strict bool // Fail on any error (dependencies, labels, etc.)
RenameOnImport bool // Rename imported issues to match database prefix RenameOnImport bool // Rename imported issues to match database prefix
SkipPrefixValidation bool // Skip prefix validation (for auto-import) SkipPrefixValidation bool // Skip prefix validation (for auto-import)
OrphanHandling OrphanHandling // How to handle missing parent issues (default: allow) OrphanHandling OrphanHandling // How to handle missing parent issues (default: allow)
ClearDuplicateExternalRefs bool // Clear duplicate external_ref values instead of erroring
} }
// Result contains statistics about the import operation // Result contains statistics about the import operation
@@ -109,7 +110,7 @@ func ImportIssues(ctx context.Context, dbPath string, store storage.Storage, iss
} }
// Validate no duplicate external_ref values in batch // Validate no duplicate external_ref values in batch
if err := validateNoDuplicateExternalRefs(issues); err != nil { if err := validateNoDuplicateExternalRefs(issues, opts.ClearDuplicateExternalRefs, result); err != nil {
return result, err return result, err
} }
@@ -741,7 +742,7 @@ func GetPrefixList(prefixes map[string]int) []string {
return result return result
} }
func validateNoDuplicateExternalRefs(issues []*types.Issue) error { func validateNoDuplicateExternalRefs(issues []*types.Issue, clearDuplicates bool, result *Result) error {
seen := make(map[string][]string) seen := make(map[string][]string)
for _, issue := range issues { for _, issue := range issues {
@@ -752,15 +753,34 @@ func validateNoDuplicateExternalRefs(issues []*types.Issue) error {
} }
var duplicates []string var duplicates []string
duplicateIssueIDs := make(map[string]bool)
for ref, issueIDs := range seen { for ref, issueIDs := range seen {
if len(issueIDs) > 1 { if len(issueIDs) > 1 {
duplicates = append(duplicates, fmt.Sprintf("external_ref '%s' appears in issues: %v", ref, issueIDs)) duplicates = append(duplicates, fmt.Sprintf("external_ref '%s' appears in issues: %v", ref, issueIDs))
// Track all duplicate issue IDs except the first one (keep first, clear rest)
for i := 1; i < len(issueIDs); i++ {
duplicateIssueIDs[issueIDs[i]] = true
}
} }
} }
if len(duplicates) > 0 { if len(duplicates) > 0 {
if clearDuplicates {
// Clear duplicate external_refs (keep first occurrence, clear rest)
for _, issue := range issues {
if duplicateIssueIDs[issue.ID] {
issue.ExternalRef = nil
}
}
// Track how many were cleared in result
if result != nil {
result.Skipped += len(duplicateIssueIDs)
}
return nil
}
sort.Strings(duplicates) sort.Strings(duplicates)
return fmt.Errorf("batch import contains duplicate external_ref values:\n%s", strings.Join(duplicates, "\n")) return fmt.Errorf("batch import contains duplicate external_ref values:\n%s\n\nUse --clear-duplicate-external-refs to automatically clear duplicates", strings.Join(duplicates, "\n"))
} }
return nil return nil

View File

@@ -905,7 +905,7 @@ func TestValidateNoDuplicateExternalRefs(t *testing.T) {
{ID: "bd-1", Title: "Issue 1"}, {ID: "bd-1", Title: "Issue 1"},
{ID: "bd-2", Title: "Issue 2"}, {ID: "bd-2", Title: "Issue 2"},
} }
err := validateNoDuplicateExternalRefs(issues) err := validateNoDuplicateExternalRefs(issues, false, nil)
if err != nil { if err != nil {
t.Errorf("Expected no error, got: %v", err) t.Errorf("Expected no error, got: %v", err)
} }
@@ -918,7 +918,7 @@ func TestValidateNoDuplicateExternalRefs(t *testing.T) {
{ID: "bd-1", Title: "Issue 1", ExternalRef: &ref1}, {ID: "bd-1", Title: "Issue 1", ExternalRef: &ref1},
{ID: "bd-2", Title: "Issue 2", ExternalRef: &ref2}, {ID: "bd-2", Title: "Issue 2", ExternalRef: &ref2},
} }
err := validateNoDuplicateExternalRefs(issues) err := validateNoDuplicateExternalRefs(issues, false, nil)
if err != nil { if err != nil {
t.Errorf("Expected no error, got: %v", err) t.Errorf("Expected no error, got: %v", err)
} }
@@ -931,7 +931,7 @@ func TestValidateNoDuplicateExternalRefs(t *testing.T) {
{ID: "bd-1", Title: "Issue 1", ExternalRef: &ref1}, {ID: "bd-1", Title: "Issue 1", ExternalRef: &ref1},
{ID: "bd-2", Title: "Issue 2", ExternalRef: &ref2}, {ID: "bd-2", Title: "Issue 2", ExternalRef: &ref2},
} }
err := validateNoDuplicateExternalRefs(issues) err := validateNoDuplicateExternalRefs(issues, false, nil)
if err == nil { if err == nil {
t.Error("Expected error for duplicate external_ref, got nil") t.Error("Expected error for duplicate external_ref, got nil")
} }
@@ -940,6 +940,30 @@ func TestValidateNoDuplicateExternalRefs(t *testing.T) {
} }
}) })
t.Run("duplicate external_ref values with clear flag", func(t *testing.T) {
ref1 := "JIRA-1"
ref2 := "JIRA-1"
issues := []*types.Issue{
{ID: "bd-1", Title: "Issue 1", ExternalRef: &ref1},
{ID: "bd-2", Title: "Issue 2", ExternalRef: &ref2},
}
result := &Result{}
err := validateNoDuplicateExternalRefs(issues, true, result)
if err != nil {
t.Errorf("Expected no error with clear flag, got: %v", err)
}
// First issue should keep external_ref, second should be cleared
if issues[0].ExternalRef == nil || *issues[0].ExternalRef != "JIRA-1" {
t.Error("Expected first issue to keep external_ref JIRA-1")
}
if issues[1].ExternalRef != nil {
t.Error("Expected second issue to have cleared external_ref")
}
if result.Skipped != 1 {
t.Errorf("Expected 1 skipped (cleared), got %d", result.Skipped)
}
})
t.Run("multiple duplicates", func(t *testing.T) { t.Run("multiple duplicates", func(t *testing.T) {
jira1 := "JIRA-1" jira1 := "JIRA-1"
jira2 := "JIRA-2" jira2 := "JIRA-2"
@@ -949,7 +973,7 @@ func TestValidateNoDuplicateExternalRefs(t *testing.T) {
{ID: "bd-3", Title: "Issue 3", ExternalRef: &jira2}, {ID: "bd-3", Title: "Issue 3", ExternalRef: &jira2},
{ID: "bd-4", Title: "Issue 4", ExternalRef: &jira2}, {ID: "bd-4", Title: "Issue 4", ExternalRef: &jira2},
} }
err := validateNoDuplicateExternalRefs(issues) err := validateNoDuplicateExternalRefs(issues, false, nil)
if err == nil { if err == nil {
t.Error("Expected error for duplicate external_ref, got nil") t.Error("Expected error for duplicate external_ref, got nil")
} }
@@ -968,7 +992,7 @@ func TestValidateNoDuplicateExternalRefs(t *testing.T) {
{ID: "bd-2", Title: "Issue 2", ExternalRef: &empty}, {ID: "bd-2", Title: "Issue 2", ExternalRef: &empty},
{ID: "bd-3", Title: "Issue 3", ExternalRef: &ref1}, {ID: "bd-3", Title: "Issue 3", ExternalRef: &ref1},
} }
err := validateNoDuplicateExternalRefs(issues) err := validateNoDuplicateExternalRefs(issues, false, nil)
if err != nil { if err != nil {
t.Errorf("Expected no error for empty refs, got: %v", err) t.Errorf("Expected no error for empty refs, got: %v", err)
} }