From 773aa736e46ee9041e1e6cdbe02476980b7ec1a1 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Sat, 8 Nov 2025 02:22:04 -0800 Subject: [PATCH] Document external_ref in content hash behavior (bd-9f4a) - Added comprehensive code comments in collision.go explaining external_ref inclusion - Documented content hash behavior in HASH_ID_DESIGN.md with examples - Enhanced test documentation in collision_test.go - Closes bd-9f4a, bd-df11, bd-537e Amp-Thread-ID: https://ampcode.com/threads/T-47525168-d51c-4f56-b598-18402e5ea389 Co-authored-by: Amp --- docs/HASH_ID_DESIGN.md | 50 +++++++++++++++++++++++ internal/storage/sqlite/collision.go | 14 +++++++ internal/storage/sqlite/collision_test.go | 17 ++++++++ 3 files changed, 81 insertions(+) diff --git a/docs/HASH_ID_DESIGN.md b/docs/HASH_ID_DESIGN.md index 02123c7d..b5d1d6eb 100644 --- a/docs/HASH_ID_DESIGN.md +++ b/docs/HASH_ID_DESIGN.md @@ -73,6 +73,56 @@ func GenerateHashID(prefix, title, description string, created time.Time, worksp - These fields are mutable and shouldn't affect identity - Changing priority shouldn't change the issue ID +## Content Hash (Collision Detection) + +Separate from ID generation, bd uses content hashing for collision detection during import. See `internal/storage/sqlite/collision.go:hashIssueContent()`. + +### Content Hash Fields + +The content hash includes ALL semantically meaningful fields: +- title, description, status, priority, issue_type +- assignee, design, acceptance_criteria, notes +- **external_ref** ⚠️ (important: see below) + +### External Ref in Content Hash + +**IMPORTANT:** `external_ref` is included in the content hash. This has subtle implications: + +``` +Local issue (no external_ref) → content hash A +Same issue + external_ref → content hash B (different!) 
+``` + +**Why include external_ref?** +- Linkage to external systems (Jira, GitHub, Linear) is semantically meaningful +- Changing external_ref represents a real content change +- Ensures external system changes are tracked properly + +**Implications:** +1. **Rename detection** won't match issues before/after adding external_ref +2. **Collision detection** treats external_ref changes as updates +3. **Idempotent import** requires identical external_ref +4. **Import by external_ref** still works (checked before content hash) + +**Example scenario:** +```bash +# 1. Create local issue +bd create "Fix auth bug" -p 1 +# → ID: bd-a3f2dd, content_hash: abc123 + +# 2. Link to Jira +bd update bd-a3f2dd --external-ref JIRA-456 +# → ID: bd-a3f2dd (same), content_hash: def789 (changed!) + +# 3. Re-import from Jira +bd import -i jira-export.jsonl +# → Matches by external_ref first (JIRA-456) +# → Content hash different, triggers update +# → Idempotent on subsequent imports +``` + +**Design rationale:** External system linkage is tracked as substantive content, not just metadata. This ensures proper audit trails and collision resolution. + **Why 6 chars (with progressive extension)?** - 6 chars (24 bits) = ~16 million possible IDs - Progressive collision handling: extend to 7-8 chars only when needed diff --git a/internal/storage/sqlite/collision.go b/internal/storage/sqlite/collision.go index 4e4c3133..a3a3622b 100644 --- a/internal/storage/sqlite/collision.go +++ b/internal/storage/sqlite/collision.go @@ -144,6 +144,19 @@ func compareIssues(existing, incoming *types.Issue) []string { } // hashIssueContent creates a deterministic hash of issue content (excluding ID and timestamps) +// +// IMPORTANT: external_ref is included in the content hash. 
This means: +// - Adding/removing/changing external_ref changes the content hash +// - A local issue that gains an external_ref will have a different content hash +// - This is intentional: external_ref is semantically meaningful content +// +// Implications: +// 1. Rename detection won't match issues before/after adding external_ref +// 2. Content-based collision detection treats external_ref changes as updates +// 3. Import is idempotent only when external_ref is identical +// +// This design choice ensures external system linkage is tracked as substantive content, +// not just metadata. See docs/HASH_ID_DESIGN.md for more on content hash philosophy. func hashIssueContent(issue *types.Issue) string { h := sha256.New() _, _ = fmt.Fprintf(h, "title:%s\n", issue.Title) @@ -155,6 +168,7 @@ func hashIssueContent(issue *types.Issue) string { _, _ = fmt.Fprintf(h, "design:%s\n", issue.Design) _, _ = fmt.Fprintf(h, "acceptance:%s\n", issue.AcceptanceCriteria) _, _ = fmt.Fprintf(h, "notes:%s\n", issue.Notes) + // external_ref is included in content hash (see comment above) if issue.ExternalRef != nil { _, _ = fmt.Fprintf(h, "external_ref:%s\n", *issue.ExternalRef) } diff --git a/internal/storage/sqlite/collision_test.go b/internal/storage/sqlite/collision_test.go index e3506a71..b0ff6c44 100644 --- a/internal/storage/sqlite/collision_test.go +++ b/internal/storage/sqlite/collision_test.go @@ -340,6 +340,17 @@ func TestHashIssueContent(t *testing.T) { } } +// TestHashIssueContentWithExternalRef verifies that external_ref is included in content hash.
+// +// This test demonstrates the behavior documented in bd-9f4a: +// - Adding external_ref to an issue changes its content hash +// - Different external_ref values produce different content hashes +// - This is intentional: external_ref is semantically meaningful content +// +// Implications: +// - Rename detection won't match issues before/after adding external_ref +// - Collision detection treats external_ref changes as updates +// - Import is idempotent only when external_ref is identical func TestHashIssueContentWithExternalRef(t *testing.T) { ref1 := "JIRA-123" ref2 := "JIRA-456" @@ -365,11 +376,17 @@ func TestHashIssueContentWithExternalRef(t *testing.T) { hash2 := hashIssueContent(issueWithRef2) hash3 := hashIssueContent(issueNoRef) + // Different external_ref values should produce different hashes if hash1 == hash2 { t.Errorf("Expected different external refs to produce different hashes") } + // Adding external_ref should change the content hash if hash1 == hash3 { t.Errorf("Expected issue with external ref to differ from issue without") } + + if hash2 == hash3 { + t.Errorf("Expected issue with external ref to differ from issue without") + } }