From 2480316248585009a599524837fa0738809d8448 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Thu, 30 Oct 2025 14:12:29 -0700 Subject: [PATCH] Implement hash ID generation (bd-168) - Add generateHashID function with SHA256-based IDs - Update CreateIssue and CreateIssues to use hash IDs - Add collision detection with nonce retry logic - Add comprehensive tests for hash ID generation - Hash IDs format: prefix-<8 hex chars> (e.g., bd-a3f8e9a2) Amp-Thread-ID: https://ampcode.com/threads/T-48f75379-427f-4d72-bbc2-42bad0d0d62d Co-authored-by: Amp --- internal/storage/sqlite/hash_id_test.go | 192 ++++++++++++++++++++++++ internal/storage/sqlite/sqlite.go | 156 ++++++++++--------- 2 files changed, 273 insertions(+), 75 deletions(-) create mode 100644 internal/storage/sqlite/hash_id_test.go diff --git a/internal/storage/sqlite/hash_id_test.go b/internal/storage/sqlite/hash_id_test.go new file mode 100644 index 00000000..9a7cc868 --- /dev/null +++ b/internal/storage/sqlite/hash_id_test.go @@ -0,0 +1,192 @@ +package sqlite + +import ( + "context" + "testing" + "time" + + "github.com/steveyegge/beads/internal/types" +) + +func TestHashIDGeneration(t *testing.T) { + store, err := New(":memory:") + if err != nil { + t.Fatalf("Failed to create storage: %v", err) + } + defer func() { _ = store.Close() }() + + ctx := context.Background() + + // Set up database with prefix + if err := store.SetConfig(ctx, "issue_prefix", "bd"); err != nil { + t.Fatalf("Failed to set prefix: %v", err) + } + + // Create an issue - should get a hash ID + issue := &types.Issue{ + Title: "Test Issue", + Description: "Test description", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + } + + if err := store.CreateIssue(ctx, issue, "test-actor"); err != nil { + t.Fatalf("Failed to create issue: %v", err) + } + + // Verify hash ID format: bd-<8 hex chars> + if len(issue.ID) != 11 { // "bd-" (3) + 8 hex chars = 11 + t.Errorf("Expected ID length 11, got %d: %s", len(issue.ID), 
issue.ID) + } + + if issue.ID[:3] != "bd-" { + t.Errorf("Expected ID to start with 'bd-', got: %s", issue.ID) + } + + // Verify we can retrieve the issue + retrieved, err := store.GetIssue(ctx, issue.ID) + if err != nil { + t.Fatalf("Failed to get issue: %v", err) + } + + if retrieved.Title != issue.Title { + t.Errorf("Expected title %q, got %q", issue.Title, retrieved.Title) + } +} + +func TestHashIDDeterministic(t *testing.T) { + // Same inputs should produce same hash (with same nonce) + prefix := "bd" + title := "Test Issue" + description := "Test description" + actor := "test-actor" + timestamp := time.Now() + + id1 := generateHashID(prefix, title, description, actor, timestamp, 0) + id2 := generateHashID(prefix, title, description, actor, timestamp, 0) + + if id1 != id2 { + t.Errorf("Expected same hash for same inputs, got %s and %s", id1, id2) + } +} + +func TestHashIDCollisionHandling(t *testing.T) { + store, err := New(":memory:") + if err != nil { + t.Fatalf("Failed to create storage: %v", err) + } + defer func() { _ = store.Close() }() + + ctx := context.Background() + + // Set up database with prefix + if err := store.SetConfig(ctx, "issue_prefix", "bd"); err != nil { + t.Fatalf("Failed to set prefix: %v", err) + } + + // Create first issue + issue1 := &types.Issue{ + Title: "Duplicate Title", + Description: "Same description", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + } + + if err := store.CreateIssue(ctx, issue1, "actor"); err != nil { + t.Fatalf("Failed to create first issue: %v", err) + } + + // Create second issue with same content at same time + // This should get a different hash due to nonce increment + issue2 := &types.Issue{ + Title: "Duplicate Title", + Description: "Same description", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + CreatedAt: issue1.CreatedAt, // Force same timestamp + } + + if err := store.CreateIssue(ctx, issue2, "actor"); err != nil { + t.Fatalf("Failed to create 
second issue: %v", err) + } + + // Verify both issues exist with different IDs + if issue1.ID == issue2.ID { + t.Errorf("Expected different IDs for duplicate content, both got: %s", issue1.ID) + } + + // Verify both can be retrieved + _, err = store.GetIssue(ctx, issue1.ID) + if err != nil { + t.Errorf("Failed to retrieve first issue: %v", err) + } + + _, err = store.GetIssue(ctx, issue2.ID) + if err != nil { + t.Errorf("Failed to retrieve second issue: %v", err) + } +} + +func TestHashIDBatchCreation(t *testing.T) { + store, err := New(":memory:") + if err != nil { + t.Fatalf("Failed to create storage: %v", err) + } + defer func() { _ = store.Close() }() + + ctx := context.Background() + + // Set up database with prefix + if err := store.SetConfig(ctx, "issue_prefix", "bd"); err != nil { + t.Fatalf("Failed to set prefix: %v", err) + } + + // Create multiple issues with similar content + issues := []*types.Issue{ + { + Title: "Issue 1", + Description: "Description", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + }, + { + Title: "Issue 1", // Same title + Description: "Description", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + }, + { + Title: "Issue 2", + Description: "Description", + Status: types.StatusOpen, + Priority: 1, + IssueType: types.TypeTask, + }, + } + + if err := store.CreateIssues(ctx, issues, "actor"); err != nil { + t.Fatalf("Failed to create issues: %v", err) + } + + // Verify all issues got unique IDs + ids := make(map[string]bool) + for _, issue := range issues { + if ids[issue.ID] { + t.Errorf("Duplicate ID found: %s", issue.ID) + } + ids[issue.ID] = true + + // Verify hash ID format + if len(issue.ID) != 11 { + t.Errorf("Expected ID length 11, got %d: %s", len(issue.ID), issue.ID) + } + if issue.ID[:3] != "bd-" { + t.Errorf("Expected ID to start with 'bd-', got: %s", issue.ID) + } + } +} diff --git a/internal/storage/sqlite/sqlite.go b/internal/storage/sqlite/sqlite.go index 
// generateHashID creates a hash-based ID for a top-level issue, formatted as
// "<prefix>-<8 hex chars>" (e.g. "bd-a3f8e9a2").
// For child issues, use the parent ID with a numeric suffix (e.g., "bd-a3f8e9a2.1").
//
// The hex part is the first 4 bytes of the SHA-256 digest of title,
// description, creator, timestamp.UnixNano() and nonce. The prefix is not
// hashed; it is only prepended to the result. Identical arguments always
// produce the same ID, so a caller that detects a collision retries with a
// different nonce.
func generateHashID(prefix, title, description, creator string, timestamp time.Time, nonce int) string {
	// Each free-text field is written as "<byte length>:<value>" so field
	// boundaries stay unambiguous. With a bare "|" separator, title "a|b" +
	// description "c" would hash identically to title "a" + description "b|c".
	content := fmt.Sprintf("%d:%s|%d:%s|%d:%s|%d|%d",
		len(title), title,
		len(description), description,
		len(creator), creator,
		timestamp.UnixNano(), nonce)

	digest := sha256.Sum256([]byte(content))

	// Only the first 4 bytes (8 hex chars) are kept for short, readable IDs.
	return prefix + "-" + hex.EncodeToString(digest[:4])
}
ON CONFLICT: update last_id to MAX(existing last_id, new calculated last_id) + 1 - // 3. RETURNING gives us the final incremented value - // - // This atomically handles three cases: - // - Counter doesn't exist: initialize from existing issues and return next ID - // - Counter exists but lower than max ID: update to max and return next ID - // - Counter exists and correct: just increment and return next ID - var nextID int - err = conn.QueryRowContext(ctx, ` - INSERT INTO issue_counters (prefix, last_id) - SELECT ?, COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0) + 1 - FROM issues - WHERE id LIKE ? || '-%' - AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*' - ON CONFLICT(prefix) DO UPDATE SET - last_id = MAX( - last_id, - (SELECT COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0) - FROM issues - WHERE id LIKE ? || '-%' - AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*') - ) + 1 - RETURNING last_id - `, prefix, prefix, prefix, prefix, prefix, prefix, prefix).Scan(&nextID) - if err != nil { - return fmt.Errorf("failed to generate next ID for prefix %s: %w", prefix, err) + // Generate hash-based ID with collision detection (bd-168) + // Try up to 10 times with different nonces to avoid collisions + var err error + for nonce := 0; nonce < 10; nonce++ { + candidate := generateHashID(prefix, issue.Title, issue.Description, actor, issue.CreatedAt, nonce) + + // Check if this ID already exists + var count int + err = conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count) + if err != nil { + return fmt.Errorf("failed to check for ID collision: %w", err) + } + + if count == 0 { + issue.ID = candidate + break + } + } + + if issue.ID == "" { + return fmt.Errorf("failed to generate unique ID after 10 attempts") } - - issue.ID = fmt.Sprintf("%s-%d", prefix, nextID) } else { // Validate that explicitly provided ID matches the configured prefix (bd-177) // This prevents wrong-prefix bugs when IDs are manually specified @@ -882,7 
+888,7 @@ func validateBatchIssues(issues []*types.Issue) error { } // generateBatchIDs generates IDs for all issues that need them atomically -func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue, dbPath string) error { +func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue, actor string) error { // Get prefix from config (needed for both generation and validation) var prefix string err := conn.QueryRowContext(ctx, `SELECT value FROM config WHERE key = ?`, "issue_prefix").Scan(&prefix) @@ -893,53 +899,53 @@ func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue return fmt.Errorf("failed to get config: %w", err) } - // Count how many issues need IDs and validate explicitly provided IDs - needIDCount := 0 + // Validate explicitly provided IDs and generate hash IDs for those that need them expectedPrefix := prefix + "-" - for _, issue := range issues { - if issue.ID == "" { - needIDCount++ - } else { + usedIDs := make(map[string]bool) + + // First pass: record explicitly provided IDs + for i := range issues { + if issues[i].ID != "" { // Validate that explicitly provided ID matches the configured prefix (bd-177) - if !strings.HasPrefix(issue.ID, expectedPrefix) { - return fmt.Errorf("issue ID '%s' does not match configured prefix '%s'", issue.ID, prefix) + if !strings.HasPrefix(issues[i].ID, expectedPrefix) { + return fmt.Errorf("issue ID '%s' does not match configured prefix '%s'", issues[i].ID, prefix) } + usedIDs[issues[i].ID] = true } } - - if needIDCount == 0 { - return nil - } - - // Atomically reserve ID range - var nextID int - err = conn.QueryRowContext(ctx, ` - INSERT INTO issue_counters (prefix, last_id) - SELECT ?, COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0) + ? - FROM issues - WHERE id LIKE ? || '-%' - AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*' - ON CONFLICT(prefix) DO UPDATE SET - last_id = MAX( - last_id, - (SELECT COALESCE(MAX(CAST(substr(id, LENGTH(?) 
+ 2) AS INTEGER)), 0) - FROM issues - WHERE id LIKE ? || '-%' - AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*') - ) + ? - RETURNING last_id - `, prefix, prefix, needIDCount, prefix, prefix, prefix, prefix, prefix, needIDCount).Scan(&nextID) - if err != nil { - return fmt.Errorf("failed to generate ID range: %w", err) - } - - // Assign IDs sequentially from the reserved range and compute content hashes - currentID := nextID - needIDCount + 1 + + // Second pass: generate IDs for issues that need them, with collision detection for i := range issues { if issues[i].ID == "" { - issues[i].ID = fmt.Sprintf("%s-%d", prefix, currentID) - currentID++ + // Generate hash-based ID with collision detection (bd-168) + var generated bool + for nonce := 0; nonce < 10; nonce++ { + candidate := generateHashID(prefix, issues[i].Title, issues[i].Description, actor, issues[i].CreatedAt, nonce) + + // Check if this ID is already used in this batch or in the database + if usedIDs[candidate] { + continue + } + + var count int + err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count) + if err != nil { + return fmt.Errorf("failed to check for ID collision: %w", err) + } + + if count == 0 { + issues[i].ID = candidate + usedIDs[candidate] = true + generated = true + break + } + } + + if !generated { + return fmt.Errorf("failed to generate unique ID for issue %d after 10 attempts", i) + } } + // Compute content hash if not already set (bd-95) if issues[i].ContentHash == "" { issues[i].ContentHash = issues[i].ComputeContentHash() @@ -1104,7 +1110,7 @@ func (s *SQLiteStorage) CreateIssues(ctx context.Context, issues []*types.Issue, }() // Phase 3: Generate IDs for issues that need them - if err := generateBatchIDs(ctx, conn, issues, s.dbPath); err != nil { + if err := generateBatchIDs(ctx, conn, issues, actor); err != nil { return err }