Implement hash ID generation (bd-168)

- Add generateHashID function with SHA256-based IDs
- Update CreateIssue and CreateIssues to use hash IDs
- Add collision detection with nonce retry logic
- Add comprehensive tests for hash ID generation
- Hash ID format: prefix-<8 hex chars> (e.g., bd-a3f8e9a2)

Amp-Thread-ID: https://ampcode.com/threads/T-48f75379-427f-4d72-bbc2-42bad0d0d62d
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-30 14:12:29 -07:00
parent 6091e87cd1
commit 2276d5e428
2 changed files with 273 additions and 75 deletions

View File

@@ -0,0 +1,192 @@
package sqlite
import (
"context"
"testing"
"time"
"github.com/steveyegge/beads/internal/types"
)
// TestHashIDGeneration checks that CreateIssue assigns an ID of the form
// "bd-<8 lowercase hex chars>" to an issue created without an ID, and that the
// issue can be read back under that ID.
func TestHashIDGeneration(t *testing.T) {
	store, err := New(":memory:")
	if err != nil {
		t.Fatalf("Failed to create storage: %v", err)
	}
	// Best-effort cleanup: a Close error cannot affect the assertions below.
	defer func() { _ = store.Close() }()

	ctx := context.Background()

	// Set up database with prefix
	if err := store.SetConfig(ctx, "issue_prefix", "bd"); err != nil {
		t.Fatalf("Failed to set prefix: %v", err)
	}

	// Create an issue - should get a hash ID
	issue := &types.Issue{
		Title:       "Test Issue",
		Description: "Test description",
		Status:      types.StatusOpen,
		Priority:    1,
		IssueType:   types.TypeTask,
	}
	if err := store.CreateIssue(ctx, issue, "test-actor"); err != nil {
		t.Fatalf("Failed to create issue: %v", err)
	}

	// Verify hash ID format: bd-<8 hex chars>
	const idPrefix = "bd-"
	if len(issue.ID) != 11 { // "bd-" (3) + 8 hex chars = 11
		t.Errorf("Expected ID length 11, got %d: %s", len(issue.ID), issue.ID)
	}
	// Guard the slice: the length check above only reports, so a short ID
	// would otherwise panic here instead of failing cleanly.
	if len(issue.ID) < len(idPrefix) || issue.ID[:len(idPrefix)] != idPrefix {
		t.Errorf("Expected ID to start with 'bd-', got: %s", issue.ID)
	} else {
		for _, c := range issue.ID[len(idPrefix):] {
			if (c < '0' || c > '9') && (c < 'a' || c > 'f') {
				t.Errorf("Expected lowercase hex chars after 'bd-', got: %s", issue.ID)
				break
			}
		}
	}

	// Verify we can retrieve the issue
	retrieved, err := store.GetIssue(ctx, issue.ID)
	if err != nil {
		t.Fatalf("Failed to get issue: %v", err)
	}
	if retrieved.Title != issue.Title {
		t.Errorf("Expected title %q, got %q", issue.Title, retrieved.Title)
	}
}
// TestHashIDDeterministic checks that generateHashID returns the same ID when
// called twice with identical arguments, including the same nonce.
func TestHashIDDeterministic(t *testing.T) {
	// Capture the timestamp once so both calls hash exactly the same inputs.
	now := time.Now()
	generate := func() string {
		return generateHashID("bd", "Test Issue", "Test description", "test-actor", now, 0)
	}

	first, second := generate(), generate()
	if first != second {
		t.Errorf("Expected same hash for same inputs, got %s and %s", first, second)
	}
}
// TestHashIDCollisionHandling creates two issues with identical content, the
// same actor and the same CreatedAt value, and checks that they end up with
// different IDs and that both can be retrieved.
func TestHashIDCollisionHandling(t *testing.T) {
	store, err := New(":memory:")
	if err != nil {
		t.Fatalf("Failed to create storage: %v", err)
	}
	defer func() { _ = store.Close() }()

	ctx := context.Background()

	// Configure the issue prefix before creating anything.
	if err := store.SetConfig(ctx, "issue_prefix", "bd"); err != nil {
		t.Fatalf("Failed to set prefix: %v", err)
	}

	// Both issues start from the same template so their hashed content matches.
	newDuplicate := func() *types.Issue {
		return &types.Issue{
			Title:       "Duplicate Title",
			Description: "Same description",
			Status:      types.StatusOpen,
			Priority:    1,
			IssueType:   types.TypeTask,
		}
	}

	first := newDuplicate()
	if err := store.CreateIssue(ctx, first, "actor"); err != nil {
		t.Fatalf("Failed to create first issue: %v", err)
	}

	// Reuse the first issue's CreatedAt so the second one hashes the same
	// inputs; a distinct ID is then expected to come from a different nonce.
	second := newDuplicate()
	second.CreatedAt = first.CreatedAt
	if err := store.CreateIssue(ctx, second, "actor"); err != nil {
		t.Fatalf("Failed to create second issue: %v", err)
	}

	// The two issues must not share an ID.
	if first.ID == second.ID {
		t.Errorf("Expected different IDs for duplicate content, both got: %s", first.ID)
	}

	// Each issue must be retrievable under its own ID.
	if _, err := store.GetIssue(ctx, first.ID); err != nil {
		t.Errorf("Failed to retrieve first issue: %v", err)
	}
	if _, err := store.GetIssue(ctx, second.ID); err != nil {
		t.Errorf("Failed to retrieve second issue: %v", err)
	}
}
// TestHashIDBatchCreation checks that CreateIssues assigns every issue in a
// batch a unique ID of the form "bd-<8 lowercase hex chars>", including two
// issues whose title and description are identical.
func TestHashIDBatchCreation(t *testing.T) {
	store, err := New(":memory:")
	if err != nil {
		t.Fatalf("Failed to create storage: %v", err)
	}
	// Best-effort cleanup: a Close error cannot affect the assertions below.
	defer func() { _ = store.Close() }()

	ctx := context.Background()

	// Set up database with prefix
	if err := store.SetConfig(ctx, "issue_prefix", "bd"); err != nil {
		t.Fatalf("Failed to set prefix: %v", err)
	}

	// Create multiple issues with similar content
	issues := []*types.Issue{
		{
			Title:       "Issue 1",
			Description: "Description",
			Status:      types.StatusOpen,
			Priority:    1,
			IssueType:   types.TypeTask,
		},
		{
			Title:       "Issue 1", // Same title
			Description: "Description",
			Status:      types.StatusOpen,
			Priority:    1,
			IssueType:   types.TypeTask,
		},
		{
			Title:       "Issue 2",
			Description: "Description",
			Status:      types.StatusOpen,
			Priority:    1,
			IssueType:   types.TypeTask,
		},
	}
	if err := store.CreateIssues(ctx, issues, "actor"); err != nil {
		t.Fatalf("Failed to create issues: %v", err)
	}

	// Verify all issues got unique IDs
	const idPrefix = "bd-"
	ids := make(map[string]bool, len(issues))
	for _, issue := range issues {
		if ids[issue.ID] {
			t.Errorf("Duplicate ID found: %s", issue.ID)
		}
		ids[issue.ID] = true

		// Verify hash ID format
		if len(issue.ID) != 11 {
			t.Errorf("Expected ID length 11, got %d: %s", len(issue.ID), issue.ID)
		}
		// Guard the slice: the length check above only reports, so a short
		// ID would otherwise panic here instead of failing cleanly.
		if len(issue.ID) < len(idPrefix) || issue.ID[:len(idPrefix)] != idPrefix {
			t.Errorf("Expected ID to start with 'bd-', got: %s", issue.ID)
			continue
		}
		for _, c := range issue.ID[len(idPrefix):] {
			if (c < '0' || c > '9') && (c < 'a' || c > 'f') {
				t.Errorf("Expected lowercase hex chars after 'bd-', got: %s", issue.ID)
				break
			}
		}
	}
}

View File

@@ -3,7 +3,9 @@ package sqlite
import (
"context"
"crypto/sha256"
"database/sql"
"encoding/hex"
"encoding/json"
"fmt"
"os"
@@ -704,6 +706,23 @@ func (s *SQLiteStorage) SyncAllCounters(ctx context.Context) error {
// The database should ALWAYS have issue_prefix config set explicitly (by 'bd init' or auto-import)
// Never derive prefix from filename - it leads to silent data corruption
// generateHashID builds the ID for a top-level issue as "<prefix>-<8 hex chars>".
//
// The hex part is the first 4 bytes of the SHA-256 digest of the string
// "title|description|creator|<timestamp in Unix nanoseconds>|nonce". The prefix
// itself is not hashed. Passing a different nonce yields a different candidate,
// which callers can use to retry when a candidate ID is already taken.
//
// For child issues, use the parent ID with a numeric suffix (e.g., "bd-a3f8e9a2.1").
func generateHashID(prefix, title, description, creator string, timestamp time.Time, nonce int) string {
	digest := sha256.New()

	// Stream the stable content string straight into the hasher. Writes to a
	// hash.Hash never return an error, so the result is deliberately ignored.
	_, _ = fmt.Fprintf(digest, "%s|%s|%s|%d|%d", title, description, creator, timestamp.UnixNano(), nonce)

	// Keep only the first 4 bytes (8 hex chars) for short, readable IDs.
	sum := digest.Sum(nil)
	return prefix + "-" + hex.EncodeToString(sum[:4])
}
// CreateIssue creates a new issue
func (s *SQLiteStorage) CreateIssue(ctx context.Context, issue *types.Issue, actor string) error {
// Validate issue before creating
@@ -763,41 +782,28 @@ func (s *SQLiteStorage) CreateIssue(ctx context.Context, issue *types.Issue, act
// Generate ID if not set (inside transaction to prevent race conditions)
if issue.ID == "" {
// Atomically initialize counter (if needed) and get next ID (within transaction)
// This ensures the counter starts from the max existing ID, not 1
// CRITICAL: We rely on BEGIN IMMEDIATE above to serialize this operation across processes
//
// The query works as follows:
// 1. Try to INSERT with last_id = MAX(existing IDs) or 1 if none exist
// 2. ON CONFLICT: update last_id to MAX(existing last_id, new calculated last_id) + 1
// 3. RETURNING gives us the final incremented value
//
// This atomically handles three cases:
// - Counter doesn't exist: initialize from existing issues and return next ID
// - Counter exists but lower than max ID: update to max and return next ID
// - Counter exists and correct: just increment and return next ID
var nextID int
err = conn.QueryRowContext(ctx, `
INSERT INTO issue_counters (prefix, last_id)
SELECT ?, COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0) + 1
FROM issues
WHERE id LIKE ? || '-%'
AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*'
ON CONFLICT(prefix) DO UPDATE SET
last_id = MAX(
last_id,
(SELECT COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0)
FROM issues
WHERE id LIKE ? || '-%'
AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*')
) + 1
RETURNING last_id
`, prefix, prefix, prefix, prefix, prefix, prefix, prefix).Scan(&nextID)
if err != nil {
return fmt.Errorf("failed to generate next ID for prefix %s: %w", prefix, err)
// Generate hash-based ID with collision detection (bd-168)
// Try up to 10 times with different nonces to avoid collisions
var err error
for nonce := 0; nonce < 10; nonce++ {
candidate := generateHashID(prefix, issue.Title, issue.Description, actor, issue.CreatedAt, nonce)
// Check if this ID already exists
var count int
err = conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count)
if err != nil {
return fmt.Errorf("failed to check for ID collision: %w", err)
}
if count == 0 {
issue.ID = candidate
break
}
}
if issue.ID == "" {
return fmt.Errorf("failed to generate unique ID after 10 attempts")
}
issue.ID = fmt.Sprintf("%s-%d", prefix, nextID)
} else {
// Validate that explicitly provided ID matches the configured prefix (bd-177)
// This prevents wrong-prefix bugs when IDs are manually specified
@@ -882,7 +888,7 @@ func validateBatchIssues(issues []*types.Issue) error {
}
// generateBatchIDs generates IDs for all issues that need them atomically
func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue, dbPath string) error {
func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue, actor string) error {
// Get prefix from config (needed for both generation and validation)
var prefix string
err := conn.QueryRowContext(ctx, `SELECT value FROM config WHERE key = ?`, "issue_prefix").Scan(&prefix)
@@ -893,53 +899,53 @@ func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue
return fmt.Errorf("failed to get config: %w", err)
}
// Count how many issues need IDs and validate explicitly provided IDs
needIDCount := 0
// Validate explicitly provided IDs and generate hash IDs for those that need them
expectedPrefix := prefix + "-"
for _, issue := range issues {
if issue.ID == "" {
needIDCount++
} else {
usedIDs := make(map[string]bool)
// First pass: record explicitly provided IDs
for i := range issues {
if issues[i].ID != "" {
// Validate that explicitly provided ID matches the configured prefix (bd-177)
if !strings.HasPrefix(issue.ID, expectedPrefix) {
return fmt.Errorf("issue ID '%s' does not match configured prefix '%s'", issue.ID, prefix)
if !strings.HasPrefix(issues[i].ID, expectedPrefix) {
return fmt.Errorf("issue ID '%s' does not match configured prefix '%s'", issues[i].ID, prefix)
}
usedIDs[issues[i].ID] = true
}
}
if needIDCount == 0 {
return nil
}
// Atomically reserve ID range
var nextID int
err = conn.QueryRowContext(ctx, `
INSERT INTO issue_counters (prefix, last_id)
SELECT ?, COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0) + ?
FROM issues
WHERE id LIKE ? || '-%'
AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*'
ON CONFLICT(prefix) DO UPDATE SET
last_id = MAX(
last_id,
(SELECT COALESCE(MAX(CAST(substr(id, LENGTH(?) + 2) AS INTEGER)), 0)
FROM issues
WHERE id LIKE ? || '-%'
AND substr(id, LENGTH(?) + 2) GLOB '[0-9]*')
) + ?
RETURNING last_id
`, prefix, prefix, needIDCount, prefix, prefix, prefix, prefix, prefix, needIDCount).Scan(&nextID)
if err != nil {
return fmt.Errorf("failed to generate ID range: %w", err)
}
// Assign IDs sequentially from the reserved range and compute content hashes
currentID := nextID - needIDCount + 1
// Second pass: generate IDs for issues that need them, with collision detection
for i := range issues {
if issues[i].ID == "" {
issues[i].ID = fmt.Sprintf("%s-%d", prefix, currentID)
currentID++
// Generate hash-based ID with collision detection (bd-168)
var generated bool
for nonce := 0; nonce < 10; nonce++ {
candidate := generateHashID(prefix, issues[i].Title, issues[i].Description, actor, issues[i].CreatedAt, nonce)
// Check if this ID is already used in this batch or in the database
if usedIDs[candidate] {
continue
}
var count int
err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count)
if err != nil {
return fmt.Errorf("failed to check for ID collision: %w", err)
}
if count == 0 {
issues[i].ID = candidate
usedIDs[candidate] = true
generated = true
break
}
}
if !generated {
return fmt.Errorf("failed to generate unique ID for issue %d after 10 attempts", i)
}
}
// Compute content hash if not already set (bd-95)
if issues[i].ContentHash == "" {
issues[i].ContentHash = issues[i].ComputeContentHash()
@@ -1104,7 +1110,7 @@ func (s *SQLiteStorage) CreateIssues(ctx context.Context, issues []*types.Issue,
}()
// Phase 3: Generate IDs for issues that need them
if err := generateBatchIDs(ctx, conn, issues, s.dbPath); err != nil {
if err := generateBatchIDs(ctx, conn, issues, actor); err != nil {
return err
}