Files
beads/internal/storage/sqlite/ids.go
Steve Yegge 22d34a22dc Fix bd migrate loop: skip prefix validation during auto-import
When auto-importing issues from JSONL, issues with different prefixes
(e.g., gt-1 vs gastown-) would fail validation and cause an infinite
loop of failed migrations.

The fix adds SkipPrefixValidation option to CreateIssuesWithFullOptions
which propagates through EnsureIDs to skip prefix validation for issues
that already have IDs during import. This allows importing issues with
any prefix while still validating new issues created interactively.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 21:45:42 -08:00

283 lines
9.2 KiB
Go

package sqlite
import (
"context"
"crypto/sha256"
"database/sql"
"fmt"
"math/big"
"strings"
"time"
"github.com/steveyegge/beads/internal/types"
)
// base36Alphabet lists the base36 digits in value order: '0'-'9' stand for
// the values 0-9 and 'a'-'z' for 10-35, so indexing it with a digit value
// yields that digit's character.
const base36Alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
// encodeBase36 interprets data as a big-endian unsigned integer and returns
// its base36 representation (digits 0-9 then a-z) as a string of exactly
// length characters.
//
// All of data is used. Values that need fewer than length digits are
// left-padded with '0'; values that need more keep only their least
// significant digits. Empty or all-zero data therefore encodes as length
// zeros. A non-positive length yields the empty string.
func encodeBase36(data []byte, length int) string {
	if length <= 0 {
		return ""
	}

	// big.Int.Text(36) emits lowercase digits 0-9a-z without leading zeros,
	// so no hand-rolled division loop is needed.
	digits := new(big.Int).SetBytes(data).Text(36)

	if len(digits) > length {
		// Too long: keep the least significant digits.
		return digits[len(digits)-length:]
	}
	// Too short (or exact): left-pad with zeros up to the requested width.
	return strings.Repeat("0", length-len(digits)) + digits
}
// isValidBase36 reports whether every character of s is a lowercase base36
// digit ('0'-'9' or 'a'-'z'). The empty string is considered valid.
func isValidBase36(s string) bool {
	for _, r := range s {
		switch {
		case '0' <= r && r <= '9':
			// decimal digit
		case 'a' <= r && r <= 'z':
			// lowercase letter
		default:
			return false
		}
	}
	return true
}
// isValidHex reports whether s consists solely of lowercase hexadecimal
// digits ('0'-'9' or 'a'-'f'). The empty string is considered valid.
func isValidHex(s string) bool {
	// Every accepted character is single-byte ASCII, and every byte of a
	// multi-byte or invalid UTF-8 sequence is >= 0x80, so a byte scan
	// accepts exactly the same strings as a rune scan.
	for i := 0; i < len(s); i++ {
		b := s[i]
		isDigit := b >= '0' && b <= '9'
		isLowerAF := b >= 'a' && b <= 'f'
		if !isDigit && !isLowerAF {
			return false
		}
	}
	return true
}
// ValidateIssueIDPrefix returns nil when id begins with prefix followed by a
// hyphen, and a descriptive error otherwise. Only the leading "<prefix>-" is
// inspected, so both top-level (bd-a3f8e9) and hierarchical (bd-a3f8e9.1)
// IDs pass as long as they carry the configured prefix.
func ValidateIssueIDPrefix(id, prefix string) error {
	if strings.HasPrefix(id, prefix+"-") {
		return nil
	}
	return fmt.Errorf("issue ID '%s' does not match configured prefix '%s'", id, prefix)
}
// GenerateIssueID returns a hash-based ID for issue that is not yet present
// in the issues table.
//
// The starting ID length comes from GetAdaptiveIDLength (6 if that call
// fails) and is capped at 8. For each length from the starting length up to
// 8, up to 10 nonces are tried; the first candidate with no matching row in
// issues is returned. An error is returned if a collision query fails or if
// every candidate is already taken.
func GenerateIssueID(ctx context.Context, conn *sql.Conn, prefix string, issue *types.Issue, actor string) (string, error) {
	const (
		fallbackLength  = 6  // used when the adaptive length lookup fails
		maxLength       = 8  // longest hash suffix that will be tried
		noncesPerLength = 10 // collision retries at each length
	)

	startLength, lengthErr := GetAdaptiveIDLength(ctx, conn, prefix)
	if lengthErr != nil {
		// Best effort: a failed lookup should not block ID generation.
		startLength = fallbackLength
	}
	if startLength > maxLength {
		startLength = maxLength
	}

	for idLen := startLength; idLen <= maxLength; idLen++ {
		for attempt := 0; attempt < noncesPerLength; attempt++ {
			id := generateHashID(prefix, issue.Title, issue.Description, actor, issue.CreatedAt, idLen, attempt)

			// A candidate is free when no row in issues already carries it.
			var existing int
			row := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, id)
			if err := row.Scan(&existing); err != nil {
				return "", fmt.Errorf("failed to check for ID collision: %w", err)
			}
			if existing == 0 {
				return id, nil
			}
		}
	}

	return "", fmt.Errorf("failed to generate unique ID after trying lengths %d-%d with 10 nonces each", startLength, maxLength)
}
// GenerateBatchIssueIDs assigns a unique hash-based ID to every issue in
// issues whose ID is empty; issues that already have an ID are left
// untouched.
//
// usedIDs holds IDs already claimed within the batch. Candidates found in it
// are rejected without querying the database, and each newly assigned ID is
// added to it. A nil usedIDs is treated as an empty set (the assignments are
// then tracked only for the duration of this call).
//
// The starting ID length comes from GetAdaptiveIDLength (6 if that call
// fails) and is capped at 8. For each issue, lengths from the starting length
// up to 8 are tried with up to 10 nonces each. An error is returned if a
// collision query fails or if no free ID is found for some issue; issues
// earlier in the slice keep the IDs they were already given.
func GenerateBatchIssueIDs(ctx context.Context, conn *sql.Conn, prefix string, issues []*types.Issue, actor string, usedIDs map[string]bool) error {
	// Writing to a nil map panics, so fall back to a private set.
	if usedIDs == nil {
		usedIDs = make(map[string]bool)
	}

	// Get adaptive base length based on current database size.
	baseLength, err := GetAdaptiveIDLength(ctx, conn, prefix)
	if err != nil {
		// Best effort: a failed lookup should not block ID generation.
		baseLength = 6
	}

	// Try baseLength, baseLength+1, ... up to a maximum of 8.
	maxLength := 8
	if baseLength > maxLength {
		baseLength = maxLength
	}

	for i, issue := range issues {
		if issue.ID != "" {
			continue
		}

		generated := false
		// Progressive fallback: move to a longer ID only after every nonce
		// at the current length has collided.
		for length := baseLength; length <= maxLength && !generated; length++ {
			for nonce := 0; nonce < 10; nonce++ {
				candidate := generateHashID(prefix, issue.Title, issue.Description, actor, issue.CreatedAt, length, nonce)

				// Reject IDs already claimed earlier in this batch.
				if usedIDs[candidate] {
					continue
				}

				// Reject IDs already stored in the database.
				var count int
				if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count); err != nil {
					return fmt.Errorf("failed to check for ID collision: %w", err)
				}
				if count == 0 {
					issue.ID = candidate
					usedIDs[candidate] = true
					generated = true
					break
				}
			}
		}

		if !generated {
			return fmt.Errorf("failed to generate unique ID for issue %d after trying lengths %d-%d with 10 nonces each", i, baseLength, maxLength)
		}
	}
	return nil
}
// tryResurrectParent reports whether an issue with ID parentID is present in
// issues, i.e. whether the missing parent is part of the batch being
// processed. It only checks membership; it does not create or modify
// anything itself.
func tryResurrectParent(parentID string, issues []*types.Issue) bool {
	found := false
	for idx := 0; idx < len(issues) && !found; idx++ {
		found = issues[idx].ID == parentID
	}
	return found
}
// EnsureIDs makes sure every issue in the batch ends up with an ID.
//
// First pass, for issues that already carry an ID:
//   - unless skipPrefixValidation is set (used during import), the ID must
//     start with "<prefix>-";
//   - if the ID contains a dot it is treated as hierarchical (bd-a3f8e9.1)
//     and the part before the last dot must exist in the issues table;
//     when it does not, orphanHandling decides what happens (see the switch
//     below);
//   - the ID is recorded as used so generated IDs cannot clash with it.
//
// Second pass: GenerateBatchIssueIDs fills in an ID for every issue whose ID
// is empty.
func EnsureIDs(ctx context.Context, conn *sql.Conn, prefix string, issues []*types.Issue, actor string, orphanHandling OrphanHandling, skipPrefixValidation bool) error {
	usedIDs := make(map[string]bool)

	// First pass: validate and record explicitly provided IDs.
	for _, issue := range issues {
		id := issue.ID
		if id == "" {
			continue
		}

		// Explicit IDs must match the configured prefix (bd-177), except
		// during import where issues may carry other prefixes (e.g. from
		// renamed repos).
		if !skipPrefixValidation {
			if err := ValidateIssueIDPrefix(id, prefix); err != nil {
				return wrapDBErrorf(err, "validate ID prefix for %s", id)
			}
		}

		// Hierarchical ID: the parent is everything before the last dot.
		if lastDot := strings.LastIndex(id, "."); lastDot >= 0 {
			parentID := id[:lastDot]

			var parentCount int
			row := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, parentID)
			if err := row.Scan(&parentCount); err != nil {
				return fmt.Errorf("failed to check parent existence: %w", err)
			}

			if parentCount == 0 {
				switch orphanHandling {
				case OrphanStrict:
					return fmt.Errorf("parent issue %s does not exist (strict mode)", parentID)
				case OrphanResurrect:
					// Accept the child only if its parent is part of this
					// batch; presumably the caller orders parents before
					// children so the parent is created first.
					if !tryResurrectParent(parentID, issues) {
						return fmt.Errorf("parent issue %s does not exist and cannot be resurrected from import batch", parentID)
					}
				case OrphanSkip:
					// Clear the ID and do not record it as used.
					// NOTE(review): the second pass generates an ID for
					// every issue whose ID is empty, which appears to
					// include issues cleared here — confirm whether a
					// skipped orphan is really meant to receive a fresh ID.
					issue.ID = ""
					continue
				default:
					// OrphanAllow and any unrecognized mode: accept the
					// orphan without further checks (backward compatible).
				}
			}
		}

		usedIDs[id] = true
	}

	// Second pass: generate IDs for issues that still need them.
	return GenerateBatchIssueIDs(ctx, conn, prefix, issues, actor, usedIDs)
}
// generateHashID builds a hash-based ID of the form "<prefix>-<suffix>" for a
// top-level issue, where suffix is a base36 string of exactly length
// characters. For child issues, use the parent ID with a numeric suffix
// (e.g., "bd-x7k9p.1") instead.
//
// The suffix is derived from the SHA-256 of title, description, creator, the
// timestamp in Unix nanoseconds and nonce; prefix is only prepended and does
// not influence the hash. Varying nonce yields a different candidate for the
// same issue, which callers use to step past collisions.
//
// Lengths 3-8 are supported. Any other length falls back to hashing 3 bytes,
// which is still padded or truncated to length characters by encodeBase36.
func generateHashID(prefix, title, description, creator string, timestamp time.Time, length, nonce int) string {
	// Combine inputs into a stable content string; the nonce makes each
	// retry hash differently.
	content := fmt.Sprintf("%s|%s|%s|%d|%d", title, description, creator, timestamp.UnixNano(), nonce)
	hash := sha256.Sum256([]byte(content))

	// Pick enough leading hash bytes to cover the requested number of base36
	// digits (one digit carries log2(36) ≈ 5.17 bits). Feeding fewer bits
	// than the digits can hold would leave the leading digits stuck at low
	// values and shrink the ID space.
	var numBytes int
	switch length {
	case 3:
		numBytes = 2 // 16 bits ≈ 3.09 base36 chars
	case 4:
		numBytes = 3 // 24 bits ≈ 4.64 base36 chars
	case 5, 6:
		numBytes = 4 // 32 bits ≈ 6.19 base36 chars
	case 7:
		numBytes = 5 // 40 bits ≈ 7.74 base36 chars
	case 8:
		// 40 bits cover only ≈ 7.74 digits, which is short of 8, so use
		// 48 bits (≈ 9.28 base36 chars) here.
		numBytes = 6
	default:
		numBytes = 3 // unsupported length: fall back to 3 bytes of hash
	}

	shortHash := encodeBase36(hash[:numBytes], length)
	return fmt.Sprintf("%s-%s", prefix, shortHash)
}