From 1423bdc5fb9354185dc8b5d94a12885c063e0c85 Mon Sep 17 00:00:00 2001
From: mayor
Date: Sun, 25 Jan 2026 16:50:00 -0800
Subject: [PATCH] fix(dolt): use adaptive ID length instead of hardcoded 6 chars

Port the adaptive ID length algorithm from SQLite to Dolt backend.
The ID length now scales from 3-8 characters based on database size
using birthday paradox collision probability calculations.

- Add adaptive_length.go with length computation based on issue count
- Update generateIssueID to use adaptive length with nonce fallback
- Add collision detection and retry logic matching SQLite behavior

Fixes bd-c40999

Co-Authored-By: Claude Opus 4.5
Executed-By: mayor
Role: mayor
---
 internal/storage/dolt/adaptive_length.go | 145 +++++++++++++++++++++++
 internal/storage/dolt/issues.go          |  49 +++++++--
 2 files changed, 186 insertions(+), 8 deletions(-)
 create mode 100644 internal/storage/dolt/adaptive_length.go

diff --git a/internal/storage/dolt/adaptive_length.go b/internal/storage/dolt/adaptive_length.go
new file mode 100644
index 00000000..606f06e6
--- /dev/null
+++ b/internal/storage/dolt/adaptive_length.go
@@ -0,0 +1,145 @@
+package dolt
+
+import (
+	"context"
+	"database/sql"
+	"math"
+	"strconv"
+)
+
+// AdaptiveIDConfig holds configuration for adaptive ID length scaling
+type AdaptiveIDConfig struct {
+	// MaxCollisionProbability is the threshold at which we scale up ID length (e.g., 0.25 = 25%)
+	MaxCollisionProbability float64
+
+	// MinLength is the minimum hash length to use (default 3)
+	MinLength int
+
+	// MaxLength is the maximum hash length to use (default 8)
+	MaxLength int
+}
+
+// DefaultAdaptiveConfig returns sensible defaults for base36 encoding
+// With base36 (0-9, a-z), we can use shorter IDs than hex:
+//
+//	3 chars: ~46K namespace, good for up to ~160 issues (25% collision prob)
+//	4 chars: ~1.7M namespace, good for up to ~980 issues
+//	5 chars: ~60M namespace, good for up to ~5.9K issues
+//	6 chars: ~2.2B namespace, good for up to ~35K issues
+//	7 chars: ~78B namespace, good for up to ~212K issues
+//	8 chars: ~2.8T namespace, good for up to ~1M+ issues
+func DefaultAdaptiveConfig() AdaptiveIDConfig {
+	return AdaptiveIDConfig{
+		MaxCollisionProbability: 0.25, // 25% threshold
+		MinLength:               3,
+		MaxLength:               8,
+	}
+}
+
+// collisionProbability calculates P(collision) using birthday paradox approximation
+// P(collision) ≈ 1 - e^(-n²/2N)
+// where n = number of items, N = total possible values
+func collisionProbability(numIssues int, idLength int) float64 {
+	const base = 36.0 // base36 encoding (0-9, a-z)
+	totalPossibilities := math.Pow(base, float64(idLength))
+	// Square in float64: numIssues*numIssues as int overflows on 32-bit
+	// platforms once the count passes ~46K issues.
+	n := float64(numIssues)
+	exponent := -(n * n) / (2.0 * totalPossibilities)
+	return 1.0 - math.Exp(exponent)
+}
+
+// computeAdaptiveLength determines the optimal ID length for the current database size
+func computeAdaptiveLength(numIssues int, config AdaptiveIDConfig) int {
+	// Try lengths from min to max, return first that meets threshold
+	for length := config.MinLength; length <= config.MaxLength; length++ {
+		prob := collisionProbability(numIssues, length)
+		if prob <= config.MaxCollisionProbability {
+			return length
+		}
+	}
+
+	// If even maxLength doesn't meet threshold, return maxLength anyway
+	return config.MaxLength
+}
+
+// getAdaptiveConfigTx reads adaptive ID config from database, returns defaults if not set.
+// Values that are unparsable or out of range are ignored so that a bad config
+// row cannot yield zero-length or negative-length IDs.
+func getAdaptiveConfigTx(ctx context.Context, tx *sql.Tx) AdaptiveIDConfig {
+	config := DefaultAdaptiveConfig()
+
+	// lookup returns the raw config value for key, or "" when the key is
+	// missing or the query fails (both mean "keep the default").
+	lookup := func(key string) string {
+		var value string
+		row := tx.QueryRowContext(ctx, `SELECT value FROM config WHERE `+"`key`"+` = ?`, key)
+		if err := row.Scan(&value); err != nil {
+			return ""
+		}
+		return value
+	}
+
+	// max_collision_prob must be a probability in (0, 1]; this also rejects NaN
+	if s := lookup("max_collision_prob"); s != "" {
+		if prob, err := strconv.ParseFloat(s, 64); err == nil && prob > 0 && prob <= 1 {
+			config.MaxCollisionProbability = prob
+		}
+	}
+
+	// min_hash_length must be at least 1 character
+	if s := lookup("min_hash_length"); s != "" {
+		if minLen, err := strconv.Atoi(s); err == nil && minLen >= 1 {
+			config.MinLength = minLen
+		}
+	}
+
+	// max_hash_length must be at least 1 character
+	if s := lookup("max_hash_length"); s != "" {
+		if maxLen, err := strconv.Atoi(s); err == nil && maxLen >= 1 {
+			config.MaxLength = maxLen
+		}
+	}
+
+	// Keep the range well-formed; an inverted range already resolved to
+	// MaxLength in computeAdaptiveLength, so clamp MinLength down to match.
+	if config.MinLength > config.MaxLength {
+		config.MinLength = config.MaxLength
+	}
+
+	return config
+}
+
+// countTopLevelIssuesTx returns the number of top-level issues (excluding child issues)
+func countTopLevelIssuesTx(ctx context.Context, tx *sql.Tx, prefix string) (int, error) {
+	var count int
+	// Count only top-level issues (no dot in ID after prefix)
+	// Using INSTR for MySQL/Dolt compatibility
+	err := tx.QueryRowContext(ctx, `
+		SELECT COUNT(*)
+		FROM issues
+		WHERE id LIKE CONCAT(?, '-%')
+		  AND INSTR(SUBSTRING(id, LENGTH(?) + 2), '.') = 0
+	`, prefix, prefix).Scan(&count)
+	if err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
+// GetAdaptiveIDLengthTx returns the appropriate hash length based on database size
+func GetAdaptiveIDLengthTx(ctx context.Context, tx *sql.Tx, prefix string) (int, error) {
+	// Get current issue count
+	numIssues, err := countTopLevelIssuesTx(ctx, tx, prefix)
+	if err != nil {
+		return 6, err // Fallback to 6 on error
+	}
+
+	// Get adaptive config
+	config := getAdaptiveConfigTx(ctx, tx)
+
+	// Compute optimal length
+	length := computeAdaptiveLength(numIssues, config)
+
+	return length, nil
+}
diff --git a/internal/storage/dolt/issues.go b/internal/storage/dolt/issues.go
index 915935bc..20b503f2 100644
--- a/internal/storage/dolt/issues.go
+++ b/internal/storage/dolt/issues.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/steveyegge/beads/internal/idgen"
 	"github.com/steveyegge/beads/internal/types"
 )
 
@@ -617,15 +618,47 @@ func markDirty(ctx context.Context, tx *sql.Tx, issueID string) error {
 	return err
 }
 
-// nolint:unparam // error return kept for interface consistency
-func generateIssueID(_ context.Context, _ *sql.Tx, prefix string, issue *types.Issue, _ string) (string, error) {
-	// Simple hash-based ID generation
-	// Use first 6 chars of content hash
-	hash := issue.ComputeContentHash()
-	if len(hash) > 6 {
-		hash = hash[:6]
+// generateIssueID generates a unique hash-based ID for an issue
+// Uses adaptive length based on database size and tries multiple nonces on collision
+func generateIssueID(ctx context.Context, tx *sql.Tx, prefix string, issue *types.Issue, actor string) (string, error) {
+	// Get adaptive base length based on current database size
+	baseLength, err := GetAdaptiveIDLengthTx(ctx, tx, prefix)
+	if err != nil {
+		// Fallback to 6 on error
+		baseLength = 6
 	}
-	return fmt.Sprintf("%s-%s", prefix, hash), nil
+
+	// Try baseLength, baseLength+1, baseLength+2, up to max of 8
+	maxLength := 8
+	if baseLength > maxLength {
+		baseLength = maxLength
+	}
+
+	for length := baseLength; length <= maxLength; length++ {
+		// Try up to 10 nonces at each length
+		for nonce := 0; nonce < 10; nonce++ {
+			candidate := generateHashID(prefix, issue.Title, issue.Description, actor, issue.CreatedAt, length, nonce)
+
+			// Check if this ID already exists
+			var count int
+			err = tx.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count)
+			if err != nil {
+				return "", fmt.Errorf("failed to check for ID collision: %w", err)
+			}
+
+			if count == 0 {
+				return candidate, nil
+			}
+		}
+	}
+
+	return "", fmt.Errorf("failed to generate unique ID after trying lengths %d-%d with 10 nonces each", baseLength, maxLength)
+}
+
+// generateHashID creates a hash-based ID for a top-level issue.
+// Uses base36 encoding (0-9, a-z) for better information density than hex.
+func generateHashID(prefix, title, description, creator string, timestamp time.Time, length, nonce int) string {
+	return idgen.GenerateHashID(prefix, title, description, creator, timestamp, length, nonce)
 }
 
 func isAllowedUpdateField(key string) bool {