fix(dolt): use adaptive ID length instead of hardcoded 6 chars
Port the adaptive ID length algorithm from the SQLite backend to the Dolt backend. The ID length now scales from 3 to 8 characters based on database size, using birthday-paradox collision probability calculations.

- Add adaptive_length.go with length computation based on issue count
- Update generateIssueID to use adaptive length with nonce fallback
- Add collision detection and retry logic matching SQLite behavior

Fixes bd-c40999

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Executed-By: mayor
Role: mayor
This commit is contained in:
@@ -0,0 +1,129 @@
|
|||||||
|
package dolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AdaptiveIDConfig carries the tunables that govern how the hash portion of
// an issue ID grows as the database fills up.
type AdaptiveIDConfig struct {
	// MaxCollisionProbability is the collision-probability ceiling for a
	// candidate length; a length is only accepted while the estimated
	// probability stays at or below it (for example, 0.25 means 25%).
	MaxCollisionProbability float64

	// MinLength is the shortest hash length that will be used (default 3).
	MinLength int

	// MaxLength is the longest hash length that will be used (default 8).
	MaxLength int
}
|
||||||
|
|
||||||
|
// DefaultAdaptiveConfig returns sensible defaults for base36 encoding
|
||||||
|
// With base36 (0-9, a-z), we can use shorter IDs than hex:
|
||||||
|
//
|
||||||
|
// 3 chars: ~46K namespace, good for up to ~160 issues (25% collision prob)
|
||||||
|
// 4 chars: ~1.7M namespace, good for up to ~980 issues
|
||||||
|
// 5 chars: ~60M namespace, good for up to ~5.9K issues
|
||||||
|
// 6 chars: ~2.2B namespace, good for up to ~35K issues
|
||||||
|
// 7 chars: ~78B namespace, good for up to ~212K issues
|
||||||
|
// 8 chars: ~2.8T namespace, good for up to ~1M+ issues
|
||||||
|
func DefaultAdaptiveConfig() AdaptiveIDConfig {
|
||||||
|
return AdaptiveIDConfig{
|
||||||
|
MaxCollisionProbability: 0.25, // 25% threshold
|
||||||
|
MinLength: 3,
|
||||||
|
MaxLength: 8,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// collisionProbability estimates the chance that at least two of numIssues
// randomly drawn IDs of idLength base36 characters coincide, using the
// birthday-paradox approximation
//
//	P(collision) ≈ 1 - e^(-n²/2N)
//
// where n is the number of issues and N = 36^idLength is the size of the ID
// space.
func collisionProbability(numIssues int, idLength int) float64 {
	const base = 36.0 // base36 encoding (0-9, a-z)

	// Square in floating point. Squaring as an int silently wraps once
	// numIssues exceeds ~46K on 32-bit platforms (~3e9 on 64-bit), which
	// would yield a bogus (possibly negative) probability.
	n := float64(numIssues)
	totalPossibilities := math.Pow(base, float64(idLength))
	return 1.0 - math.Exp(-(n*n)/(2.0*totalPossibilities))
}
|
||||||
|
|
||||||
|
// computeAdaptiveLength determines the optimal ID length for the current database size
|
||||||
|
func computeAdaptiveLength(numIssues int, config AdaptiveIDConfig) int {
|
||||||
|
// Try lengths from min to max, return first that meets threshold
|
||||||
|
for length := config.MinLength; length <= config.MaxLength; length++ {
|
||||||
|
prob := collisionProbability(numIssues, length)
|
||||||
|
if prob <= config.MaxCollisionProbability {
|
||||||
|
return length
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If even maxLength doesn't meet threshold, return maxLength anyway
|
||||||
|
return config.MaxLength
|
||||||
|
}
|
||||||
|
|
||||||
|
// getAdaptiveConfigTx reads adaptive ID config from database, returns defaults if not set
|
||||||
|
func getAdaptiveConfigTx(ctx context.Context, tx *sql.Tx) AdaptiveIDConfig {
|
||||||
|
config := DefaultAdaptiveConfig()
|
||||||
|
|
||||||
|
// Read max_collision_prob
|
||||||
|
var probStr string
|
||||||
|
err := tx.QueryRowContext(ctx, `SELECT value FROM config WHERE `+"`key`"+` = ?`, "max_collision_prob").Scan(&probStr)
|
||||||
|
if err == nil && probStr != "" {
|
||||||
|
if prob, err := strconv.ParseFloat(probStr, 64); err == nil {
|
||||||
|
config.MaxCollisionProbability = prob
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read min_hash_length
|
||||||
|
var minLenStr string
|
||||||
|
err = tx.QueryRowContext(ctx, `SELECT value FROM config WHERE `+"`key`"+` = ?`, "min_hash_length").Scan(&minLenStr)
|
||||||
|
if err == nil && minLenStr != "" {
|
||||||
|
if minLen, err := strconv.Atoi(minLenStr); err == nil {
|
||||||
|
config.MinLength = minLen
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read max_hash_length
|
||||||
|
var maxLenStr string
|
||||||
|
err = tx.QueryRowContext(ctx, `SELECT value FROM config WHERE `+"`key`"+` = ?`, "max_hash_length").Scan(&maxLenStr)
|
||||||
|
if err == nil && maxLenStr != "" {
|
||||||
|
if maxLen, err := strconv.Atoi(maxLenStr); err == nil {
|
||||||
|
config.MaxLength = maxLen
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return config
|
||||||
|
}
|
||||||
|
|
||||||
|
// countTopLevelIssuesTx reports how many issues under prefix are top-level,
// i.e. whose ID has the form "<prefix>-<rest>" with no '.' in <rest>.
// It returns 0 and the underlying error if the query or scan fails.
func countTopLevelIssuesTx(ctx context.Context, tx *sql.Tx, prefix string) (int, error) {
	// SUBSTRING(id, LENGTH(?) + 2) skips the prefix and the '-' separator;
	// INSTR(...) = 0 then keeps only IDs without a dot in the remainder.
	// INSTR is used for MySQL/Dolt compatibility.
	const query = `
		SELECT COUNT(*)
		FROM issues
		WHERE id LIKE CONCAT(?, '-%')
		  AND INSTR(SUBSTRING(id, LENGTH(?) + 2), '.') = 0
	`
	row := tx.QueryRowContext(ctx, query, prefix, prefix)

	var total int
	if err := row.Scan(&total); err != nil {
		return 0, err
	}
	return total, nil
}
|
||||||
|
|
||||||
|
// GetAdaptiveIDLengthTx returns the appropriate hash length based on database size
|
||||||
|
func GetAdaptiveIDLengthTx(ctx context.Context, tx *sql.Tx, prefix string) (int, error) {
|
||||||
|
// Get current issue count
|
||||||
|
numIssues, err := countTopLevelIssuesTx(ctx, tx, prefix)
|
||||||
|
if err != nil {
|
||||||
|
return 6, err // Fallback to 6 on error
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get adaptive config
|
||||||
|
config := getAdaptiveConfigTx(ctx, tx)
|
||||||
|
|
||||||
|
// Compute optimal length
|
||||||
|
length := computeAdaptiveLength(numIssues, config)
|
||||||
|
|
||||||
|
return length, nil
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/steveyegge/beads/internal/idgen"
|
||||||
"github.com/steveyegge/beads/internal/types"
|
"github.com/steveyegge/beads/internal/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -617,15 +618,47 @@ func markDirty(ctx context.Context, tx *sql.Tx, issueID string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// nolint:unparam // error return kept for interface consistency
|
// generateIssueID generates a unique hash-based ID for an issue
|
||||||
func generateIssueID(_ context.Context, _ *sql.Tx, prefix string, issue *types.Issue, _ string) (string, error) {
|
// Uses adaptive length based on database size and tries multiple nonces on collision
|
||||||
// Simple hash-based ID generation
|
func generateIssueID(ctx context.Context, tx *sql.Tx, prefix string, issue *types.Issue, actor string) (string, error) {
|
||||||
// Use first 6 chars of content hash
|
// Get adaptive base length based on current database size
|
||||||
hash := issue.ComputeContentHash()
|
baseLength, err := GetAdaptiveIDLengthTx(ctx, tx, prefix)
|
||||||
if len(hash) > 6 {
|
if err != nil {
|
||||||
hash = hash[:6]
|
// Fallback to 6 on error
|
||||||
|
baseLength = 6
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("%s-%s", prefix, hash), nil
|
|
||||||
|
// Try baseLength, baseLength+1, baseLength+2, up to max of 8
|
||||||
|
maxLength := 8
|
||||||
|
if baseLength > maxLength {
|
||||||
|
baseLength = maxLength
|
||||||
|
}
|
||||||
|
|
||||||
|
for length := baseLength; length <= maxLength; length++ {
|
||||||
|
// Try up to 10 nonces at each length
|
||||||
|
for nonce := 0; nonce < 10; nonce++ {
|
||||||
|
candidate := generateHashID(prefix, issue.Title, issue.Description, actor, issue.CreatedAt, length, nonce)
|
||||||
|
|
||||||
|
// Check if this ID already exists
|
||||||
|
var count int
|
||||||
|
err = tx.QueryRowContext(ctx, `SELECT COUNT(*) FROM issues WHERE id = ?`, candidate).Scan(&count)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to check for ID collision: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if count == 0 {
|
||||||
|
return candidate, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("failed to generate unique ID after trying lengths %d-%d with 10 nonces each", baseLength, maxLength)
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateHashID creates a hash-based ID for a top-level issue.
|
||||||
|
// Uses base36 encoding (0-9, a-z) for better information density than hex.
|
||||||
|
func generateHashID(prefix, title, description, creator string, timestamp time.Time, length, nonce int) string {
|
||||||
|
return idgen.GenerateHashID(prefix, title, description, creator, timestamp, length, nonce)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isAllowedUpdateField(key string) bool {
|
func isAllowedUpdateField(key string) bool {
|
||||||
|
|||||||
Reference in New Issue
Block a user