Switch from hex to Base36 encoding for issue IDs (GH #213)

This change improves information density by using Base36 (0-9, a-z) instead of hex (0-9, a-f) for hash-based issue IDs. Key benefits: - Shorter IDs: Can now use 3-char IDs (was 4-char minimum) - Better scaling: 3 chars good for ~160 issues, 4 chars for ~980 issues - Case-insensitive: Maintains excellent CLI usability - Backward compatible: Old hex IDs continue to work Changes: - Implemented Base36 encoding with proper truncation (keep LSB) - Updated adaptive length thresholds (3-8 chars instead of 4-8) - Fixed collision probability math to match encoding (was calculating for base36 but encoding in hex - now both use base36) - Fixed ID parser bug (use prefixWithHyphen for substring matching) - Updated all tests and test data patterns Fixes #213 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-03 12:02:15 -08:00
parent add5599d7e
commit b4cb636d92
14 changed files with 176 additions and 89 deletions
--- a/internal/storage/sqlite/ids.go
+++ b/internal/storage/sqlite/ids.go
@@ -4,14 +4,75 @@ import (
 	"context"
 	"crypto/sha256"
 	"database/sql"
-	"encoding/hex"
 	"fmt"
+	"math/big"
 	"strings"
 	"time"

 	"github.com/steveyegge/beads/internal/types"
 )

+// base36Alphabet is the character set for base36 encoding (0-9, a-z)
+const base36Alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+
+// encodeBase36 converts a byte slice to a base36 string of specified length
+// Takes the first N bytes and converts them to base36 representation
+func encodeBase36(data []byte, length int) string {
+	// Convert bytes to big integer
+	num := new(big.Int).SetBytes(data)
+
+	// Convert to base36
+	var result strings.Builder
+	base := big.NewInt(36)
+	zero := big.NewInt(0)
+	mod := new(big.Int)
+
+	// Build the string in reverse
+	chars := make([]byte, 0, length)
+	for num.Cmp(zero) > 0 {
+		num.DivMod(num, base, mod)
+		chars = append(chars, base36Alphabet[mod.Int64()])
+	}
+
+	// Reverse the string
+	for i := len(chars) - 1; i >= 0; i-- {
+		result.WriteByte(chars[i])
+	}
+
+	// Pad with zeros if needed
+	str := result.String()
+	if len(str) < length {
+		str = strings.Repeat("0", length-len(str)) + str
+	}
+
+	// Truncate to exact length if needed (keep least significant digits)
+	if len(str) > length {
+		str = str[len(str)-length:]
+	}
+
+	return str
+}
+
+// isValidBase36 checks if a string contains only base36 characters
+func isValidBase36(s string) bool {
+	for _, c := range s {
+		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z')) {
+			return false
+		}
+	}
+	return true
+}
+
+// isValidHex checks if a string contains only hex characters
+func isValidHex(s string) bool {
+	for _, c := range s {
+		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) {
+			return false
+		}
+	}
+	return true
+}
+
 // ValidateIssueIDPrefix validates that an issue ID matches the configured prefix
 // Supports both top-level (bd-a3f8e9) and hierarchical (bd-a3f8e9.1) IDs
 func ValidateIssueIDPrefix(id, prefix string) error {
@@ -150,36 +211,39 @@ func EnsureIDs(ctx context.Context, conn *sql.Conn, prefix string, issues []*typ
 }

 // generateHashID creates a hash-based ID for a top-level issue.
-// For child issues, use the parent ID with a numeric suffix (e.g., "bd-a3f8e9.1").
-// Supports adaptive length from 4-8 chars based on database size (bd-ea2a13).
+// For child issues, use the parent ID with a numeric suffix (e.g., "bd-x7k9p.1").
+// Supports adaptive length from 3-8 chars based on database size.
 // Includes a nonce parameter to handle same-length collisions.
+// Uses base36 encoding (0-9, a-z) for better information density than hex.
 func generateHashID(prefix, title, description, creator string, timestamp time.Time, length, nonce int) string {
 	// Combine inputs into a stable content string
 	// Include nonce to handle hash collisions
 	content := fmt.Sprintf("%s|%s|%s|%d|%d", title, description, creator, timestamp.UnixNano(), nonce)
-	
+
 	// Hash the content
 	hash := sha256.Sum256([]byte(content))
-	
-	// Use variable length (4-8 hex chars)
-	// length determines how many bytes to use (2, 2.5, 3, 3.5, or 4)
-	var shortHash string
+
+	// Use base36 encoding with variable length (3-8 chars)
+	// Determine how many bytes to use based on desired output length
+	var numBytes int
 	switch length {
+	case 3:
+		numBytes = 2 // 2 bytes = 16 bits ≈ 3.09 base36 chars
 	case 4:
-		shortHash = hex.EncodeToString(hash[:2])
+		numBytes = 3 // 3 bytes = 24 bits ≈ 4.63 base36 chars
 	case 5:
-		// 2.5 bytes: use 3 bytes but take only first 5 chars
-		shortHash = hex.EncodeToString(hash[:3])[:5]
+		numBytes = 4 // 4 bytes = 32 bits ≈ 6.18 base36 chars
 	case 6:
-		shortHash = hex.EncodeToString(hash[:3])
+		numBytes = 4 // 4 bytes = 32 bits ≈ 6.18 base36 chars
 	case 7:
-		// 3.5 bytes: use 4 bytes but take only first 7 chars
-		shortHash = hex.EncodeToString(hash[:4])[:7]
+		numBytes = 5 // 5 bytes = 40 bits ≈ 7.73 base36 chars
 	case 8:
-		shortHash = hex.EncodeToString(hash[:4])
+		numBytes = 5 // 5 bytes = 40 bits ≈ 7.73 base36 chars
 	default:
-		shortHash = hex.EncodeToString(hash[:3]) // default to 6
+		numBytes = 3 // default to 3 chars
 	}
-	
+
+	shortHash := encodeBase36(hash[:numBytes], length)
+
 	return fmt.Sprintf("%s-%s", prefix, shortHash)
 }