package main import ( "fmt" "math" ) // Birthday paradox: P(collision) ≈ 1 - e^(-n²/2N) // where n = number of items, N = total possible values func collisionProbability(numIssues int, idLength int) float64 { base := 36.0 // lowercase alphanumeric totalPossibilities := math.Pow(base, float64(idLength)) exponent := -float64(numIssues*numIssues) / (2.0 * totalPossibilities) return 1.0 - math.Exp(exponent) } // Find the expected number of collisions func expectedCollisions(numIssues int, idLength int) float64 { // Expected number of pairs that collide totalPairs := float64(numIssues * (numIssues - 1) / 2) return totalPairs * (1.0 / math.Pow(36, float64(idLength))) } // Find optimal ID length for a given database size and max collision probability func optimalIdLength(numIssues int, maxCollisionProb float64) int { for length := 3; length <= 12; length++ { prob := collisionProbability(numIssues, length) if prob <= maxCollisionProb { return length } } return 12 // fallback } func main() { fmt.Println("=== Collision Probability Analysis ===") dbSizes := []int{50, 100, 200, 500, 1000, 2000, 5000, 10000} idLengths := []int{4, 5, 6, 7, 8} // Print table header fmt.Printf("%-10s", "DB Size") for _, length := range idLengths { fmt.Printf("%8d-char", length) } fmt.Println() fmt.Println("----------------------------------------------------------") // Print collision probabilities for _, size := range dbSizes { fmt.Printf("%-10d", size) for _, length := range idLengths { prob := collisionProbability(size, length) fmt.Printf("%11.2f%%", prob*100) } fmt.Println() } fmt.Println("\n=== Recommended ID Length by Threshold ===") thresholds := []float64{0.10, 0.25, 0.50} fmt.Printf("%-10s", "DB Size") for _, threshold := range thresholds { fmt.Printf("%10.0f%%", threshold*100) } fmt.Println() fmt.Println("----------------------------------") for _, size := range dbSizes { fmt.Printf("%-10d", size) for _, threshold := range thresholds { optimal := optimalIdLength(size, threshold) fmt.Printf("%10d", optimal) } fmt.Println() } fmt.Println("\n=== Expected Number of Collisions ===") fmt.Printf("%-10s", "DB Size") for _, length := range idLengths { fmt.Printf("%10d-char", length) } fmt.Println() fmt.Println("----------------------------------------------------------") for _, size := range dbSizes { fmt.Printf("%-10d", size) for _, length := range idLengths { expected := expectedCollisions(size, length) fmt.Printf("%14.2f", expected) } fmt.Println() } fmt.Println("\n=== Adaptive Scaling Strategy ===") fmt.Println("Threshold: 25% collision probability") fmt.Printf("%-15s %-12s %-20s\n", "DB Size Range", "ID Length", "Collision Prob") fmt.Println("-------------------------------------------------------") ranges := []struct { min, max int }{ {0, 50}, {51, 150}, {151, 500}, {501, 1500}, {1501, 5000}, {5001, 15000}, } threshold := 0.25 for _, r := range ranges { optimal := optimalIdLength(r.max, threshold) prob := collisionProbability(r.max, optimal) fmt.Printf("%-15s %-12d %18.2f%%\n", fmt.Sprintf("%d-%d", r.min, r.max), optimal, prob*100) } }