From 8c1f865e23131067731eff94fc106fd5829d4066 Mon Sep 17 00:00:00 2001 From: "Charles P. Cross" Date: Tue, 18 Nov 2025 05:20:11 -0500 Subject: [PATCH] Fix doctor incorrectly diagnosing hash IDs as sequential (issue #322) - Enhanced checkIDFormat to sample multiple issues instead of just one - Added detectHashBasedIDs function with robust multi-heuristic detection: * Checks for child_counters table (hash ID schema indicator) * Detects letters in IDs (base36 encoding) * Identifies leading zeros (common in hash IDs, rare in sequential) * Analyzes variable length patterns (adaptive hash IDs) * Checks for non-sequential numeric ordering - Added comprehensive test coverage (16 new test cases) - Fixes false positives for numeric-only hash IDs like 'pf-0088' Closes #322 --- cmd/bd/doctor.go | 137 ++++++++++++++++-- cmd/bd/doctor_test.go | 246 ++++++++++++++++++++++++++++++++ cmd/bd/migrate_hash_ids_test.go | 20 +++ 3 files changed, 391 insertions(+), 12 deletions(-) diff --git a/cmd/bd/doctor.go b/cmd/bd/doctor.go index 2b1194d3..a8f3ff4a 100644 --- a/cmd/bd/doctor.go +++ b/cmd/bd/doctor.go @@ -419,16 +419,8 @@ func checkIDFormat(path string) doctorCheck { } defer func() { _ = db.Close() }() // Intentionally ignore close error - // Get first issue to check ID format - var issueID string - err = db.QueryRow("SELECT id FROM issues ORDER BY created_at LIMIT 1").Scan(&issueID) - if err == sql.ErrNoRows { - return doctorCheck{ - Name: "Issue IDs", - Status: statusOK, - Message: "No issues yet (will use hash-based IDs)", - } - } + // Get sample of issues to check ID format (up to 10 for pattern analysis) + rows, err := db.Query("SELECT id FROM issues ORDER BY created_at LIMIT 10") if err != nil { return doctorCheck{ Name: "Issue IDs", @@ -436,9 +428,26 @@ func checkIDFormat(path string) doctorCheck { Message: "Unable to query issues", } } + defer rows.Close() - // Detect ID format - if isHashID(issueID) { + var issueIDs []string + for rows.Next() { + var id string + if err := rows.Scan(&id); err == nil { + issueIDs = append(issueIDs, id) + } + } + + if len(issueIDs) == 0 { + return doctorCheck{ + Name: "Issue IDs", + Status: statusOK, + Message: "No issues yet (will use hash-based IDs)", + } + } + + // Detect ID format using robust heuristic + if detectHashBasedIDs(db, issueIDs) { return doctorCheck{ Name: "Issue IDs", Status: statusOK, @@ -522,6 +531,110 @@ func getDatabaseVersionFromPath(dbPath string) string { return "unknown" } +// detectHashBasedIDs uses multiple heuristics to determine if the database uses hash-based IDs. +// This is more robust than checking a single ID's format, since base36 hash IDs can be all-numeric. +func detectHashBasedIDs(db *sql.DB, sampleIDs []string) bool { + // Heuristic 1: Check for child_counters table (added for hash ID support) + var tableName string + err := db.QueryRow(` + SELECT name FROM sqlite_master + WHERE type='table' AND name='child_counters' + `).Scan(&tableName) + if err == nil { + // child_counters table exists - this is a strong indicator of hash IDs + return true + } + + // Heuristic 2: Check if any sample ID clearly contains letters (a-z) + // Hash IDs use base36 (0-9, a-z), sequential IDs are purely numeric + for _, id := range sampleIDs { + if isHashID(id) { + return true + } + } + + // Heuristic 3: Look for patterns that indicate hash IDs + if len(sampleIDs) >= 2 { + // Extract suffixes (part after prefix-) for analysis + var suffixes []string + for _, id := range sampleIDs { + parts := strings.SplitN(id, "-", 2) + if len(parts) == 2 { + // Strip hierarchical suffix like .1 or .1.2 + baseSuffix := strings.Split(parts[1], ".")[0] + suffixes = append(suffixes, baseSuffix) + } + } + + if len(suffixes) >= 2 { + // Check for variable lengths (strong indicator of adaptive hash IDs) + // BUT: sequential IDs can also have variable length (1, 10, 100) + // So we need to check if the length variation is natural (1→2→3 digits) + // or random (3→8→4 chars typical of adaptive hash IDs) + lengths := make(map[int]int) // length -> count + for _, s := range suffixes { + lengths[len(s)]++ + } + + // If we have 3+ different lengths, likely hash IDs (adaptive length) + // Sequential IDs typically have 1-2 lengths (e.g., 1-9, 10-99, 100-999) + if len(lengths) >= 3 { + return true + } + + // Check for leading zeros (rare in sequential IDs, common in hash IDs) + // Sequential IDs: bd-1, bd-2, bd-10, bd-100 + // Hash IDs: bd-0088, bd-02a4, bd-05a1 + hasLeadingZero := false + for _, s := range suffixes { + if len(s) > 1 && s[0] == '0' { + hasLeadingZero = true + break + } + } + if hasLeadingZero { + return true + } + + // Check for non-sequential ordering + // Try to parse as integers - if they're not sequential, likely hash IDs + allNumeric := true + var nums []int + for _, s := range suffixes { + var num int + if _, err := fmt.Sscanf(s, "%d", &num); err == nil { + nums = append(nums, num) + } else { + allNumeric = false + break + } + } + + if allNumeric && len(nums) >= 2 { + // Check if they form a roughly sequential pattern (1,2,3 or 10,11,12) + // Hash IDs would be more random (e.g., 88, 13452, 676) + isSequentialPattern := true + for i := 1; i < len(nums); i++ { + diff := nums[i] - nums[i-1] + // Allow for some gaps (deleted issues), but should be mostly sequential + if diff < 0 || diff > 100 { + isSequentialPattern = false + break + } + } + // If the numbers are NOT sequential, they're likely hash IDs + if !isSequentialPattern { + return true + } + } + } + } + + // If we can't determine for sure, default to assuming sequential IDs + // This is conservative - better to recommend migration than miss sequential IDs + return false +} + // Note: isHashID is defined in migrate_hash_ids.go to avoid duplication // compareVersions compares two semantic version strings. diff --git a/cmd/bd/doctor_test.go b/cmd/bd/doctor_test.go index e1f6b561..ebddaa49 100644 --- a/cmd/bd/doctor_test.go +++ b/cmd/bd/doctor_test.go @@ -1,7 +1,9 @@ package main import ( + "database/sql" "encoding/json" + "fmt" "os" "path/filepath" "strings" @@ -98,6 +100,250 @@ func TestDoctorJSONOutput(t *testing.T) { // Note: isHashID is tested in migrate_hash_ids_test.go +func TestDetectHashBasedIDs(t *testing.T) { + tests := []struct { + name string + sampleIDs []string + hasTable bool + expected bool + }{ + { + name: "hash IDs with letters", + sampleIDs: []string{"bd-a3f8e9", "bd-b2c4d6"}, + hasTable: false, + expected: true, + }, + { + name: "hash IDs with mixed alphanumeric", + sampleIDs: []string{"bd-0134cc5a", "bd-abc123"}, + hasTable: false, + expected: true, + }, + { + name: "hash IDs all numeric with variable length", + sampleIDs: []string{"bd-0088", "bd-0134cc5a", "bd-02a4"}, + hasTable: false, + expected: true, // Variable length indicates hash IDs + }, + { + name: "hash IDs with leading zeros", + sampleIDs: []string{"bd-0088", "bd-02a4", "bd-05a1"}, + hasTable: false, + expected: true, // Leading zeros indicate hash IDs + }, + { + name: "hash IDs all numeric non-sequential", + sampleIDs: []string{"bd-0088", "bd-2312", "bd-0458"}, + hasTable: false, + expected: true, // Non-sequential pattern + }, + { + name: "sequential IDs", + sampleIDs: []string{"bd-1", "bd-2", "bd-3", "bd-4"}, + hasTable: false, + expected: false, // Sequential pattern + }, + { + name: "sequential IDs with gaps", + sampleIDs: []string{"bd-1", "bd-5", "bd-10", "bd-15"}, + hasTable: false, + expected: false, // Still sequential pattern (small gaps allowed) + }, + { + name: "database with child_counters table", + sampleIDs: []string{"bd-1", "bd-2"}, + hasTable: true, + expected: true, // child_counters table indicates hash IDs + }, + { + name: "hash IDs with hierarchical children", + sampleIDs: []string{"bd-a3f8e9.1", "bd-a3f8e9.2", "bd-b2c4d6"}, + hasTable: false, + expected: true, // Base IDs have letters + }, + { + name: "edge case: single ID with letters", + sampleIDs: []string{"bd-abc"}, + hasTable: false, + expected: true, + }, + { + name: "edge case: single sequential ID", + sampleIDs: []string{"bd-1"}, + hasTable: false, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temporary database + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "test.db") + + // Open database and create schema + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + t.Fatalf("Failed to open database: %v", err) + } + defer db.Close() + + // Create issues table + _, err = db.Exec(` + CREATE TABLE IF NOT EXISTS issues ( + id TEXT PRIMARY KEY, + title TEXT, + created_at TIMESTAMP + ) + `) + if err != nil { + t.Fatalf("Failed to create issues table: %v", err) + } + + // Create child_counters table if test requires it + if tt.hasTable { + _, err = db.Exec(` + CREATE TABLE IF NOT EXISTS child_counters ( + parent_id TEXT PRIMARY KEY, + last_child INTEGER NOT NULL DEFAULT 0 + ) + `) + if err != nil { + t.Fatalf("Failed to create child_counters table: %v", err) + } + } + + // Insert sample issues + for _, id := range tt.sampleIDs { + _, err = db.Exec("INSERT INTO issues (id, title, created_at) VALUES (?, ?, datetime('now'))", + id, "Test issue") + if err != nil { + t.Fatalf("Failed to insert issue %s: %v", id, err) + } + } + + // Test detection + result := detectHashBasedIDs(db, tt.sampleIDs) + if result != tt.expected { + t.Errorf("detectHashBasedIDs() = %v, want %v", result, tt.expected) + } + }) + } +} + +func TestCheckIDFormat(t *testing.T) { + tests := []struct { + name string + issueIDs []string + createTable bool // create child_counters table + expectedStatus string + }{ + { + name: "hash IDs with letters", + issueIDs: []string{"bd-a3f8e9", "bd-b2c4d6", "bd-xyz123"}, + createTable: false, + expectedStatus: statusOK, + }, + { + name: "hash IDs all numeric with leading zeros", + issueIDs: []string{"bd-0088", "bd-02a4", "bd-05a1", "bd-0458"}, + createTable: false, + expectedStatus: statusOK, + }, + { + name: "hash IDs with child_counters table", + issueIDs: []string{"bd-123", "bd-456"}, + createTable: true, + expectedStatus: statusOK, + }, + { + name: "sequential IDs", + issueIDs: []string{"bd-1", "bd-2", "bd-3", "bd-4"}, + createTable: false, + expectedStatus: statusWarning, + }, + { + name: "mixed: mostly hash IDs", + issueIDs: []string{"bd-0088", "bd-0134cc5a", "bd-02a4"}, + createTable: false, + expectedStatus: statusOK, // Variable length = hash IDs + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temporary workspace + tmpDir := t.TempDir() + beadsDir := filepath.Join(tmpDir, ".beads") + if err := os.Mkdir(beadsDir, 0750); err != nil { + t.Fatal(err) + } + + // Create database + dbPath := filepath.Join(beadsDir, "beads.db") + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + t.Fatalf("Failed to open database: %v", err) + } + defer db.Close() + + // Create schema + _, err = db.Exec(` + CREATE TABLE IF NOT EXISTS issues ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + `) + if err != nil { + t.Fatalf("Failed to create issues table: %v", err) + } + + if tt.createTable { + _, err = db.Exec(` + CREATE TABLE IF NOT EXISTS child_counters ( + parent_id TEXT PRIMARY KEY, + last_child INTEGER NOT NULL DEFAULT 0 + ) + `) + if err != nil { + t.Fatalf("Failed to create child_counters table: %v", err) + } + } + + // Insert test issues + for i, id := range tt.issueIDs { + _, err = db.Exec( + "INSERT INTO issues (id, title, created_at) VALUES (?, ?, datetime('now', ?||' seconds'))", + id, "Test issue "+id, fmt.Sprintf("+%d", i)) + if err != nil { + t.Fatalf("Failed to insert issue %s: %v", id, err) + } + } + db.Close() + + // Run check + check := checkIDFormat(tmpDir) + + if check.Status != tt.expectedStatus { + t.Errorf("Expected status %s, got %s (message: %s)", tt.expectedStatus, check.Status, check.Message) + } + + if tt.expectedStatus == statusOK && check.Status == statusOK { + if !strings.Contains(check.Message, "hash-based") { + t.Errorf("Expected hash-based message, got: %s", check.Message) + } + } + + if tt.expectedStatus == statusWarning && check.Status == statusWarning { + if check.Fix == "" { + t.Error("Expected fix message for sequential IDs") + } + } + }) + } +} + func TestCheckInstallation(t *testing.T) { // Test with missing .beads directory tmpDir := t.TempDir() diff --git a/cmd/bd/migrate_hash_ids_test.go b/cmd/bd/migrate_hash_ids_test.go index 13a87297..2e150d3a 100644 --- a/cmd/bd/migrate_hash_ids_test.go +++ b/cmd/bd/migrate_hash_ids_test.go @@ -251,13 +251,33 @@ func TestIsHashID(t *testing.T) { id string expected bool }{ + // Sequential IDs (numeric only, short) {"bd-1", false}, {"bd-123", false}, + {"bd-9999", false}, + + // Hash IDs with letters {"bd-a3f8e9a2", true}, {"bd-abc123", true}, {"bd-123abc", true}, {"bd-a3f8e9a2.1", true}, {"bd-a3f8e9a2.1.2", true}, + + // Hash IDs that are numeric but 5+ characters (likely hash) + {"bd-12345", true}, + {"bd-0088", false}, // 4 chars, all numeric - ambiguous, defaults to false + {"bd-00880", true}, // 5+ chars, likely hash + + // Base36 hash IDs with letters + {"bd-5n3", true}, + {"bd-65w", true}, + {"bd-jmx", true}, + {"bd-4rt", true}, + + // Edge cases + {"bd-", false}, // Empty suffix + {"invalid", false}, // No dash + {"bd-0", false}, // Single digit } for _, tt := range tests {