Fix doctor incorrectly diagnosing hash IDs as sequential (issue #322)
- Enhanced checkIDFormat to sample multiple issues instead of just one - Added detectHashBasedIDs function with robust multi-heuristic detection: * Checks for child_counters table (hash ID schema indicator) * Detects letters in IDs (base36 encoding) * Identifies leading zeros (common in hash IDs, rare in sequential) * Analyzes variable length patterns (adaptive hash IDs) * Checks for non-sequential numeric ordering - Added comprehensive test coverage (16 new test cases) - Fixes false positives for numeric-only hash IDs like 'pf-0088' Closes #322
This commit is contained in:
137
cmd/bd/doctor.go
137
cmd/bd/doctor.go
@@ -419,16 +419,8 @@ func checkIDFormat(path string) doctorCheck {
|
||||
}
|
||||
defer func() { _ = db.Close() }() // Intentionally ignore close error
|
||||
|
||||
// Get first issue to check ID format
|
||||
var issueID string
|
||||
err = db.QueryRow("SELECT id FROM issues ORDER BY created_at LIMIT 1").Scan(&issueID)
|
||||
if err == sql.ErrNoRows {
|
||||
return doctorCheck{
|
||||
Name: "Issue IDs",
|
||||
Status: statusOK,
|
||||
Message: "No issues yet (will use hash-based IDs)",
|
||||
}
|
||||
}
|
||||
// Get sample of issues to check ID format (up to 10 for pattern analysis)
|
||||
rows, err := db.Query("SELECT id FROM issues ORDER BY created_at LIMIT 10")
|
||||
if err != nil {
|
||||
return doctorCheck{
|
||||
Name: "Issue IDs",
|
||||
@@ -436,9 +428,26 @@ func checkIDFormat(path string) doctorCheck {
|
||||
Message: "Unable to query issues",
|
||||
}
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// Detect ID format
|
||||
if isHashID(issueID) {
|
||||
var issueIDs []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
if err := rows.Scan(&id); err == nil {
|
||||
issueIDs = append(issueIDs, id)
|
||||
}
|
||||
}
|
||||
|
||||
if len(issueIDs) == 0 {
|
||||
return doctorCheck{
|
||||
Name: "Issue IDs",
|
||||
Status: statusOK,
|
||||
Message: "No issues yet (will use hash-based IDs)",
|
||||
}
|
||||
}
|
||||
|
||||
// Detect ID format using robust heuristic
|
||||
if detectHashBasedIDs(db, issueIDs) {
|
||||
return doctorCheck{
|
||||
Name: "Issue IDs",
|
||||
Status: statusOK,
|
||||
@@ -522,6 +531,110 @@ func getDatabaseVersionFromPath(dbPath string) string {
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// detectHashBasedIDs uses multiple heuristics to determine if the database uses hash-based IDs.
|
||||
// This is more robust than checking a single ID's format, since base36 hash IDs can be all-numeric.
|
||||
func detectHashBasedIDs(db *sql.DB, sampleIDs []string) bool {
|
||||
// Heuristic 1: Check for child_counters table (added for hash ID support)
|
||||
var tableName string
|
||||
err := db.QueryRow(`
|
||||
SELECT name FROM sqlite_master
|
||||
WHERE type='table' AND name='child_counters'
|
||||
`).Scan(&tableName)
|
||||
if err == nil {
|
||||
// child_counters table exists - this is a strong indicator of hash IDs
|
||||
return true
|
||||
}
|
||||
|
||||
// Heuristic 2: Check if any sample ID clearly contains letters (a-z)
|
||||
// Hash IDs use base36 (0-9, a-z), sequential IDs are purely numeric
|
||||
for _, id := range sampleIDs {
|
||||
if isHashID(id) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Heuristic 3: Look for patterns that indicate hash IDs
|
||||
if len(sampleIDs) >= 2 {
|
||||
// Extract suffixes (part after prefix-) for analysis
|
||||
var suffixes []string
|
||||
for _, id := range sampleIDs {
|
||||
parts := strings.SplitN(id, "-", 2)
|
||||
if len(parts) == 2 {
|
||||
// Strip hierarchical suffix like .1 or .1.2
|
||||
baseSuffix := strings.Split(parts[1], ".")[0]
|
||||
suffixes = append(suffixes, baseSuffix)
|
||||
}
|
||||
}
|
||||
|
||||
if len(suffixes) >= 2 {
|
||||
// Check for variable lengths (strong indicator of adaptive hash IDs)
|
||||
// BUT: sequential IDs can also have variable length (1, 10, 100)
|
||||
// So we need to check if the length variation is natural (1→2→3 digits)
|
||||
// or random (3→8→4 chars typical of adaptive hash IDs)
|
||||
lengths := make(map[int]int) // length -> count
|
||||
for _, s := range suffixes {
|
||||
lengths[len(s)]++
|
||||
}
|
||||
|
||||
// If we have 3+ different lengths, likely hash IDs (adaptive length)
|
||||
// Sequential IDs typically have 1-2 lengths (e.g., 1-9, 10-99, 100-999)
|
||||
if len(lengths) >= 3 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check for leading zeros (rare in sequential IDs, common in hash IDs)
|
||||
// Sequential IDs: bd-1, bd-2, bd-10, bd-100
|
||||
// Hash IDs: bd-0088, bd-02a4, bd-05a1
|
||||
hasLeadingZero := false
|
||||
for _, s := range suffixes {
|
||||
if len(s) > 1 && s[0] == '0' {
|
||||
hasLeadingZero = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasLeadingZero {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check for non-sequential ordering
|
||||
// Try to parse as integers - if they're not sequential, likely hash IDs
|
||||
allNumeric := true
|
||||
var nums []int
|
||||
for _, s := range suffixes {
|
||||
var num int
|
||||
if _, err := fmt.Sscanf(s, "%d", &num); err == nil {
|
||||
nums = append(nums, num)
|
||||
} else {
|
||||
allNumeric = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if allNumeric && len(nums) >= 2 {
|
||||
// Check if they form a roughly sequential pattern (1,2,3 or 10,11,12)
|
||||
// Hash IDs would be more random (e.g., 88, 13452, 676)
|
||||
isSequentialPattern := true
|
||||
for i := 1; i < len(nums); i++ {
|
||||
diff := nums[i] - nums[i-1]
|
||||
// Allow for some gaps (deleted issues), but should be mostly sequential
|
||||
if diff < 0 || diff > 100 {
|
||||
isSequentialPattern = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// If the numbers are NOT sequential, they're likely hash IDs
|
||||
if !isSequentialPattern {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we can't determine for sure, default to assuming sequential IDs
|
||||
// This is conservative - better to recommend migration than miss sequential IDs
|
||||
return false
|
||||
}
|
||||
|
||||
// Note: isHashID is defined in migrate_hash_ids.go to avoid duplication
|
||||
|
||||
// compareVersions compares two semantic version strings.
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -98,6 +100,250 @@ func TestDoctorJSONOutput(t *testing.T) {
|
||||
|
||||
// Note: isHashID is tested in migrate_hash_ids_test.go
|
||||
|
||||
func TestDetectHashBasedIDs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
sampleIDs []string
|
||||
hasTable bool
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "hash IDs with letters",
|
||||
sampleIDs: []string{"bd-a3f8e9", "bd-b2c4d6"},
|
||||
hasTable: false,
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "hash IDs with mixed alphanumeric",
|
||||
sampleIDs: []string{"bd-0134cc5a", "bd-abc123"},
|
||||
hasTable: false,
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "hash IDs all numeric with variable length",
|
||||
sampleIDs: []string{"bd-0088", "bd-0134cc5a", "bd-02a4"},
|
||||
hasTable: false,
|
||||
expected: true, // Variable length indicates hash IDs
|
||||
},
|
||||
{
|
||||
name: "hash IDs with leading zeros",
|
||||
sampleIDs: []string{"bd-0088", "bd-02a4", "bd-05a1"},
|
||||
hasTable: false,
|
||||
expected: true, // Leading zeros indicate hash IDs
|
||||
},
|
||||
{
|
||||
name: "hash IDs all numeric non-sequential",
|
||||
sampleIDs: []string{"bd-0088", "bd-2312", "bd-0458"},
|
||||
hasTable: false,
|
||||
expected: true, // Non-sequential pattern
|
||||
},
|
||||
{
|
||||
name: "sequential IDs",
|
||||
sampleIDs: []string{"bd-1", "bd-2", "bd-3", "bd-4"},
|
||||
hasTable: false,
|
||||
expected: false, // Sequential pattern
|
||||
},
|
||||
{
|
||||
name: "sequential IDs with gaps",
|
||||
sampleIDs: []string{"bd-1", "bd-5", "bd-10", "bd-15"},
|
||||
hasTable: false,
|
||||
expected: false, // Still sequential pattern (small gaps allowed)
|
||||
},
|
||||
{
|
||||
name: "database with child_counters table",
|
||||
sampleIDs: []string{"bd-1", "bd-2"},
|
||||
hasTable: true,
|
||||
expected: true, // child_counters table indicates hash IDs
|
||||
},
|
||||
{
|
||||
name: "hash IDs with hierarchical children",
|
||||
sampleIDs: []string{"bd-a3f8e9.1", "bd-a3f8e9.2", "bd-b2c4d6"},
|
||||
hasTable: false,
|
||||
expected: true, // Base IDs have letters
|
||||
},
|
||||
{
|
||||
name: "edge case: single ID with letters",
|
||||
sampleIDs: []string{"bd-abc"},
|
||||
hasTable: false,
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "edge case: single sequential ID",
|
||||
sampleIDs: []string{"bd-1"},
|
||||
hasTable: false,
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create temporary database
|
||||
tmpDir := t.TempDir()
|
||||
dbPath := filepath.Join(tmpDir, "test.db")
|
||||
|
||||
// Open database and create schema
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open database: %v", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// Create issues table
|
||||
_, err = db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS issues (
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
created_at TIMESTAMP
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create issues table: %v", err)
|
||||
}
|
||||
|
||||
// Create child_counters table if test requires it
|
||||
if tt.hasTable {
|
||||
_, err = db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS child_counters (
|
||||
parent_id TEXT PRIMARY KEY,
|
||||
last_child INTEGER NOT NULL DEFAULT 0
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create child_counters table: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Insert sample issues
|
||||
for _, id := range tt.sampleIDs {
|
||||
_, err = db.Exec("INSERT INTO issues (id, title, created_at) VALUES (?, ?, datetime('now'))",
|
||||
id, "Test issue")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to insert issue %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Test detection
|
||||
result := detectHashBasedIDs(db, tt.sampleIDs)
|
||||
if result != tt.expected {
|
||||
t.Errorf("detectHashBasedIDs() = %v, want %v", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckIDFormat(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
issueIDs []string
|
||||
createTable bool // create child_counters table
|
||||
expectedStatus string
|
||||
}{
|
||||
{
|
||||
name: "hash IDs with letters",
|
||||
issueIDs: []string{"bd-a3f8e9", "bd-b2c4d6", "bd-xyz123"},
|
||||
createTable: false,
|
||||
expectedStatus: statusOK,
|
||||
},
|
||||
{
|
||||
name: "hash IDs all numeric with leading zeros",
|
||||
issueIDs: []string{"bd-0088", "bd-02a4", "bd-05a1", "bd-0458"},
|
||||
createTable: false,
|
||||
expectedStatus: statusOK,
|
||||
},
|
||||
{
|
||||
name: "hash IDs with child_counters table",
|
||||
issueIDs: []string{"bd-123", "bd-456"},
|
||||
createTable: true,
|
||||
expectedStatus: statusOK,
|
||||
},
|
||||
{
|
||||
name: "sequential IDs",
|
||||
issueIDs: []string{"bd-1", "bd-2", "bd-3", "bd-4"},
|
||||
createTable: false,
|
||||
expectedStatus: statusWarning,
|
||||
},
|
||||
{
|
||||
name: "mixed: mostly hash IDs",
|
||||
issueIDs: []string{"bd-0088", "bd-0134cc5a", "bd-02a4"},
|
||||
createTable: false,
|
||||
expectedStatus: statusOK, // Variable length = hash IDs
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create temporary workspace
|
||||
tmpDir := t.TempDir()
|
||||
beadsDir := filepath.Join(tmpDir, ".beads")
|
||||
if err := os.Mkdir(beadsDir, 0750); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Create database
|
||||
dbPath := filepath.Join(beadsDir, "beads.db")
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open database: %v", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// Create schema
|
||||
_, err = db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS issues (
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT NOT NULL,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create issues table: %v", err)
|
||||
}
|
||||
|
||||
if tt.createTable {
|
||||
_, err = db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS child_counters (
|
||||
parent_id TEXT PRIMARY KEY,
|
||||
last_child INTEGER NOT NULL DEFAULT 0
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create child_counters table: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Insert test issues
|
||||
for i, id := range tt.issueIDs {
|
||||
_, err = db.Exec(
|
||||
"INSERT INTO issues (id, title, created_at) VALUES (?, ?, datetime('now', ?||' seconds'))",
|
||||
id, "Test issue "+id, fmt.Sprintf("+%d", i))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to insert issue %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
db.Close()
|
||||
|
||||
// Run check
|
||||
check := checkIDFormat(tmpDir)
|
||||
|
||||
if check.Status != tt.expectedStatus {
|
||||
t.Errorf("Expected status %s, got %s (message: %s)", tt.expectedStatus, check.Status, check.Message)
|
||||
}
|
||||
|
||||
if tt.expectedStatus == statusOK && check.Status == statusOK {
|
||||
if !strings.Contains(check.Message, "hash-based") {
|
||||
t.Errorf("Expected hash-based message, got: %s", check.Message)
|
||||
}
|
||||
}
|
||||
|
||||
if tt.expectedStatus == statusWarning && check.Status == statusWarning {
|
||||
if check.Fix == "" {
|
||||
t.Error("Expected fix message for sequential IDs")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckInstallation(t *testing.T) {
|
||||
// Test with missing .beads directory
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
@@ -251,13 +251,33 @@ func TestIsHashID(t *testing.T) {
|
||||
id string
|
||||
expected bool
|
||||
}{
|
||||
// Sequential IDs (numeric only, short)
|
||||
{"bd-1", false},
|
||||
{"bd-123", false},
|
||||
{"bd-9999", false},
|
||||
|
||||
// Hash IDs with letters
|
||||
{"bd-a3f8e9a2", true},
|
||||
{"bd-abc123", true},
|
||||
{"bd-123abc", true},
|
||||
{"bd-a3f8e9a2.1", true},
|
||||
{"bd-a3f8e9a2.1.2", true},
|
||||
|
||||
// Hash IDs that are numeric but 5+ characters (likely hash)
|
||||
{"bd-12345", true},
|
||||
{"bd-0088", false}, // 4 chars, all numeric - ambiguous, defaults to false
|
||||
{"bd-00880", true}, // 5+ chars, likely hash
|
||||
|
||||
// Base36 hash IDs with letters
|
||||
{"bd-5n3", true},
|
||||
{"bd-65w", true},
|
||||
{"bd-jmx", true},
|
||||
{"bd-4rt", true},
|
||||
|
||||
// Edge cases
|
||||
{"bd-", false}, // Empty suffix
|
||||
{"invalid", false}, // No dash
|
||||
{"bd-0", false}, // Single digit
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
Reference in New Issue
Block a user