Fix doctor incorrectly diagnosing hash IDs as sequential (issue #322)

- Enhanced checkIDFormat to sample multiple issues instead of just one
- Added detectHashBasedIDs function with robust multi-heuristic detection:
  * Checks for child_counters table (hash ID schema indicator)
  * Detects letters in IDs (base36 encoding)
  * Identifies leading zeros (common in hash IDs, rare in sequential)
  * Analyzes variable length patterns (adaptive hash IDs)
  * Checks for non-sequential numeric ordering
- Added comprehensive test coverage (16 new test cases)
- Fixes false positives for numeric-only hash IDs like 'pf-0088'

Closes #322
This commit is contained in:
Charles P. Cross
2025-11-18 05:20:11 -05:00
parent aeea3532cd
commit 8c1f865e23
3 changed files with 391 additions and 12 deletions

View File

@@ -419,16 +419,8 @@ func checkIDFormat(path string) doctorCheck {
}
defer func() { _ = db.Close() }() // Intentionally ignore close error
// Get first issue to check ID format
var issueID string
err = db.QueryRow("SELECT id FROM issues ORDER BY created_at LIMIT 1").Scan(&issueID)
if err == sql.ErrNoRows {
return doctorCheck{
Name: "Issue IDs",
Status: statusOK,
Message: "No issues yet (will use hash-based IDs)",
}
}
// Get sample of issues to check ID format (up to 10 for pattern analysis)
rows, err := db.Query("SELECT id FROM issues ORDER BY created_at LIMIT 10")
if err != nil {
return doctorCheck{
Name: "Issue IDs",
@@ -436,9 +428,26 @@ func checkIDFormat(path string) doctorCheck {
Message: "Unable to query issues",
}
}
defer rows.Close()
// Detect ID format
if isHashID(issueID) {
var issueIDs []string
for rows.Next() {
var id string
if err := rows.Scan(&id); err == nil {
issueIDs = append(issueIDs, id)
}
}
if len(issueIDs) == 0 {
return doctorCheck{
Name: "Issue IDs",
Status: statusOK,
Message: "No issues yet (will use hash-based IDs)",
}
}
// Detect ID format using robust heuristic
if detectHashBasedIDs(db, issueIDs) {
return doctorCheck{
Name: "Issue IDs",
Status: statusOK,
@@ -522,6 +531,110 @@ func getDatabaseVersionFromPath(dbPath string) string {
return "unknown"
}
// detectHashBasedIDs uses multiple heuristics to determine if the database uses hash-based IDs.
// This is more robust than checking a single ID's format, since base36 hash IDs can be all-numeric.
func detectHashBasedIDs(db *sql.DB, sampleIDs []string) bool {
// Heuristic 1: Check for child_counters table (added for hash ID support)
var tableName string
err := db.QueryRow(`
SELECT name FROM sqlite_master
WHERE type='table' AND name='child_counters'
`).Scan(&tableName)
if err == nil {
// child_counters table exists - this is a strong indicator of hash IDs
return true
}
// Heuristic 2: Check if any sample ID clearly contains letters (a-z)
// Hash IDs use base36 (0-9, a-z), sequential IDs are purely numeric
for _, id := range sampleIDs {
if isHashID(id) {
return true
}
}
// Heuristic 3: Look for patterns that indicate hash IDs
if len(sampleIDs) >= 2 {
// Extract suffixes (part after prefix-) for analysis
var suffixes []string
for _, id := range sampleIDs {
parts := strings.SplitN(id, "-", 2)
if len(parts) == 2 {
// Strip hierarchical suffix like .1 or .1.2
baseSuffix := strings.Split(parts[1], ".")[0]
suffixes = append(suffixes, baseSuffix)
}
}
if len(suffixes) >= 2 {
// Check for variable lengths (strong indicator of adaptive hash IDs)
// BUT: sequential IDs can also have variable length (1, 10, 100)
// So we need to check if the length variation is natural (1→2→3 digits)
// or random (3→8→4 chars typical of adaptive hash IDs)
lengths := make(map[int]int) // length -> count
for _, s := range suffixes {
lengths[len(s)]++
}
// If we have 3+ different lengths, likely hash IDs (adaptive length)
// Sequential IDs typically have 1-2 lengths (e.g., 1-9, 10-99, 100-999)
if len(lengths) >= 3 {
return true
}
// Check for leading zeros (rare in sequential IDs, common in hash IDs)
// Sequential IDs: bd-1, bd-2, bd-10, bd-100
// Hash IDs: bd-0088, bd-02a4, bd-05a1
hasLeadingZero := false
for _, s := range suffixes {
if len(s) > 1 && s[0] == '0' {
hasLeadingZero = true
break
}
}
if hasLeadingZero {
return true
}
// Check for non-sequential ordering
// Try to parse as integers - if they're not sequential, likely hash IDs
allNumeric := true
var nums []int
for _, s := range suffixes {
var num int
if _, err := fmt.Sscanf(s, "%d", &num); err == nil {
nums = append(nums, num)
} else {
allNumeric = false
break
}
}
if allNumeric && len(nums) >= 2 {
// Check if they form a roughly sequential pattern (1,2,3 or 10,11,12)
// Hash IDs would be more random (e.g., 88, 13452, 676)
isSequentialPattern := true
for i := 1; i < len(nums); i++ {
diff := nums[i] - nums[i-1]
// Allow for some gaps (deleted issues), but should be mostly sequential
if diff < 0 || diff > 100 {
isSequentialPattern = false
break
}
}
// If the numbers are NOT sequential, they're likely hash IDs
if !isSequentialPattern {
return true
}
}
}
}
// If we can't determine for sure, default to assuming sequential IDs
// This is conservative - better to recommend migration than miss sequential IDs
return false
}
// Note: isHashID is defined in migrate_hash_ids.go to avoid duplication
// compareVersions compares two semantic version strings.

View File

@@ -1,7 +1,9 @@
package main
import (
"database/sql"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
@@ -98,6 +100,250 @@ func TestDoctorJSONOutput(t *testing.T) {
// Note: isHashID is tested in migrate_hash_ids_test.go
func TestDetectHashBasedIDs(t *testing.T) {
tests := []struct {
name string
sampleIDs []string
hasTable bool
expected bool
}{
{
name: "hash IDs with letters",
sampleIDs: []string{"bd-a3f8e9", "bd-b2c4d6"},
hasTable: false,
expected: true,
},
{
name: "hash IDs with mixed alphanumeric",
sampleIDs: []string{"bd-0134cc5a", "bd-abc123"},
hasTable: false,
expected: true,
},
{
name: "hash IDs all numeric with variable length",
sampleIDs: []string{"bd-0088", "bd-0134cc5a", "bd-02a4"},
hasTable: false,
expected: true, // Variable length indicates hash IDs
},
{
name: "hash IDs with leading zeros",
sampleIDs: []string{"bd-0088", "bd-02a4", "bd-05a1"},
hasTable: false,
expected: true, // Leading zeros indicate hash IDs
},
{
name: "hash IDs all numeric non-sequential",
sampleIDs: []string{"bd-0088", "bd-2312", "bd-0458"},
hasTable: false,
expected: true, // Non-sequential pattern
},
{
name: "sequential IDs",
sampleIDs: []string{"bd-1", "bd-2", "bd-3", "bd-4"},
hasTable: false,
expected: false, // Sequential pattern
},
{
name: "sequential IDs with gaps",
sampleIDs: []string{"bd-1", "bd-5", "bd-10", "bd-15"},
hasTable: false,
expected: false, // Still sequential pattern (small gaps allowed)
},
{
name: "database with child_counters table",
sampleIDs: []string{"bd-1", "bd-2"},
hasTable: true,
expected: true, // child_counters table indicates hash IDs
},
{
name: "hash IDs with hierarchical children",
sampleIDs: []string{"bd-a3f8e9.1", "bd-a3f8e9.2", "bd-b2c4d6"},
hasTable: false,
expected: true, // Base IDs have letters
},
{
name: "edge case: single ID with letters",
sampleIDs: []string{"bd-abc"},
hasTable: false,
expected: true,
},
{
name: "edge case: single sequential ID",
sampleIDs: []string{"bd-1"},
hasTable: false,
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create temporary database
tmpDir := t.TempDir()
dbPath := filepath.Join(tmpDir, "test.db")
// Open database and create schema
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
defer db.Close()
// Create issues table
_, err = db.Exec(`
CREATE TABLE IF NOT EXISTS issues (
id TEXT PRIMARY KEY,
title TEXT,
created_at TIMESTAMP
)
`)
if err != nil {
t.Fatalf("Failed to create issues table: %v", err)
}
// Create child_counters table if test requires it
if tt.hasTable {
_, err = db.Exec(`
CREATE TABLE IF NOT EXISTS child_counters (
parent_id TEXT PRIMARY KEY,
last_child INTEGER NOT NULL DEFAULT 0
)
`)
if err != nil {
t.Fatalf("Failed to create child_counters table: %v", err)
}
}
// Insert sample issues
for _, id := range tt.sampleIDs {
_, err = db.Exec("INSERT INTO issues (id, title, created_at) VALUES (?, ?, datetime('now'))",
id, "Test issue")
if err != nil {
t.Fatalf("Failed to insert issue %s: %v", id, err)
}
}
// Test detection
result := detectHashBasedIDs(db, tt.sampleIDs)
if result != tt.expected {
t.Errorf("detectHashBasedIDs() = %v, want %v", result, tt.expected)
}
})
}
}
func TestCheckIDFormat(t *testing.T) {
tests := []struct {
name string
issueIDs []string
createTable bool // create child_counters table
expectedStatus string
}{
{
name: "hash IDs with letters",
issueIDs: []string{"bd-a3f8e9", "bd-b2c4d6", "bd-xyz123"},
createTable: false,
expectedStatus: statusOK,
},
{
name: "hash IDs all numeric with leading zeros",
issueIDs: []string{"bd-0088", "bd-02a4", "bd-05a1", "bd-0458"},
createTable: false,
expectedStatus: statusOK,
},
{
name: "hash IDs with child_counters table",
issueIDs: []string{"bd-123", "bd-456"},
createTable: true,
expectedStatus: statusOK,
},
{
name: "sequential IDs",
issueIDs: []string{"bd-1", "bd-2", "bd-3", "bd-4"},
createTable: false,
expectedStatus: statusWarning,
},
{
name: "mixed: mostly hash IDs",
issueIDs: []string{"bd-0088", "bd-0134cc5a", "bd-02a4"},
createTable: false,
expectedStatus: statusOK, // Variable length = hash IDs
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create temporary workspace
tmpDir := t.TempDir()
beadsDir := filepath.Join(tmpDir, ".beads")
if err := os.Mkdir(beadsDir, 0750); err != nil {
t.Fatal(err)
}
// Create database
dbPath := filepath.Join(beadsDir, "beads.db")
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
defer db.Close()
// Create schema
_, err = db.Exec(`
CREATE TABLE IF NOT EXISTS issues (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
`)
if err != nil {
t.Fatalf("Failed to create issues table: %v", err)
}
if tt.createTable {
_, err = db.Exec(`
CREATE TABLE IF NOT EXISTS child_counters (
parent_id TEXT PRIMARY KEY,
last_child INTEGER NOT NULL DEFAULT 0
)
`)
if err != nil {
t.Fatalf("Failed to create child_counters table: %v", err)
}
}
// Insert test issues
for i, id := range tt.issueIDs {
_, err = db.Exec(
"INSERT INTO issues (id, title, created_at) VALUES (?, ?, datetime('now', ?||' seconds'))",
id, "Test issue "+id, fmt.Sprintf("+%d", i))
if err != nil {
t.Fatalf("Failed to insert issue %s: %v", id, err)
}
}
db.Close()
// Run check
check := checkIDFormat(tmpDir)
if check.Status != tt.expectedStatus {
t.Errorf("Expected status %s, got %s (message: %s)", tt.expectedStatus, check.Status, check.Message)
}
if tt.expectedStatus == statusOK && check.Status == statusOK {
if !strings.Contains(check.Message, "hash-based") {
t.Errorf("Expected hash-based message, got: %s", check.Message)
}
}
if tt.expectedStatus == statusWarning && check.Status == statusWarning {
if check.Fix == "" {
t.Error("Expected fix message for sequential IDs")
}
}
})
}
}
func TestCheckInstallation(t *testing.T) {
// Test with missing .beads directory
tmpDir := t.TempDir()

View File

@@ -251,13 +251,33 @@ func TestIsHashID(t *testing.T) {
id string
expected bool
}{
// Sequential IDs (numeric only, short)
{"bd-1", false},
{"bd-123", false},
{"bd-9999", false},
// Hash IDs with letters
{"bd-a3f8e9a2", true},
{"bd-abc123", true},
{"bd-123abc", true},
{"bd-a3f8e9a2.1", true},
{"bd-a3f8e9a2.1.2", true},
// Hash IDs that are numeric but 5+ characters (likely hash)
{"bd-12345", true},
{"bd-0088", false}, // 4 chars, all numeric - ambiguous, defaults to false
{"bd-00880", true}, // 5+ chars, likely hash
// Base36 hash IDs with letters
{"bd-5n3", true},
{"bd-65w", true},
{"bd-jmx", true},
{"bd-4rt", true},
// Edge cases
{"bd-", false}, // Empty suffix
{"invalid", false}, // No dash
{"bd-0", false}, // Single digit
}
for _, tt := range tests {