beads/cmd/bd/doctor/integrity.go

package doctor

import (
	"bufio"
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	_ "github.com/ncruces/go-sqlite3/driver"
	_ "github.com/ncruces/go-sqlite3/embed"
	"github.com/steveyegge/beads/cmd/bd/doctor/fix"
	"github.com/steveyegge/beads/internal/beads"
	"github.com/steveyegge/beads/internal/configfile"
	"github.com/steveyegge/beads/internal/git"
)

// CheckIDFormat checks whether issues use hash-based or sequential IDs
func CheckIDFormat(path string) DoctorCheck {
	beadsDir := filepath.Join(path, ".beads")

	// Check metadata.json first for custom database name
	var dbPath string
	if cfg, err := configfile.Load(beadsDir); err == nil && cfg != nil && cfg.Database != "" {
		dbPath = cfg.DatabasePath(beadsDir)
	} else {
		// Fall back to canonical database name
		dbPath = filepath.Join(beadsDir, beads.CanonicalDatabaseName)
	}

	// Check if using JSONL-only mode
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		// Check if JSONL exists (--no-db mode)
		jsonlPath := filepath.Join(beadsDir, "issues.jsonl")
		if _, err := os.Stat(jsonlPath); err == nil {
			return DoctorCheck{
				Name:    "Issue IDs",
				Status:  StatusOK,
				Message: "N/A (JSONL-only mode)",
			}
		}
		// No database and no JSONL
		return DoctorCheck{
			Name:    "Issue IDs",
			Status:  StatusOK,
			Message: "No issues yet (will use hash-based IDs)",
		}
	}

	// Open database
	db, err := sql.Open("sqlite3", "file:"+dbPath+"?mode=ro")
	if err != nil {
		return DoctorCheck{
			Name:    "Issue IDs",
			Status:  StatusError,
			Message: "Unable to open database",
		}
	}
	defer func() { _ = db.Close() }() // Intentionally ignore close error

	// Get sample of issues to check ID format (up to 10 for pattern analysis)
	rows, err := db.Query("SELECT id FROM issues ORDER BY created_at LIMIT 10")
	if err != nil {
		return DoctorCheck{
			Name:    "Issue IDs",
			Status:  StatusError,
			Message: "Unable to query issues",
		}
	}
	defer rows.Close()

	var issueIDs []string
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err == nil {
			issueIDs = append(issueIDs, id)
		}
	}

	if len(issueIDs) == 0 {
		return DoctorCheck{
			Name:    "Issue IDs",
			Status:  StatusOK,
			Message: "No issues yet (will use hash-based IDs)",
		}
	}

	// Detect ID format using robust heuristic
	if DetectHashBasedIDs(db, issueIDs) {
		return DoctorCheck{
			Name:    "Issue IDs",
			Status:  StatusOK,
			Message: "hash-based ✓",
		}
	}

	// Sequential IDs - recommend migration
	return DoctorCheck{
		Name:    "Issue IDs",
		Status:  StatusWarning,
		Message: "sequential (e.g., bd-1, bd-2, ...)",
		Fix:     "Run 'bd migrate --to-hash-ids' to upgrade (prevents ID collisions in multi-worker scenarios)",
	}
}

// CheckDependencyCycles checks for circular dependencies in the issue graph
func CheckDependencyCycles(path string) DoctorCheck {
	beadsDir := filepath.Join(path, ".beads")
	dbPath := filepath.Join(beadsDir, beads.CanonicalDatabaseName)

	// If no database, skip this check
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		return DoctorCheck{
			Name:    "Dependency Cycles",
			Status:  StatusOK,
			Message: "N/A (no database)",
		}
	}

	// Open database to check for cycles
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return DoctorCheck{
			Name:    "Dependency Cycles",
			Status:  StatusWarning,
			Message: "Unable to open database",
			Detail:  err.Error(),
		}
	}
	defer db.Close()

	// Query for cycles using simplified SQL
	query := `
		WITH RECURSIVE paths AS (
			SELECT
				issue_id,
				depends_on_id,
				issue_id as start_id,
				issue_id || '→' || depends_on_id as path,
				0 as depth
			FROM dependencies

			UNION ALL

			SELECT
				d.issue_id,
				d.depends_on_id,
				p.start_id,
				p.path || '→' || d.depends_on_id,
				p.depth + 1
			FROM dependencies d
			JOIN paths p ON d.issue_id = p.depends_on_id
			WHERE p.depth < 100
			  AND p.path NOT LIKE '%' || d.depends_on_id || '→%'
		)
		SELECT DISTINCT start_id
		FROM paths
		WHERE depends_on_id = start_id`

	rows, err := db.Query(query)
	if err != nil {
		return DoctorCheck{
			Name:    "Dependency Cycles",
			Status:  StatusWarning,
			Message: "Unable to check for cycles",
			Detail:  err.Error(),
		}
	}
	defer rows.Close()

	cycleCount := 0
	var firstCycle string
	for rows.Next() {
		var startID string
		if err := rows.Scan(&startID); err != nil {
			continue
		}
		cycleCount++
		if cycleCount == 1 {
			firstCycle = startID
		}
	}

	if cycleCount == 0 {
		return DoctorCheck{
			Name:    "Dependency Cycles",
			Status:  StatusOK,
			Message: "No circular dependencies detected",
		}
	}

	return DoctorCheck{
		Name:    "Dependency Cycles",
		Status:  StatusError,
		Message: fmt.Sprintf("Found %d circular dependency cycle(s)", cycleCount),
		Detail:  fmt.Sprintf("First cycle involves: %s", firstCycle),
		Fix:     "Run 'bd dep cycles' to see full cycle paths, then 'bd dep remove' to break cycles",
	}
}

// CheckTombstones checks the health of tombstone records
// Reports: total tombstones, expiring soon (within 7 days), already expired
func CheckTombstones(path string) DoctorCheck {
	beadsDir := filepath.Join(path, ".beads")
	dbPath := filepath.Join(beadsDir, beads.CanonicalDatabaseName)

	// Skip if database doesn't exist
	if _, err := os.Stat(dbPath); os.IsNotExist(err) {
		return DoctorCheck{
			Name:    "Tombstones",
			Status:  StatusOK,
			Message: "N/A (no database)",
		}
	}

	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return DoctorCheck{
			Name:    "Tombstones",
			Status:  StatusWarning,
			Message: "Unable to open database",
			Detail:  err.Error(),
		}
	}
	defer db.Close()

	// Query tombstone statistics
	var totalTombstones int
	err = db.QueryRow("SELECT COUNT(*) FROM issues WHERE status = 'tombstone'").Scan(&totalTombstones)
	if err != nil {
		// Might be old schema without tombstone support
		return DoctorCheck{
			Name:    "Tombstones",
			Status:  StatusOK,
			Message: "N/A (schema may not support tombstones)",
		}
	}

	if totalTombstones == 0 {
		return DoctorCheck{
			Name:    "Tombstones",
			Status:  StatusOK,
			Message: "None (no deleted issues)",
		}
	}

	// Check for tombstones expiring within 7 days
	// Default TTL is 30 days, so expiring soon means deleted_at older than 23 days ago
	expiringThreshold := time.Now().Add(-23 * 24 * time.Hour).Format(time.RFC3339)
	expiredThreshold := time.Now().Add(-30 * 24 * time.Hour).Format(time.RFC3339)

	var expiringSoon, alreadyExpired int
	err = db.QueryRow(`
		SELECT COUNT(*) FROM issues
		WHERE status = 'tombstone'
		AND deleted_at IS NOT NULL
		AND deleted_at < ?
		AND deleted_at >= ?
	`, expiringThreshold, expiredThreshold).Scan(&expiringSoon)
	if err != nil {
		expiringSoon = 0
	}

	err = db.QueryRow(`
		SELECT COUNT(*) FROM issues
		WHERE status = 'tombstone'
		AND deleted_at IS NOT NULL
		AND deleted_at < ?
	`, expiredThreshold).Scan(&alreadyExpired)
	if err != nil {
		alreadyExpired = 0
	}

	// Build status message
	if alreadyExpired > 0 {
		return DoctorCheck{
			Name:    "Tombstones",
			Status:  StatusWarning,
			Message: fmt.Sprintf("%d total, %d expired", totalTombstones, alreadyExpired),
			Detail:  "Expired tombstones will be removed on next compact",
			Fix:     "Run 'bd compact' to prune expired tombstones",
		}
	}

	if expiringSoon > 0 {
		return DoctorCheck{
			Name:    "Tombstones",
			Status:  StatusOK,
			Message: fmt.Sprintf("%d total, %d expiring within 7 days", totalTombstones, expiringSoon),
		}
	}

	return DoctorCheck{
		Name:    "Tombstones",
		Status:  StatusOK,
		Message: fmt.Sprintf("%d total", totalTombstones),
	}
}

// CheckDeletionsManifest checks the status of deletions.jsonl and suggests migration to tombstones
func CheckDeletionsManifest(path string) DoctorCheck {
	beadsDir := filepath.Join(path, ".beads")

	// Skip if .beads doesn't exist
	if _, err := os.Stat(beadsDir); os.IsNotExist(err) {
		return DoctorCheck{
			Name:    "Deletions Manifest",
			Status:  StatusOK,
			Message: "N/A (no .beads directory)",
		}
	}

	// Check if we're in a git repository using worktree-aware detection
	_, err := git.GetGitDir()
	if err != nil {
		return DoctorCheck{
			Name:    "Deletions Manifest",
			Status:  StatusOK,
			Message: "N/A (not a git repository)",
		}
	}

	deletionsPath := filepath.Join(beadsDir, "deletions.jsonl")

	// Check if deletions.jsonl exists
	info, err := os.Stat(deletionsPath)
	if err == nil {
		// File exists - count entries (empty file is valid, means no deletions)
		if info.Size() == 0 {
			return DoctorCheck{
				Name:    "Deletions Manifest",
				Status:  StatusOK,
				Message: "Empty (no legacy deletions)",
			}
		}
		file, err := os.Open(deletionsPath) // #nosec G304 - controlled path
		if err == nil {
			defer file.Close()
			count := 0
			scanner := bufio.NewScanner(file)
			for scanner.Scan() {
				if len(scanner.Bytes()) > 0 {
					count++
				}
			}
			// Suggest migration to inline tombstones
			if count > 0 {
				return DoctorCheck{
					Name:    "Deletions Manifest",
					Status:  StatusWarning,
					Message: fmt.Sprintf("Legacy format (%d entries)", count),
					Detail:  "deletions.jsonl is deprecated in favor of inline tombstones",
					Fix:     "Run 'bd migrate-tombstones' to convert to inline tombstones",
				}
			}
			return DoctorCheck{
				Name:    "Deletions Manifest",
				Status:  StatusOK,
				Message: "Empty (no legacy deletions)",
			}
		}
	}

	// deletions.jsonl doesn't exist - this is the expected state with tombstones
	// Check for .migrated file to confirm migration happened
	migratedPath := filepath.Join(beadsDir, "deletions.jsonl.migrated")
	if _, err := os.Stat(migratedPath); err == nil {
		return DoctorCheck{
			Name:    "Deletions Manifest",
			Status:  StatusOK,
			Message: "Migrated to tombstones",
		}
	}

	// No deletions.jsonl and no .migrated file - check if JSONL exists
	jsonlPath := filepath.Join(beadsDir, "issues.jsonl")
	if _, err := os.Stat(jsonlPath); os.IsNotExist(err) {
		jsonlPath = filepath.Join(beadsDir, "beads.jsonl")
		if _, err := os.Stat(jsonlPath); os.IsNotExist(err) {
			return DoctorCheck{
				Name:    "Deletions Manifest",
				Status:  StatusOK,
				Message: "N/A (no JSONL file)",
			}
		}
	}

	// JSONL exists but no deletions tracking - this is fine for new repos using tombstones
	return DoctorCheck{
		Name:    "Deletions Manifest",
		Status:  StatusOK,
		Message: "Using inline tombstones",
	}
}

// Fix functions

// FixMigrateTombstones converts legacy deletions.jsonl entries to inline tombstones
func FixMigrateTombstones(path string) error {
	return fix.MigrateTombstones(path)
}

// Helper functions

// DetectHashBasedIDs uses multiple heuristics to determine if the database uses hash-based IDs.
// This is more robust than checking a single ID's format, since base36 hash IDs can be all-numeric.
func DetectHashBasedIDs(db *sql.DB, sampleIDs []string) bool {
	// Heuristic 1: Check for child_counters table (added for hash ID support)
	var tableName string
	err := db.QueryRow(`
		SELECT name FROM sqlite_master
		WHERE type='table' AND name='child_counters'
	`).Scan(&tableName)
	if err == nil {
		// child_counters table exists - this is a strong indicator of hash IDs
		return true
	}

	// Heuristic 2: Check if any sample ID clearly contains letters (a-z)
	// Hash IDs use base36 (0-9, a-z), sequential IDs are purely numeric
	for _, id := range sampleIDs {
		if isHashID(id) {
			return true
		}
	}

	// Heuristic 3: Look for patterns that indicate hash IDs
	if len(sampleIDs) >= 2 {
		// Extract suffixes (part after prefix-) for analysis
		var suffixes []string
		for _, id := range sampleIDs {
			parts := strings.SplitN(id, "-", 2)
			if len(parts) == 2 {
				// Strip hierarchical suffix like .1 or .1.2
				baseSuffix := strings.Split(parts[1], ".")[0]
				suffixes = append(suffixes, baseSuffix)
			}
		}

		if len(suffixes) >= 2 {
			// Check for variable lengths (strong indicator of adaptive hash IDs)
			// BUT: sequential IDs can also have variable length (1, 10, 100)
			// So we need to check if the length variation is natural (1→2→3 digits)
			// or random (3→8→4 chars typical of adaptive hash IDs)
			lengths := make(map[int]int) // length -> count
			for _, s := range suffixes {
				lengths[len(s)]++
			}

			// If we have 3+ different lengths, likely hash IDs (adaptive length)
			// Sequential IDs typically have 1-2 lengths (e.g., 1-9, 10-99, 100-999)
			if len(lengths) >= 3 {
				return true
			}

			// Check for leading zeros (rare in sequential IDs, common in hash IDs)
			// Sequential IDs: bd-1, bd-2, bd-10, bd-100
			// Hash IDs: bd-0088, bd-02a4, bd-05a1
			hasLeadingZero := false
			for _, s := range suffixes {
				if len(s) > 1 && s[0] == '0' {
					hasLeadingZero = true
					break
				}
			}
			if hasLeadingZero {
				return true
			}

			// Check for non-sequential ordering
			// Try to parse as integers - if they're not sequential, likely hash IDs
			allNumeric := true
			var nums []int
			for _, s := range suffixes {
				var num int
				if _, err := fmt.Sscanf(s, "%d", &num); err == nil {
					nums = append(nums, num)
				} else {
					allNumeric = false
					break
				}
			}

			if allNumeric && len(nums) >= 2 {
				// Check if they form a roughly sequential pattern (1,2,3 or 10,11,12)
				// Hash IDs would be more random (e.g., 88, 13452, 676)
				isSequentialPattern := true
				for i := 1; i < len(nums); i++ {
					diff := nums[i] - nums[i-1]
					// Allow for some gaps (deleted issues), but should be mostly sequential
					if diff < 0 || diff > 100 {
						isSequentialPattern = false
						break
					}
				}
				// If the numbers are NOT sequential, they're likely hash IDs
				if !isSequentialPattern {
					return true
				}
			}
		}
	}

	// If we can't determine for sure, default to assuming sequential IDs
	// This is conservative - better to recommend migration than miss sequential IDs
	return false
}

// isHashID checks if a single ID contains hash characteristics
// Hash IDs contain hex letters (a-f), sequential IDs are only digits
// May have hierarchical suffix like .1 or .1.2
func isHashID(id string) bool {
	lastSeperatorIndex := strings.LastIndex(id, "-")
	if lastSeperatorIndex == -1 {
		return false
	}

	suffix := id[lastSeperatorIndex+1:]
	// Strip hierarchical suffix like .1 or .1.2
	baseSuffix := strings.Split(suffix, ".")[0]

	if len(baseSuffix) == 0 {
		return false
	}

	// Must be valid Base36 (0-9, a-z)
	if !regexp.MustCompile(`^[0-9a-z]+$`).MatchString(baseSuffix) {
		return false
	}

	// If it's 5+ characters long, it's almost certainly a hash ID
	// (sequential IDs rarely exceed 9999 = 4 digits)
	if len(baseSuffix) >= 5 {
		return true
	}

	// For shorter IDs, check if it contains any letter (a-z)
	// Sequential IDs are purely numeric
	return regexp.MustCompile(`[a-z]`).MatchString(baseSuffix)
}