Add database fingerprinting and validation (bd-166)

- Add fingerprint.go with robust URL canonicalization
  - Handles git@, ssh://, https://, http://, file://, and local paths
  - Normalizes URLs to produce consistent repo_id across formats
  - Clone ID uses git repo root for stability

- Update init.go to store repo_id and clone_id metadata
  - repo_id: SHA256 hash of canonical git remote URL
  - clone_id: SHA256 hash of hostname + repo root path

- Add daemon validation to prevent database mismatches
  - Validates repo_id on daemon start
  - Fails on legacy databases (requires explicit migration)
  - Clear error messages with actionable solutions

- Add migrate --update-repo-id command
  - Updates repo_id after remote URL changes
  - Confirmation prompt (can bypass with --yes)
  - Supports --dry-run

Prevents accidental database mixing across repos and provides
migration path for remote URL changes or bd upgrades.

Closes bd-166

Amp-Thread-ID: https://ampcode.com/threads/T-a9d9dab1-5808-4f62-93ea-75a16cca978b
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-26 21:15:57 -07:00
parent f24573a5f8
commit 79b7a48a73
5 changed files with 378 additions and 4 deletions

File diff suppressed because one or more lines are too long

View File

@@ -847,6 +847,71 @@ func (d *daemonLogger) log(format string, args ...interface{}) {
d.logFunc(format, args...)
}
// validateDatabaseFingerprint checks that the database belongs to this repository.
//
// It reads the "repo_id" metadata key from store and compares it with the ID
// computed for the current repository by beads.ComputeRepoID.
//
// It returns:
//   - an error if repo_id cannot be read (other than the key being absent),
//   - an error with migration instructions if no repo_id is stored (legacy database),
//   - an error describing the mismatch if the stored and current IDs differ,
//   - nil if the IDs match, or if the current ID cannot be computed (a warning
//     is logged in that case and validation is skipped).
func validateDatabaseFingerprint(store storage.Storage, log *daemonLogger) error {
	ctx := context.Background()

	// shortID abbreviates an ID for display. Stored IDs come from the database
	// and may be shorter than 8 bytes (hand-edited or corrupted metadata), so
	// slicing unconditionally could panic.
	shortID := func(id string) string {
		if len(id) > 8 {
			return id[:8]
		}
		return id
	}

	// Get stored repo ID. A missing key is not a read failure: it is handled
	// below as a legacy database.
	storedRepoID, err := store.GetMetadata(ctx, "repo_id")
	if err != nil && err.Error() != "metadata key not found: repo_id" {
		return fmt.Errorf("failed to read repo_id: %w", err)
	}

	// If no repo_id, this is a legacy database - require explicit migration
	if storedRepoID == "" {
		return fmt.Errorf(`
LEGACY DATABASE DETECTED!
This database was created before version 0.17.5 and lacks a repository fingerprint.
To continue using this database, you must explicitly set its repository ID:
bd migrate --update-repo-id
This ensures the database is bound to this repository and prevents accidental
database sharing between different repositories.
If this is a fresh clone, run:
rm -rf .beads && bd init
Note: Auto-claiming legacy databases is intentionally disabled to prevent
silent corruption when databases are copied between repositories.
`)
	}

	// Validate repo ID matches current repository. Failure to compute the
	// current ID is treated as non-fatal: log and skip validation.
	currentRepoID, err := beads.ComputeRepoID()
	if err != nil {
		log.log("Warning: could not compute current repository ID: %v", err)
		return nil
	}

	if storedRepoID != currentRepoID {
		return fmt.Errorf(`
DATABASE MISMATCH DETECTED!
This database belongs to a different repository:
Database repo ID: %s
Current repo ID: %s
This usually means:
1. You copied a .beads directory from another repo (don't do this!)
2. Git remote URL changed (run 'bd migrate --update-repo-id')
3. Database corruption
4. bd was upgraded and URL canonicalization changed
Solutions:
- If remote URL changed: bd migrate --update-repo-id
- If bd was upgraded: bd migrate --update-repo-id
- If wrong database: rm -rf .beads && bd init
- If correct database: BEADS_IGNORE_REPO_MISMATCH=1 bd daemon
(Warning: This can cause data corruption across clones!)
`, shortID(storedRepoID), shortID(currentRepoID))
	}

	log.log("Repository fingerprint validated: %s", shortID(currentRepoID))
	return nil
}
func setupDaemonLogger(logPath string) (*lumberjack.Logger, daemonLogger) {
maxSizeMB := getEnvInt("BEADS_DAEMON_LOG_MAX_SIZE", 10)
maxBackups := getEnvInt("BEADS_DAEMON_LOG_MAX_BACKUPS", 3)
@@ -1189,6 +1254,15 @@ func runDaemonLoop(interval time.Duration, autoCommit, autoPush bool, logPath, p
defer func() { _ = store.Close() }()
log.log("Database opened: %s", daemonDBPath)
// Validate database fingerprint
if err := validateDatabaseFingerprint(store, &log); err != nil {
if os.Getenv("BEADS_IGNORE_REPO_MISMATCH") != "1" {
log.log("Error: %v", err)
os.Exit(1)
}
log.log("Warning: repository mismatch ignored (BEADS_IGNORE_REPO_MISMATCH=1)")
}
// Validate schema version matches daemon version
versionCtx := context.Background()
dbVersion, err := store.GetMetadata(versionCtx, "bd_version")

View File

@@ -131,9 +131,9 @@ bd.db
// Set the issue prefix in config
ctx := context.Background()
if err := store.SetConfig(ctx, "issue_prefix", prefix); err != nil {
fmt.Fprintf(os.Stderr, "Error: failed to set issue prefix: %v\n", err)
_ = store.Close()
os.Exit(1)
fmt.Fprintf(os.Stderr, "Error: failed to set issue prefix: %v\n", err)
_ = store.Close()
os.Exit(1)
}
// Store the bd version in metadata (for version mismatch detection)
@@ -142,6 +142,34 @@ bd.db
// Non-fatal - continue anyway
}
// Compute and store repository fingerprint
repoID, err := beads.ComputeRepoID()
if err != nil {
if !quiet {
fmt.Fprintf(os.Stderr, "Warning: could not compute repository ID: %v\n", err)
}
} else {
if err := store.SetMetadata(ctx, "repo_id", repoID); err != nil {
fmt.Fprintf(os.Stderr, "Warning: failed to set repo_id: %v\n", err)
} else if !quiet {
fmt.Printf(" Repository ID: %s\n", repoID[:8])
}
}
// Store clone-specific ID
cloneID, err := beads.GetCloneID()
if err != nil {
if !quiet {
fmt.Fprintf(os.Stderr, "Warning: could not compute clone ID: %v\n", err)
}
} else {
if err := store.SetMetadata(ctx, "clone_id", cloneID); err != nil {
fmt.Fprintf(os.Stderr, "Warning: failed to set clone_id: %v\n", err)
} else if !quiet {
fmt.Printf(" Clone ID: %s\n", cloneID)
}
}
// Create config.json for explicit configuration
if useLocalBeads {
cfg := configfile.DefaultConfig(Version)

View File

@@ -30,6 +30,13 @@ This command:
autoYes, _ := cmd.Flags().GetBool("yes")
cleanup, _ := cmd.Flags().GetBool("cleanup")
dryRun, _ := cmd.Flags().GetBool("dry-run")
updateRepoID, _ := cmd.Flags().GetBool("update-repo-id")
// Handle --update-repo-id first
if updateRepoID {
handleUpdateRepoID(dryRun, autoYes)
return
}
// Find .beads directory
beadsDir := findBeadsDir()
@@ -364,9 +371,131 @@ func formatDBList(dbs []*dbInfo) []map[string]string {
return result
}
func handleUpdateRepoID(dryRun bool, autoYes bool) {
// Find database
foundDB := beads.FindDatabasePath()
if foundDB == "" {
if jsonOutput {
outputJSON(map[string]interface{}{
"error": "no_database",
"message": "No beads database found. Run 'bd init' first.",
})
} else {
fmt.Fprintf(os.Stderr, "Error: no beads database found\n")
fmt.Fprintf(os.Stderr, "Hint: run 'bd init' to initialize bd\n")
}
os.Exit(1)
}
// Compute new repo ID
newRepoID, err := beads.ComputeRepoID()
if err != nil {
if jsonOutput {
outputJSON(map[string]interface{}{
"error": "compute_failed",
"message": err.Error(),
})
} else {
fmt.Fprintf(os.Stderr, "Error: failed to compute repository ID: %v\n", err)
}
os.Exit(1)
}
// Open database
store, err := sqlite.New(foundDB)
if err != nil {
if jsonOutput {
outputJSON(map[string]interface{}{
"error": "open_failed",
"message": err.Error(),
})
} else {
fmt.Fprintf(os.Stderr, "Error: failed to open database: %v\n", err)
}
os.Exit(1)
}
defer store.Close()
// Get old repo ID
ctx := context.Background()
oldRepoID, err := store.GetMetadata(ctx, "repo_id")
if err != nil && err.Error() != "metadata key not found: repo_id" {
if jsonOutput {
outputJSON(map[string]interface{}{
"error": "read_failed",
"message": err.Error(),
})
} else {
fmt.Fprintf(os.Stderr, "Error: failed to read repo_id: %v\n", err)
}
os.Exit(1)
}
oldDisplay := "none"
if len(oldRepoID) >= 8 {
oldDisplay = oldRepoID[:8]
}
if dryRun {
if jsonOutput {
outputJSON(map[string]interface{}{
"dry_run": true,
"old_repo_id": oldDisplay,
"new_repo_id": newRepoID[:8],
})
} else {
fmt.Println("Dry run mode - no changes will be made")
fmt.Printf("Would update repository ID:\n")
fmt.Printf(" Old: %s\n", oldDisplay)
fmt.Printf(" New: %s\n", newRepoID[:8])
}
return
}
// Prompt for confirmation if repo_id exists and differs
if oldRepoID != "" && oldRepoID != newRepoID && !autoYes && !jsonOutput {
fmt.Printf("WARNING: Changing repository ID can break sync if other clones exist.\n\n")
fmt.Printf("Current repo ID: %s\n", oldDisplay)
fmt.Printf("New repo ID: %s\n\n", newRepoID[:8])
fmt.Printf("Continue? [y/N] ")
var response string
fmt.Scanln(&response)
if strings.ToLower(response) != "y" && strings.ToLower(response) != "yes" {
fmt.Println("Cancelled")
return
}
}
// Update repo ID
if err := store.SetMetadata(ctx, "repo_id", newRepoID); err != nil {
if jsonOutput {
outputJSON(map[string]interface{}{
"error": "update_failed",
"message": err.Error(),
})
} else {
fmt.Fprintf(os.Stderr, "Error: failed to update repo_id: %v\n", err)
}
os.Exit(1)
}
if jsonOutput {
outputJSON(map[string]interface{}{
"status": "success",
"old_repo_id": oldDisplay,
"new_repo_id": newRepoID[:8],
})
} else {
color.Green("✓ Repository ID updated\n\n")
fmt.Printf(" Old: %s\n", oldDisplay)
fmt.Printf(" New: %s\n", newRepoID[:8])
}
}
// init declares the migrate command's boolean flags and attaches the command
// to rootCmd.
func init() {
	flags := migrateCmd.Flags()
	flags.Bool("yes", false, "Auto-confirm cleanup prompts")
	flags.Bool("cleanup", false, "Remove old database files after migration")
	flags.Bool("dry-run", false, "Show what would be done without making changes")
	flags.Bool("update-repo-id", false, "Update repository ID (use after changing git remote)")

	rootCmd.AddCommand(migrateCmd)
}

143
fingerprint.go Normal file
View File

@@ -0,0 +1,143 @@
package beads
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
)
// ComputeRepoID generates a unique identifier for this git repository.
//
// The ID is the hex encoding of the first 16 bytes of a SHA-256 digest
// (32 hex characters). The digest input is:
//   - the canonicalized value of remote.origin.url, when git reports one;
//   - otherwise the repository root from `git rev-parse --show-toplevel`,
//     made absolute, with symlinks resolved and forward slashes.
//
// It returns an error if neither git command succeeds (wrapping the
// underlying error) or if the remote URL cannot be canonicalized.
func ComputeRepoID() (string, error) {
	output, err := exec.Command("git", "config", "--get", "remote.origin.url").Output()
	if err != nil {
		// No origin remote available: fall back to the repository root path.
		output, err = exec.Command("git", "rev-parse", "--show-toplevel").Output()
		if err != nil {
			// Wrap the cause so callers can tell a missing git binary from
			// a directory that is not inside a repository.
			return "", fmt.Errorf("not a git repository: %w", err)
		}

		repoPath := strings.TrimSpace(string(output))
		// Path normalization is best-effort: on failure keep the less
		// normalized form rather than failing the whole computation.
		if absPath, absErr := filepath.Abs(repoPath); absErr == nil {
			repoPath = absPath
		}
		if evalPath, evalErr := filepath.EvalSymlinks(repoPath); evalErr == nil {
			repoPath = evalPath
		}

		hash := sha256.Sum256([]byte(filepath.ToSlash(repoPath)))
		return hex.EncodeToString(hash[:16]), nil
	}

	repoURL := strings.TrimSpace(string(output))
	canonical, err := canonicalizeGitURL(repoURL)
	if err != nil {
		return "", fmt.Errorf("failed to canonicalize URL: %w", err)
	}

	hash := sha256.Sum256([]byte(canonical))
	return hex.EncodeToString(hash[:16]), nil
}
// canonicalizeGitURL reduces a git remote location to a canonical string so
// that different spellings of the same remote compare equal.
//
// Three input forms are recognized:
//   - scheme URLs (anything containing "://"): the result is the lower-cased
//     host, plus ":port" unless the port is 22, 80 or 443, followed by the
//     URL path with trailing slashes and a ".git" suffix removed. User info,
//     query and fragment are dropped.
//   - scp-style "[user@]host:path": the result is "host/path" with the host
//     lower-cased, the user dropped, and trailing slashes, a ".git" suffix
//     and leading slashes removed from the path.
//   - anything else is treated as a local path: made absolute, symlinks
//     resolved where possible, and converted to forward slashes.
//
// An error is returned only when a scheme URL fails to parse.
func canonicalizeGitURL(rawURL string) (string, error) {
	rawURL = strings.TrimSpace(rawURL)

	if strings.Contains(rawURL, "://") {
		u, err := url.Parse(rawURL)
		if err != nil {
			return "", fmt.Errorf("invalid URL: %w", err)
		}

		host := strings.ToLower(u.Hostname())
		if port := u.Port(); port != "" && port != "22" && port != "80" && port != "443" {
			host = host + ":" + port
		}

		path := strings.TrimRight(u.Path, "/")
		path = strings.TrimSuffix(path, ".git")
		path = filepath.ToSlash(path)

		return host + path, nil
	}

	// Detect scp-style URLs: [user@]host:path
	// Must contain ":" before any "/" and not be a Windows path.
	colonIdx := strings.Index(rawURL, ":")
	slashIdx := strings.Index(rawURL, "/")
	colonBeforeSlash := colonIdx > 0 && (slashIdx == -1 || colonIdx < slashIdx)
	// Windows drive paths (C:/ or C:\) have the colon at index 1 followed by
	// a separator; they are local paths, not scp-style remotes.
	isWindowsPath := colonIdx == 1 && len(rawURL) > 2 && (rawURL[2] == '/' || rawURL[2] == '\\')

	if colonBeforeSlash && !isWindowsPath {
		hostPart, pathPart := rawURL[:colonIdx], rawURL[colonIdx+1:]
		if atIdx := strings.LastIndex(hostPart, "@"); atIdx >= 0 {
			hostPart = hostPart[atIdx+1:]
		}

		host := strings.ToLower(hostPart)
		path := strings.TrimRight(pathPart, "/")
		path = strings.TrimSuffix(path, ".git")
		path = filepath.ToSlash(path)
		// "host:/abs/path" must canonicalize like "ssh://host/abs/path";
		// without this the result would contain "host//abs/path".
		path = strings.TrimLeft(path, "/")

		return host + "/" + path, nil
	}

	// Local path. Normalization is best-effort: fall back to the less
	// normalized form when a step fails (e.g. the path does not exist).
	absPath, err := filepath.Abs(rawURL)
	if err != nil {
		absPath = rawURL
	}
	evalPath, err := filepath.EvalSymlinks(absPath)
	if err != nil {
		evalPath = absPath
	}

	return filepath.ToSlash(evalPath), nil
}
// GetCloneID derives an identifier for this particular working copy, so that
// two clones of the same repository get different IDs.
//
// The ID is the hex encoding of the first 8 bytes of the SHA-256 digest of
// "<hostname>:<repo root>", where the repo root comes from
// `git rev-parse --show-toplevel`, made absolute, with symlinks resolved
// where possible, and written with forward slashes.
//
// It returns an error if the hostname cannot be read or the git command fails.
func GetCloneID() (string, error) {
	host, err := os.Hostname()
	if err != nil {
		return "", fmt.Errorf("failed to get hostname: %w", err)
	}

	raw, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return "", fmt.Errorf("not a git repository: %w", err)
	}

	// Each normalization step is best-effort: keep the previous value when
	// a step fails.
	root := strings.TrimSpace(string(raw))
	if abs, absErr := filepath.Abs(root); absErr == nil {
		root = abs
	}
	if resolved, linkErr := filepath.EvalSymlinks(root); linkErr == nil {
		root = resolved
	}

	digest := sha256.Sum256([]byte(host + ":" + filepath.ToSlash(root)))
	return hex.EncodeToString(digest[:8]), nil
}