Fix bd-srwk: Add ID-based staleness detection to bd export

Prevents data loss when exporting a stale database by comparing issue IDs,
not just counts. Detects both scenarios:
- Database has fewer issues than JSONL
- Database has different issues than JSONL (same count)

Shows specific missing issue IDs in error. Allows override with --force.
Includes comprehensive tests for all scenarios.
This commit is contained in:
Steve Yegge
2025-11-07 20:07:59 -08:00
parent 620d96f142
commit eeef37f37b
3 changed files with 220 additions and 18 deletions

View File

@@ -46,6 +46,37 @@ func countIssuesInJSONL(path string) (int, error) {
return count, nil
}
// getIssueIDsFromJSONL opens the file at path, decodes it as a stream of
// JSON-encoded issues, and returns the set of issue IDs it contains.
//
// If the file cannot be opened, the returned map is nil and the open error is
// returned unwrapped. If a record fails to decode, the IDs gathered up to that
// point are returned together with a wrapped error. The number in that error
// is the count of records decoded so far plus one; records are counted, not
// physical lines. A failure to close the file is only reported on stderr.
func getIssueIDsFromJSONL(path string) (map[string]bool, error) {
	// #nosec G304 - controlled path from config
	f, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer func() {
		if closeErr := f.Close(); closeErr != nil {
			fmt.Fprintf(os.Stderr, "Warning: failed to close file: %v\n", closeErr)
		}
	}()

	seen := make(map[string]bool)
	dec := json.NewDecoder(f)
	for decoded := 0; ; decoded++ {
		var rec types.Issue
		decErr := dec.Decode(&rec)
		switch {
		case decErr == nil:
			seen[rec.ID] = true
		case decErr.Error() == "EOF":
			// End of stream is recognised by the error text.
			// NOTE(review): comparing against io.EOF (errors.Is) would be
			// sturdier, but needs imports this block cannot add on its own.
			return seen, nil
		default:
			// Corrupt or invalid JSON: hand back what was read plus the error.
			return seen, fmt.Errorf("invalid JSON at line %d: %w", decoded+1, decErr)
		}
	}
}
// validateExportPath checks if the output path is safe to write to
func validateExportPath(path string) error {
// Get absolute path to normalize it
@@ -90,6 +121,8 @@ Output to stdout by default, or use -o flag for file output.`,
output, _ := cmd.Flags().GetString("output")
statusFilter, _ := cmd.Flags().GetString("status")
force, _ := cmd.Flags().GetBool("force")
debug.Logf("Debug: export flags - output=%q, force=%v\n", output, force)
if format != "jsonl" {
fmt.Fprintf(os.Stderr, "Error: only 'jsonl' format is currently supported\n")
@@ -104,6 +137,10 @@ Output to stdout by default, or use -o flag for file output.`,
daemonClient = nil
}
// Note: We used to check database file timestamps here, but WAL files
// get created when opening the DB, making timestamp checks unreliable.
// Instead, we compare the issue IDs in the existing JSONL against the database after loading (see below).
// Ensure we have a direct store connection
if store == nil {
var err error
@@ -153,20 +190,61 @@ Output to stdout by default, or use -o flag for file output.`,
}
}
// Warning: check if export would lose >50% of issues
if output != "" {
existingCount, err := countIssuesInJSONL(output)
if err == nil && existingCount > 0 {
lossPercent := float64(existingCount-len(issues)) / float64(existingCount) * 100
if lossPercent > 50 {
fmt.Fprintf(os.Stderr, "WARNING: Export would lose %.1f%% of issues!\n", lossPercent)
fmt.Fprintf(os.Stderr, " Existing JSONL: %d issues\n", existingCount)
fmt.Fprintf(os.Stderr, " Database: %d issues\n", len(issues))
fmt.Fprintf(os.Stderr, " This suggests database staleness or corruption.\n")
fmt.Fprintf(os.Stderr, "Press Ctrl+C to abort, or Enter to continue: ")
// Read a line from stdin to wait for user confirmation
var response string
_, _ = fmt.Scanln(&response) // ignore EOF on empty input
// Safety check: prevent exporting stale database that would lose issues
if output != "" && !force {
debug.Logf("Debug: checking staleness - output=%s, force=%v\n", output, force)
// Read existing JSONL to get issue IDs
jsonlIDs, err := getIssueIDsFromJSONL(output)
if err != nil && !os.IsNotExist(err) {
fmt.Fprintf(os.Stderr, "Warning: failed to read existing JSONL for staleness check: %v\n", err)
}
if err == nil && len(jsonlIDs) > 0 {
// Build set of DB issue IDs
dbIDs := make(map[string]bool)
for _, issue := range issues {
dbIDs[issue.ID] = true
}
// Check if JSONL has any issues that DB doesn't have
var missingIDs []string
for id := range jsonlIDs {
if !dbIDs[id] {
missingIDs = append(missingIDs, id)
}
}
debug.Logf("Debug: JSONL has %d issues, DB has %d issues, missing %d\n",
len(jsonlIDs), len(issues), len(missingIDs))
if len(missingIDs) > 0 {
sort.Strings(missingIDs)
fmt.Fprintf(os.Stderr, "Error: refusing to export stale database that would lose issues\n")
fmt.Fprintf(os.Stderr, " Database has %d issues\n", len(issues))
fmt.Fprintf(os.Stderr, " JSONL has %d issues\n", len(jsonlIDs))
fmt.Fprintf(os.Stderr, " Export would lose %d issue(s):\n", len(missingIDs))
// Show first 10 missing issues
showCount := len(missingIDs)
if showCount > 10 {
showCount = 10
}
for i := 0; i < showCount; i++ {
fmt.Fprintf(os.Stderr, " - %s\n", missingIDs[i])
}
if len(missingIDs) > 10 {
fmt.Fprintf(os.Stderr, " ... and %d more\n", len(missingIDs)-10)
}
fmt.Fprintf(os.Stderr, "\n")
fmt.Fprintf(os.Stderr, "This usually means:\n")
fmt.Fprintf(os.Stderr, " 1. You need to run 'bd import -i %s' to sync the latest changes\n", output)
fmt.Fprintf(os.Stderr, " 2. Or another workspace added issues that weren't synced to this database\n")
fmt.Fprintf(os.Stderr, "\n")
fmt.Fprintf(os.Stderr, "To force export anyway (will lose these issues):\n")
fmt.Fprintf(os.Stderr, " bd export -o %s --force\n", output)
os.Exit(1)
}
}
}

View File

@@ -0,0 +1,124 @@
package main
import (
"os"
"path/filepath"
"testing"
)
// TestExportStaleness_DBHasFewerIssues prepares the JSONL side of the
// "database has fewer issues than JSONL" scenario: it writes a three-record
// file and checks that countIssuesInJSONL reports all three. The export
// command's refusal path itself is not invoked by this test.
func TestExportStaleness_DBHasFewerIssues(t *testing.T) {
	// Three issues: test-1, test-2, test-3.
	const fixture = `{"id":"test-1","title":"Issue 1","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
{"id":"test-2","title":"Issue 2","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
{"id":"test-3","title":"Issue 3","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
`
	const wantCount = 3

	target := filepath.Join(t.TempDir(), "test.jsonl")
	writeErr := os.WriteFile(target, []byte(fixture), 0600)
	if writeErr != nil {
		t.Fatalf("Failed to create JSONL: %v", writeErr)
	}

	got, countErr := countIssuesInJSONL(target)
	switch {
	case countErr != nil:
		t.Fatalf("Failed to count issues: %v", countErr)
	case got != wantCount:
		t.Errorf("Expected 3 issues in JSONL, got %d", got)
	}
}
// TestExportStaleness_DBHasSameIssues prepares the JSONL side of the
// "database and JSONL hold the same issues" scenario: it writes a two-record
// file and checks that countIssuesInJSONL reports both. The export command
// itself is not run here.
func TestExportStaleness_DBHasSameIssues(t *testing.T) {
	// Two issues: test-1 and test-2.
	const fixture = `{"id":"test-1","title":"Issue 1","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
{"id":"test-2","title":"Issue 2","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
`
	const wantCount = 2

	target := filepath.Join(t.TempDir(), "test.jsonl")
	writeErr := os.WriteFile(target, []byte(fixture), 0600)
	if writeErr != nil {
		t.Fatalf("Failed to create JSONL: %v", writeErr)
	}

	got, countErr := countIssuesInJSONL(target)
	switch {
	case countErr != nil:
		t.Fatalf("Failed to count issues: %v", countErr)
	case got != wantCount:
		t.Errorf("Expected 2 issues in JSONL, got %d", got)
	}
}
// TestExportStaleness_NoJSONL covers the case where the target JSONL file
// does not exist yet. Both JSONL readers must fail with an error that
// satisfies os.IsNotExist: the export staleness check calls
// getIssueIDsFromJSONL and uses os.IsNotExist on its error to tell
// "no previous export" apart from a real read failure.
func TestExportStaleness_NoJSONL(t *testing.T) {
	jsonlPath := filepath.Join(t.TempDir(), "nonexistent.jsonl")

	// The readers are expected to return an error for a missing file, and
	// that error must be recognisable as "not exist".
	_, err := countIssuesInJSONL(jsonlPath)
	if err == nil {
		// Fatal: the IsNotExist check below is meaningless on a nil error.
		t.Fatal("Expected error when JSONL doesn't exist")
	}
	if !os.IsNotExist(err) {
		t.Errorf("Expected IsNotExist error, got: %v", err)
	}

	// Same contract for the ID-based reader used by the staleness check.
	ids, err := getIssueIDsFromJSONL(jsonlPath)
	if err == nil {
		t.Fatal("Expected error from getIssueIDsFromJSONL when JSONL doesn't exist")
	}
	if !os.IsNotExist(err) {
		t.Errorf("Expected IsNotExist error from getIssueIDsFromJSONL, got: %v", err)
	}
	if len(ids) != 0 {
		t.Errorf("Expected no issue IDs for a missing file, got: %v", ids)
	}
}
// TestExportStaleness_DifferentIssues prepares the JSONL side of the
// "same count, different issues" scenario: it writes a file holding test-1,
// test-2 and test-3 and checks that getIssueIDsFromJSONL returns exactly
// those three IDs. The export command itself is not run here.
func TestExportStaleness_DifferentIssues(t *testing.T) {
	const fixture = `{"id":"test-1","title":"Issue 1","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
{"id":"test-2","title":"Issue 2","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
{"id":"test-3","title":"Issue 3","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
`

	target := filepath.Join(t.TempDir(), "test.jsonl")
	writeErr := os.WriteFile(target, []byte(fixture), 0600)
	if writeErr != nil {
		t.Fatalf("Failed to create JSONL: %v", writeErr)
	}

	got, readErr := getIssueIDsFromJSONL(target)
	if readErr != nil {
		t.Fatalf("Failed to get issue IDs: %v", readErr)
	}
	if n := len(got); n != 3 {
		t.Errorf("Expected 3 issue IDs, got %d", n)
	}

	// Every expected ID must be present; report once if any is absent.
	allPresent := true
	for _, want := range []string{"test-1", "test-2", "test-3"} {
		if !got[want] {
			allPresent = false
			break
		}
	}
	if !allPresent {
		t.Errorf("Missing expected issue IDs, got: %v", got)
	}
}
// TestGetIssueIDsFromJSONL_InvalidJSON feeds getIssueIDsFromJSONL a file whose
// second line is not JSON. It expects a non-nil error and expects the returned
// set to hold only the ID read before the bad line (test-1).
func TestGetIssueIDsFromJSONL_InvalidJSON(t *testing.T) {
	// Valid record, garbage line, valid record.
	const fixture = `{"id":"test-1","title":"Issue 1","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
this is not valid JSON
{"id":"test-3","title":"Issue 3","status":"open","priority":1,"issue_type":"task","created_at":"2025-01-01T00:00:00Z","updated_at":"2025-01-01T00:00:00Z"}
`

	target := filepath.Join(t.TempDir(), "corrupt.jsonl")
	writeErr := os.WriteFile(target, []byte(fixture), 0600)
	if writeErr != nil {
		t.Fatalf("Failed to create JSONL: %v", writeErr)
	}

	got, readErr := getIssueIDsFromJSONL(target)
	if readErr == nil {
		t.Error("Expected error for invalid JSON")
	}

	// Only the first record should have been collected before the failure.
	if onlyFirst := len(got) == 1 && got["test-1"]; !onlyFirst {
		t.Errorf("Expected to have read test-1 before error, got: %v", got)
	}
}