From a898df6915cf9b7cfb6233a6ae4ae2126b0fde73 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Sun, 26 Oct 2025 20:29:10 -0700 Subject: [PATCH] WIP: bd-164 timestamp-only export deduplication (~80% complete) Implemented content hash-based deduplication to skip exports when only timestamps changed. Core logic complete, needs export_hashes table wiring. Completed: - Added computeIssueContentHash() excluding timestamps - Created shouldSkipExport() logic - Updated export loop to skip timestamp-only changes - Added hash.go with content hashing - Extended Storage interface Remaining: - Complete export_hashes table migration - Add SetExportHash/GetExportHash to interface - Revert content_hash from dirty_issues approach - Wire up hash persistence in export - Testing See bd-164 notes for details. Amp-Thread-ID: https://ampcode.com/threads/T-d70657d1-4433-4f7e-b10a-3fccf8bf17fb Co-authored-by: Amp --- .beads/bd.jsonl | 2 +- cmd/bd/export.go | 74 +++++++++++++++++++++- internal/storage/sqlite/dependencies.go | 8 +-- internal/storage/sqlite/dirty.go | 82 ++++++++++++++++++++----- internal/storage/sqlite/hash.go | 35 +++++++++++ internal/storage/sqlite/schema.go | 8 +++ internal/storage/sqlite/sqlite.go | 21 ++++++- internal/storage/storage.go | 1 + 8 files changed, 210 insertions(+), 21 deletions(-) create mode 100644 internal/storage/sqlite/hash.go diff --git a/.beads/bd.jsonl b/.beads/bd.jsonl index ebe0a7d3..21d94341 100644 --- a/.beads/bd.jsonl +++ b/.beads/bd.jsonl @@ -69,7 +69,7 @@ {"id":"bd-160","title":"Critical: Multi-clone sync is fundamentally broken","description":"## Problem\n\nTwo clones of the same beads repo working on non-overlapping issues cannot stay in sync. The JSONL export/import mechanism creates catastrophic divergence instead of keeping databases synchronized.\n\n## What Happened (2025-10-26)\n\nTwo repos working simultaneously:\n- ~/src/beads (bd.db, 159 issues) - worked on bd-153, bd-152, bd-150, closed them\n- ~/src/fred/beads (beads.db, 165 issues) - worked on bd-159, bd-160-164\n\nResult after attempting sync:\n- Databases completely diverged (159 vs 165 issues)\n- JSONL files contain conflicting state\n- Database corruption in fred/beads\n- bd-150/152/153 show as closed in one repo, open in the other\n- No clear recovery path without manual database copying\n- git pull + bd sync does NOT synchronize state\n\n## Root Cause Analysis\n\n### SMOKING GUN: Daemon Import is a NO-OP\n**Location**: cmd/bd/daemon.go:791-797\n\nThe daemon's importToJSONLWithStore() function returns nil without actually importing.\nThis means the daemon exports DB to JSONL, commits, pulls from remote, but NEVER imports remote changes back into the database.\n\nResult: Remote changes are pulled but never imported, daemon keeps exporting stale state.\n\n### Other Root Causes\n\n1. **Database naming inconsistency**: One repo uses bd.db, other uses beads.db - no enforcement\n2. **Daemon state divergence**: Each repo's daemon maintains separate state, never converges\n3. **JSONL import/export race conditions**: Auto-import can overwrite local changes before export\n4. **No conflict resolution**: When databases diverge, there's no merge strategy\n5. **Timestamp-only changes**: bd-159 - exports trigger even with no real changes\n6. **Multiple daemons**: No coordination between daemon instances\n\n## Impact\n\n**Beads is unusable for multi-developer or multi-agent workflows**. 
The core promise - git-based sync via JSONL - is broken.\n\n## Fix Strategy (Epic)\n\nThis issue is tracked as an EPIC with child issues:\n\n### Phase 1: Stop the Bleeding (P0)\n- Implement daemon JSONL import (fixes the NO-OP)\n- Add database integrity checks\n- Fix timestamp-only exports (bd-159)\n\n### Phase 2: Database Consistency (P0)\n- Enforce canonical database naming\n- Add database fingerprinting\n- Migration tooling\n\n### Phase 3: Conflict Resolution (P1)\n- Implement version tracking\n- Three-way merge detection\n- Interactive conflict resolution\n\n### Phase 4: Testing \u0026 Validation (P1)\n- Multi-clone integration tests\n- Stress tests\n- Documentation\n\n## Severity\n\nP0 - This breaks the fundamental use case of beads. Without reliable sync, the tool is unusable for any multi-agent or team scenario.","status":"open","priority":0,"issue_type":"bug","created_at":"2025-10-26T19:42:43.355244-07:00","updated_at":"2025-10-26T19:53:08.681645-07:00"} {"id":"bd-161","title":"Implement daemon JSONL import (fix NO-OP stub)","description":"## Critical Bug\n\nThe daemon's sync loop calls importToJSONLWithStore() but this function is a NO-OP stub that returns nil without importing any changes.\n\n## Location\n\n**File**: cmd/bd/daemon.go:791-797\n\nCurrent implementation:\n```go\nfunc importToJSONLWithStore(ctx context.Context, store storage.Storage, jsonlPath string) error {\n // TODO Phase 4: Implement direct import for daemon\n // Currently a no-op - daemon doesn't import git changes into DB\n return nil\n}\n```\n\n## Impact\n\nThis is the PRIMARY cause of bd-160. When the daemon:\n1. Exports DB → JSONL\n2. Commits changes\n3. Pulls from remote (gets other clone's changes)\n4. Calls importToJSONLWithStore() ← **Does nothing!**\n5. Pushes commits (overwrites remote with stale state)\n\nResult: Perpetual divergence between clones.\n\n## Implementation Approach\n\nReplace the NO-OP with actual import logic:\n\n```go\nfunc importToJSONLWithStore(ctx context.Context, store storage.Storage, jsonlPath string) error {\n // Read JSONL file\n file, err := os.Open(jsonlPath)\n if err != nil {\n return fmt.Errorf(\"failed to open JSONL: %w\", err)\n }\n defer file.Close()\n \n // Parse all issues\n var issues []*types.Issue\n scanner := bufio.NewScanner(file)\n for scanner.Scan() {\n var issue types.Issue\n if err := json.Unmarshal(scanner.Bytes(), \u0026issue); err != nil {\n return fmt.Errorf(\"failed to parse issue: %w\", err)\n }\n issues = append(issues, \u0026issue)\n }\n \n if err := scanner.Err(); err != nil {\n return fmt.Errorf(\"failed to read JSONL: %w\", err)\n }\n \n // Use existing import logic with auto-conflict resolution\n opts := ImportOptions{\n ResolveCollisions: true, // Auto-resolve ID conflicts\n DryRun: false,\n SkipUpdate: false,\n Strict: false,\n }\n \n _, err = importIssuesCore(ctx, \"\", store, issues, opts)\n return err\n}\n```\n\n## Testing\n\nAfter implementation, test with:\n```bash\n# Create two clones\ngit init repo1 \u0026\u0026 cd repo1 \u0026\u0026 bd init \u0026\u0026 bd daemon\nbd new \"Issue A\"\ngit add . \u0026\u0026 git commit -m \"init\"\n\ncd .. 
\u0026\u0026 git clone repo1 repo2 \u0026\u0026 cd repo2 \u0026\u0026 bd init \u0026\u0026 bd daemon\n\n# Make changes in repo1\ncd ../repo1 \u0026\u0026 bd new \"Issue B\"\n\n# Wait for daemon sync, then check repo2\nsleep 10\ncd ../repo2 \u0026\u0026 bd list # Should show both Issue A and B\n```\n\n## Success Criteria\n\n- Daemon imports remote changes after git pull\n- Issue count converges across clones within one sync cycle\n- No manual intervention needed\n- Existing collision resolution logic handles conflicts\n\n## Estimated Effort\n\n30-60 minutes\n\n## Priority\n\nP0 - This is the critical path fix for bd-160","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-26T19:53:55.313039-07:00","updated_at":"2025-10-26T20:04:41.902916-07:00","closed_at":"2025-10-26T20:04:41.902916-07:00","dependencies":[{"issue_id":"bd-161","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:53:55.3136-07:00","created_by":"daemon"}]} {"id":"bd-162","title":"Add database integrity checks to sync operations","description":"## Problem\n\nWhen databases diverge (due to the import NO-OP bug or race conditions), there are no safety checks to detect or prevent catastrophic data loss.\n\nNeed integrity checks before/after sync operations to catch divergence early.\n\n## Implementation Locations\n\n**Pre-export checks** (cmd/bd/daemon.go:948, sync.go:108):\n- Before exportToJSONLWithStore()\n- Before exportToJSONL()\n\n**Post-import checks** (cmd/bd/daemon.go:985):\n- After importToJSONLWithStore()\n\n## Checks to Implement\n\n### 1. Database vs JSONL Count Divergence\n\nBefore export:\n```go\nfunc validatePreExport(store storage.Storage, jsonlPath string) error {\n dbIssues, _ := store.SearchIssues(ctx, \"\", types.IssueFilter{})\n dbCount := len(dbIssues)\n \n jsonlCount, _ := countIssuesInJSONL(jsonlPath)\n \n if dbCount == 0 \u0026\u0026 jsonlCount \u003e 0 {\n return fmt.Errorf(\"refusing to export empty DB over %d issues in JSONL\", jsonlCount)\n }\n \n divergencePercent := math.Abs(float64(dbCount-jsonlCount)) / float64(jsonlCount) * 100\n if divergencePercent \u003e 50 {\n log.Printf(\"WARNING: DB has %d issues, JSONL has %d (%.1f%% divergence)\", \n dbCount, jsonlCount, divergencePercent)\n log.Printf(\"This suggests sync failure - investigate before proceeding\")\n }\n \n return nil\n}\n```\n\n### 2. Duplicate ID Detection\n\n```go\nfunc checkDuplicateIDs(store storage.Storage) error {\n // Query for duplicate IDs\n rows, _ := db.Query(`\n SELECT id, COUNT(*) as cnt \n FROM issues \n GROUP BY id \n HAVING cnt \u003e 1\n `)\n \n var duplicates []string\n for rows.Next() {\n var id string\n var count int\n rows.Scan(\u0026id, \u0026count)\n duplicates = append(duplicates, fmt.Sprintf(\"%s (x%d)\", id, count))\n }\n \n if len(duplicates) \u003e 0 {\n return fmt.Errorf(\"database corruption: duplicate IDs: %v\", duplicates)\n }\n return nil\n}\n```\n\n### 3. Orphaned Dependencies\n\n```go\nfunc checkOrphanedDeps(store storage.Storage) ([]string, error) {\n // Find dependencies pointing to non-existent issues\n rows, _ := db.Query(`\n SELECT DISTINCT d.depends_on_id \n FROM dependencies d \n LEFT JOIN issues i ON d.depends_on_id = i.id \n WHERE i.id IS NULL\n `)\n \n var orphaned []string\n for rows.Next() {\n var id string\n rows.Scan(\u0026id)\n orphaned = append(orphaned, id)\n }\n \n if len(orphaned) \u003e 0 {\n log.Printf(\"WARNING: Found %d orphaned dependencies: %v\", len(orphaned), orphaned)\n }\n \n return orphaned, nil\n}\n```\n\n### 4. 
Post-Import Validation\n\nAfter import, verify:\n```go\nfunc validatePostImport(before, after int) error {\n if after \u003c before {\n return fmt.Errorf(\"import reduced issue count: %d → %d (data loss!)\", before, after)\n }\n if after == before {\n log.Printf(\"Import complete: no changes\")\n } else {\n log.Printf(\"Import complete: %d → %d issues (+%d)\", before, after, after-before)\n }\n return nil\n}\n```\n\n## Integration Points\n\nAdd to daemon sync loop (daemon.go:920-999):\n```go\n// Before export\nif err := validatePreExport(store, jsonlPath); err != nil {\n log.log(\"Pre-export validation failed: %v\", err)\n return\n}\n\n// Export...\n\n// Before import\nbeforeCount := countDBIssues(store)\n\n// Import...\n\n// After import\nafterCount := countDBIssues(store)\nif err := validatePostImport(beforeCount, afterCount); err != nil {\n log.log(\"Post-import validation failed: %v\", err)\n}\n```\n\n## Testing\n\nCreate test scenarios:\n1. Empty DB, non-empty JSONL → should error\n2. Duplicate IDs in DB → should error\n3. Orphaned dependencies → should warn\n4. Import reduces count → should error\n\n## Success Criteria\n\n- Catches divergence \u003e50% before export\n- Detects duplicate IDs\n- Reports orphaned dependencies\n- Validates import doesn't lose data\n- All checks logged clearly\n\n## Estimated Effort\n\n2-3 hours\n\n## Priority\n\nP0 - Safety checks prevent data loss during sync","status":"closed","priority":0,"issue_type":"task","created_at":"2025-10-26T19:54:22.558861-07:00","updated_at":"2025-10-26T20:17:37.981054-07:00","closed_at":"2025-10-26T20:17:37.981054-07:00","dependencies":[{"issue_id":"bd-162","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:54:22.55941-07:00","created_by":"daemon"}]} -{"id":"bd-164","title":"Fix timestamp-only export deduplication (bd-159)","description":"## Problem\n\nExport deduplication logic is supposed to skip timestamp-only changes, but it's not working. This causes:\n- Spurious git commits every sync cycle\n- Increased race condition window\n- Harder to detect real changes\n- Amplifies bd-160 sync issues\n\nRelated to bd-159.\n\n## Location\n\n**File**: cmd/bd/export.go:236-246\n\nCurrent code clears dirty flags for all exported issues:\n```go\nif output == \"\" || output == findJSONLPath() {\n if err := store.ClearDirtyIssuesByID(ctx, exportedIDs); err != nil {\n fmt.Fprintf(os.Stderr, \"Warning: failed to clear dirty issues: %v\\n\", err)\n }\n clearAutoFlushState()\n}\n```\n\nProblem: No check whether issue actually changed (beyond timestamps).\n\n## Root Cause\n\nIssues are marked dirty on ANY update, including:\n- Timestamp updates (UpdatedAt field)\n- No-op updates (same values written)\n- Database reopens (sqlite WAL journal replays)\n\n## Implementation Approach\n\n### 1. Add Content Hash to dirty_issues Table\n\n```sql\nALTER TABLE dirty_issues ADD COLUMN content_hash TEXT;\n```\n\nThe hash should exclude timestamp fields:\n```go\nfunc computeIssueContentHash(issue *types.Issue) string {\n // Clone issue and zero out timestamps\n normalized := *issue\n normalized.CreatedAt = time.Time{}\n normalized.UpdatedAt = time.Time{}\n \n // Serialize to JSON\n data, _ := json.Marshal(normalized)\n \n // SHA256 hash\n hash := sha256.Sum256(data)\n return hex.EncodeToString(hash[:])\n}\n```\n\n### 2. 
Track Previous Export State\n\nStore issue snapshots in issue_snapshots table (already exists):\n```go\nfunc saveExportSnapshot(ctx context.Context, store storage.Storage, issue *types.Issue) error {\n snapshot := \u0026types.IssueSnapshot{\n IssueID: issue.ID,\n SnapshotAt: time.Now(),\n Title: issue.Title,\n Description: issue.Description,\n Status: issue.Status,\n // ... all fields except timestamps\n }\n return store.SaveSnapshot(ctx, snapshot)\n}\n```\n\n### 3. Deduplicate During Export\n\nIn export.go:\n```go\n// Before encoding each issue\nif shouldSkipExport(ctx, store, issue) {\n skippedCount++\n continue\n}\n\nfunc shouldSkipExport(ctx context.Context, store storage.Storage, issue *types.Issue) bool {\n // Get last exported snapshot\n snapshot, err := store.GetLatestSnapshot(ctx, issue.ID)\n if err != nil || snapshot == nil {\n return false // No snapshot, must export\n }\n \n // Compare content hash\n currentHash := computeIssueContentHash(issue)\n snapshotHash := computeSnapshotHash(snapshot)\n \n if currentHash == snapshotHash {\n // Timestamp-only change, skip\n log.Printf(\"Skipping %s (timestamp-only change)\", issue.ID)\n return true\n }\n \n return false\n}\n```\n\n### 4. Update on Real Export\n\nOnly save snapshot when actually exporting:\n```go\nfor _, issue := range issues {\n if shouldSkipExport(ctx, store, issue) {\n continue\n }\n \n if err := encoder.Encode(issue); err != nil {\n return err\n }\n \n // Save snapshot of exported state\n saveExportSnapshot(ctx, store, issue)\n exportedIDs = append(exportedIDs, issue.ID)\n}\n```\n\n## Alternative: Simpler Approach\n\nIf snapshot complexity is too much, use a simpler hash:\n\n```go\n// In dirty_issues table, store hash when marking dirty\nfunc markIssueDirty(ctx context.Context, issueID string, issue *types.Issue) error {\n hash := computeIssueContentHash(issue)\n \n _, err := db.Exec(`\n INSERT INTO dirty_issues (issue_id, content_hash) \n VALUES (?, ?)\n ON CONFLICT(issue_id) DO UPDATE SET content_hash = ?\n `, issueID, hash, hash)\n \n return err\n}\n\n// During export, check if hash changed\nfunc hasRealChanges(ctx context.Context, store storage.Storage, issue *types.Issue) bool {\n var storedHash string\n err := db.QueryRow(\"SELECT content_hash FROM dirty_issues WHERE issue_id = ?\", issue.ID).Scan(\u0026storedHash)\n if err != nil {\n return true // No stored hash, export it\n }\n \n currentHash := computeIssueContentHash(issue)\n return currentHash != storedHash\n}\n```\n\n## Testing\n\nTest cases:\n1. Update issue timestamp only → no export\n2. Update issue title → export\n3. Multiple timestamp updates → single export\n4. Database reopen → no spurious exports\n\nValidation:\n```bash\n# Start daemon, wait 1 hour\nbd daemon --interval 5s\nsleep 3600\n\n# Check git log - should be 0 commits\ngit log --since=\"1 hour ago\" --oneline | wc -l # expect: 0\n```\n\n## Success Criteria\n\n- Zero spurious exports for timestamp-only changes\n- Real changes still exported immediately\n- No performance regression\n- bd-159 resolved\n\n## Estimated Effort\n\n2-3 hours\n\n## Priority\n\nP0 - Prevents noise that amplifies bd-160 sync issues","notes":"## Implementation Progress\n\nImplemented timestamp-only export deduplication with content hashing approach. Current status:\n\n### Completed:\n1. ✅ Added content hash computation function (SHA256 of issue minus timestamps)\n2. ✅ Created shouldSkipExport() function in export.go\n3. ✅ Added GetDirtyIssueHash() to Storage interface\n4. 
✅ Updated export loop to skip timestamp-only changes\n5. ✅ Build succeeds, code compiles\n\n### In Progress:\nWorking on hash persistence strategy. Initial approach (storing hash in dirty_issues) has complexity with maintaining hash across mark-dirty → export cycle.\n\n### Next Steps:\nImplementing export_hashes table to track last-exported content hash separately from dirty tracking. This cleanly separates:\n- dirty_issues: tracks \"needs export\" flag (cleared after export)\n- export_hashes: tracks \"last exported state\" (persists for comparison)\n\n### Testing Needed:\nOnce hash persistence is complete, need to verify:\n1. Timestamp-only updates don't trigger export\n2. Real content changes DO trigger export \n3. No performance regression\n4. Works with daemon auto-sync","status":"in_progress","priority":1,"issue_type":"task","created_at":"2025-10-26T19:54:58.248715-07:00","updated_at":"2025-10-26T20:27:23.353836-07:00","dependencies":[{"issue_id":"bd-164","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:54:58.24935-07:00","created_by":"daemon"},{"issue_id":"bd-164","depends_on_id":"bd-159","type":"related","created_at":"2025-10-26T19:54:58.249718-07:00","created_by":"daemon"}]} +{"id":"bd-164","title":"Fix timestamp-only export deduplication (bd-159)","description":"## Problem\n\nExport deduplication logic is supposed to skip timestamp-only changes, but it's not working. This causes:\n- Spurious git commits every sync cycle\n- Increased race condition window\n- Harder to detect real changes\n- Amplifies bd-160 sync issues\n\nRelated to bd-159.\n\n## Location\n\n**File**: cmd/bd/export.go:236-246\n\nCurrent code clears dirty flags for all exported issues:\n```go\nif output == \"\" || output == findJSONLPath() {\n if err := store.ClearDirtyIssuesByID(ctx, exportedIDs); err != nil {\n fmt.Fprintf(os.Stderr, \"Warning: failed to clear dirty issues: %v\\n\", err)\n }\n clearAutoFlushState()\n}\n```\n\nProblem: No check whether issue actually changed (beyond timestamps).\n\n## Root Cause\n\nIssues are marked dirty on ANY update, including:\n- Timestamp updates (UpdatedAt field)\n- No-op updates (same values written)\n- Database reopens (sqlite WAL journal replays)\n\n## Implementation Approach\n\n### 1. Add Content Hash to dirty_issues Table\n\n```sql\nALTER TABLE dirty_issues ADD COLUMN content_hash TEXT;\n```\n\nThe hash should exclude timestamp fields:\n```go\nfunc computeIssueContentHash(issue *types.Issue) string {\n // Clone issue and zero out timestamps\n normalized := *issue\n normalized.CreatedAt = time.Time{}\n normalized.UpdatedAt = time.Time{}\n \n // Serialize to JSON\n data, _ := json.Marshal(normalized)\n \n // SHA256 hash\n hash := sha256.Sum256(data)\n return hex.EncodeToString(hash[:])\n}\n```\n\n### 2. Track Previous Export State\n\nStore issue snapshots in issue_snapshots table (already exists):\n```go\nfunc saveExportSnapshot(ctx context.Context, store storage.Storage, issue *types.Issue) error {\n snapshot := \u0026types.IssueSnapshot{\n IssueID: issue.ID,\n SnapshotAt: time.Now(),\n Title: issue.Title,\n Description: issue.Description,\n Status: issue.Status,\n // ... all fields except timestamps\n }\n return store.SaveSnapshot(ctx, snapshot)\n}\n```\n\n### 3. 
Deduplicate During Export\n\nIn export.go:\n```go\n// Before encoding each issue\nif shouldSkipExport(ctx, store, issue) {\n skippedCount++\n continue\n}\n\nfunc shouldSkipExport(ctx context.Context, store storage.Storage, issue *types.Issue) bool {\n // Get last exported snapshot\n snapshot, err := store.GetLatestSnapshot(ctx, issue.ID)\n if err != nil || snapshot == nil {\n return false // No snapshot, must export\n }\n \n // Compare content hash\n currentHash := computeIssueContentHash(issue)\n snapshotHash := computeSnapshotHash(snapshot)\n \n if currentHash == snapshotHash {\n // Timestamp-only change, skip\n log.Printf(\"Skipping %s (timestamp-only change)\", issue.ID)\n return true\n }\n \n return false\n}\n```\n\n### 4. Update on Real Export\n\nOnly save snapshot when actually exporting:\n```go\nfor _, issue := range issues {\n if shouldSkipExport(ctx, store, issue) {\n continue\n }\n \n if err := encoder.Encode(issue); err != nil {\n return err\n }\n \n // Save snapshot of exported state\n saveExportSnapshot(ctx, store, issue)\n exportedIDs = append(exportedIDs, issue.ID)\n}\n```\n\n## Alternative: Simpler Approach\n\nIf snapshot complexity is too much, use a simpler hash:\n\n```go\n// In dirty_issues table, store hash when marking dirty\nfunc markIssueDirty(ctx context.Context, issueID string, issue *types.Issue) error {\n hash := computeIssueContentHash(issue)\n \n _, err := db.Exec(`\n INSERT INTO dirty_issues (issue_id, content_hash) \n VALUES (?, ?)\n ON CONFLICT(issue_id) DO UPDATE SET content_hash = ?\n `, issueID, hash, hash)\n \n return err\n}\n\n// During export, check if hash changed\nfunc hasRealChanges(ctx context.Context, store storage.Storage, issue *types.Issue) bool {\n var storedHash string\n err := db.QueryRow(\"SELECT content_hash FROM dirty_issues WHERE issue_id = ?\", issue.ID).Scan(\u0026storedHash)\n if err != nil {\n return true // No stored hash, export it\n }\n \n currentHash := computeIssueContentHash(issue)\n return currentHash != storedHash\n}\n```\n\n## Testing\n\nTest cases:\n1. Update issue timestamp only → no export\n2. Update issue title → export\n3. Multiple timestamp updates → single export\n4. Database reopen → no spurious exports\n\nValidation:\n```bash\n# Start daemon, wait 1 hour\nbd daemon --interval 5s\nsleep 3600\n\n# Check git log - should be 0 commits\ngit log --since=\"1 hour ago\" --oneline | wc -l # expect: 0\n```\n\n## Success Criteria\n\n- Zero spurious exports for timestamp-only changes\n- Real changes still exported immediately\n- No performance regression\n- bd-159 resolved\n\n## Estimated Effort\n\n2-3 hours\n\n## Priority\n\nP0 - Prevents noise that amplifies bd-160 sync issues","notes":"## Implementation Progress (2025-10-26)\n\nImplemented ~80% of timestamp-only export deduplication. Current status:\n\n### ✅ Completed:\n1. Added computeIssueContentHash() function in cmd/bd/export.go (SHA256 of issue minus timestamps)\n2. Created shouldSkipExport() function in export.go to check for timestamp-only changes\n3. Added GetDirtyIssueHash() to Storage interface (internal/storage/storage.go:55)\n4. Updated export loop (cmd/bd/export.go:280-304) to skip timestamp-only changes\n5. Added hash.go with content hashing logic in internal/storage/sqlite/\n6. Updated dirty.go with hash-aware MarkIssueDirty functions\n7. Fixed all markIssuesDirtyTx() calls in dependencies.go to pass store parameter\n8. Build succeeds, code compiles\n\n### 🔄 Remaining Work:\n1. 
**Hash persistence strategy** - Need to complete export_hashes table approach:\n - Created schema for export_hashes table (schema.go:122-128)\n - Need to add migration logic in sqlite.go\n - Need to implement SetExportHash() and GetExportHash() in Storage interface\n - Update shouldSkipExport() to query export_hashes instead of dirty_issues\n - Update export logic to call SetExportHash() after successful export\n\n2. **Revert incomplete changes**:\n - Remove content_hash column from dirty_issues (was wrong approach)\n - Simplify MarkIssueDirty back to original (no hash parameter)\n - Simplify markIssuesDirtyTx back to original\n\n3. **Testing**:\n - Test timestamp-only updates don't trigger export\n - Test real content changes DO trigger export\n - Test daemon auto-sync behavior\n - No performance regression\n\n### Design Decision:\nUsing export_hashes table (separate from dirty_issues) to track last-exported content hash:\n- dirty_issues: tracks \"needs export\" flag (cleared after export) \n- export_hashes: tracks \"last exported state\" (persists for comparison)\n\nThis cleanly separates concerns and avoids lifecycle complexity.\n\n### Files Modified:\n- cmd/bd/export.go (hash computation, skip logic)\n- internal/storage/storage.go (interface)\n- internal/storage/sqlite/schema.go (export_hashes table)\n- internal/storage/sqlite/hash.go (NEW)\n- internal/storage/sqlite/dirty.go (hash-aware marking - TO BE REVERTED)\n- internal/storage/sqlite/dependencies.go (markIssuesDirtyTx calls - TO BE REVERTED)\n- internal/storage/sqlite/sqlite.go (migration - TO BE COMPLETED)\n\n### Estimated Time to Complete:\n30-45 minutes to finish export_hashes implementation and testing.","status":"in_progress","priority":1,"issue_type":"task","created_at":"2025-10-26T19:54:58.248715-07:00","updated_at":"2025-10-26T20:28:59.447887-07:00","dependencies":[{"issue_id":"bd-164","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:54:58.24935-07:00","created_by":"daemon"},{"issue_id":"bd-164","depends_on_id":"bd-159","type":"related","created_at":"2025-10-26T19:54:58.249718-07:00","created_by":"daemon"}]} {"id":"bd-165","title":"Enforce canonical database naming (beads.db)","description":"## Problem\n\nCurrently, different clones can use different database filenames (bd.db, beads.db, issues.db), causing incompatibility when attempting to sync.\n\nExample from bd-160:\n- ~/src/beads uses bd.db\n- ~/src/fred/beads uses beads.db\n- Sync fails because they're fundamentally different databases\n\n## Solution\n\nEnforce a single canonical database name: **beads.db**\n\n## Implementation\n\n### 1. Define Canonical Name\n\n**File**: beads.go or constants.go (create if needed)\n\n```go\npackage beads\n\n// CanonicalDatabaseName is the required database filename for all beads repositories\nconst CanonicalDatabaseName = \"beads.db\"\n\n// LegacyDatabaseNames are old names that should be migrated\nvar LegacyDatabaseNames = []string{\"bd.db\", \"issues.db\", \"bugs.db\"}\n```\n\n### 2. 
Update bd init Command\n\n**File**: cmd/bd/init.go\n\n```go\nfunc runInit(cmd *cobra.Command, args []string) error {\n beadsDir := \".beads\"\n os.MkdirAll(beadsDir, 0755)\n \n dbPath := filepath.Join(beadsDir, beads.CanonicalDatabaseName)\n \n // Check for legacy databases\n for _, legacy := range beads.LegacyDatabaseNames {\n legacyPath := filepath.Join(beadsDir, legacy)\n if exists(legacyPath) {\n fmt.Printf(\"Found legacy database: %s\\n\", legacy)\n fmt.Printf(\"Migrating to canonical name: %s\\n\", beads.CanonicalDatabaseName)\n \n // Rename to canonical\n if err := os.Rename(legacyPath, dbPath); err != nil {\n return fmt.Errorf(\"migration failed: %w\", err)\n }\n fmt.Printf(\"✓ Migrated %s → %s\\n\", legacy, beads.CanonicalDatabaseName)\n }\n }\n \n // Create new database if doesn't exist\n if !exists(dbPath) {\n store, err := sqlite.New(dbPath)\n if err != nil {\n return err\n }\n defer store.Close()\n \n // Initialize with version metadata\n store.SetMetadata(context.Background(), \"bd_version\", Version)\n store.SetMetadata(context.Background(), \"db_name\", beads.CanonicalDatabaseName)\n }\n \n // ... rest of init\n}\n```\n\n### 3. Validate on Daemon Start\n\n**File**: cmd/bd/daemon.go:1076-1095\n\nUpdate the existing multiple-DB check:\n```go\n// Check for multiple .db files\nbeadsDir := filepath.Dir(daemonDBPath)\nmatches, err := filepath.Glob(filepath.Join(beadsDir, \"*.db\"))\nif err == nil \u0026\u0026 len(matches) \u003e 1 {\n log.log(\"Error: Multiple database files found:\")\n for _, match := range matches {\n log.log(\" - %s\", filepath.Base(match))\n }\n log.log(\"\")\n log.log(\"Beads requires a single canonical database: %s\", beads.CanonicalDatabaseName)\n log.log(\"Run 'bd init' to migrate legacy databases\")\n os.Exit(1)\n}\n\n// Validate using canonical name\nif filepath.Base(daemonDBPath) != beads.CanonicalDatabaseName {\n log.log(\"Error: Non-canonical database name: %s\", filepath.Base(daemonDBPath))\n log.log(\"Expected: %s\", beads.CanonicalDatabaseName)\n log.log(\"Run 'bd init' to migrate to canonical name\")\n os.Exit(1)\n}\n```\n\n### 4. Add Migration Command\n\n**File**: cmd/bd/migrate.go (create new)\n\n```go\nvar migrateCmd = \u0026cobra.Command{\n Use: \"migrate\",\n Short: \"Migrate database to canonical naming and schema\",\n Run: func(cmd *cobra.Command, args []string) {\n beadsDir := \".beads\"\n \n // Find current database\n var currentDB string\n for _, name := range append(beads.LegacyDatabaseNames, beads.CanonicalDatabaseName) {\n path := filepath.Join(beadsDir, name)\n if exists(path) {\n currentDB = path\n break\n }\n }\n \n if currentDB == \"\" {\n fmt.Println(\"No database found\")\n return\n }\n \n targetPath := filepath.Join(beadsDir, beads.CanonicalDatabaseName)\n \n if currentDB == targetPath {\n fmt.Println(\"Database already using canonical name\")\n return\n }\n \n // Backup first\n backupPath := currentDB + \".backup\"\n copyFile(currentDB, backupPath)\n fmt.Printf(\"Created backup: %s\\n\", backupPath)\n \n // Rename\n if err := os.Rename(currentDB, targetPath); err != nil {\n fmt.Fprintf(os.Stderr, \"Migration failed: %v\\n\", err)\n os.Exit(1)\n }\n \n fmt.Printf(\"✓ Migrated: %s → %s\\n\", filepath.Base(currentDB), beads.CanonicalDatabaseName)\n \n // Update metadata\n store, _ := sqlite.New(targetPath)\n defer store.Close()\n store.SetMetadata(context.Background(), \"db_name\", beads.CanonicalDatabaseName)\n },\n}\n```\n\n### 5. 
Update FindDatabasePath\n\n**File**: beads.go (or wherever FindDatabasePath is defined)\n\n```go\nfunc FindDatabasePath() string {\n beadsDir := findBeadsDir()\n if beadsDir == \"\" {\n return \"\"\n }\n \n // First try canonical name\n canonical := filepath.Join(beadsDir, CanonicalDatabaseName)\n if exists(canonical) {\n return canonical\n }\n \n // Check for legacy names (warn user)\n for _, legacy := range LegacyDatabaseNames {\n path := filepath.Join(beadsDir, legacy)\n if exists(path) {\n fmt.Fprintf(os.Stderr, \"WARNING: Using legacy database name: %s\\n\", legacy)\n fmt.Fprintf(os.Stderr, \"Run 'bd migrate' to upgrade to canonical name: %s\\n\", CanonicalDatabaseName)\n return path\n }\n }\n \n return \"\"\n}\n```\n\n## Testing\n\n```bash\n# Test migration\nmkdir test-repo \u0026\u0026 cd test-repo \u0026\u0026 git init\nmkdir .beads\nsqlite3 .beads/bd.db \"CREATE TABLE test (id int);\"\n\nbd init # Should detect and migrate bd.db → beads.db\n\n# Verify\nls .beads/*.db # Should only show beads.db\n\n# Test daemon rejection\nsqlite3 .beads/old.db \"CREATE TABLE test (id int);\"\nbd daemon # Should error: multiple databases found\n\n# Test clean init\nrm -rf test-repo2 \u0026\u0026 mkdir test-repo2 \u0026\u0026 cd test-repo2\nbd init # Should create .beads/beads.db directly\n```\n\n## Rollout Strategy\n\n1. Add migration logic to bd init\n2. Update FindDatabasePath to warn on legacy names\n3. Add 'bd migrate' command for manual migration\n4. Update docs to specify canonical name\n5. Add daemon validation after 2 releases\n\n## Success Criteria\n\n- All new repositories use beads.db\n- bd init auto-migrates legacy names\n- bd daemon rejects non-canonical names\n- Clear migration path for existing users\n- No data loss during migration\n\n## Estimated Effort\n\n3-4 hours\n\n## Priority\n\nP0 - Critical for multi-clone compatibility","status":"open","priority":0,"issue_type":"task","created_at":"2025-10-26T19:55:39.056716-07:00","updated_at":"2025-10-26T19:55:39.056716-07:00","dependencies":[{"issue_id":"bd-165","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:55:39.057336-07:00","created_by":"daemon"}]} {"id":"bd-166","title":"Add database fingerprinting and validation","description":"## Problem\n\nWhen multiple clones exist, there's no validation that they're actually clones of the same repository. Different repos can accidentally share databases, causing data corruption.\n\nNeed database fingerprinting to ensure clones belong to the same logical repository.\n\n## Solution\n\nAdd repository fingerprint to database metadata and validate on daemon start.\n\n## Implementation\n\n### 1. 
Compute Repository ID\n\n**File**: pkg/fingerprint.go (create new)\n\n```go\npackage beads\n\nimport (\n \"crypto/sha256\"\n \"encoding/hex\"\n \"fmt\"\n \"os/exec\"\n)\n\n// ComputeRepoID generates a unique identifier for this git repository\nfunc ComputeRepoID() (string, error) {\n // Get git remote URL (canonical repo identifier)\n cmd := exec.Command(\"git\", \"config\", \"--get\", \"remote.origin.url\")\n output, err := cmd.Output()\n if err != nil {\n // No remote configured, use local path\n cmd = exec.Command(\"git\", \"rev-parse\", \"--show-toplevel\")\n output, err = cmd.Output()\n if err != nil {\n return \"\", fmt.Errorf(\"not a git repository\")\n }\n }\n \n repoURL := strings.TrimSpace(string(output))\n \n // Normalize URL (remove .git suffix, https vs git@, etc.)\n repoURL = normalizeGitURL(repoURL)\n \n // SHA256 hash for privacy (don't expose repo URL in database)\n hash := sha256.Sum256([]byte(repoURL))\n return hex.EncodeToString(hash[:16]), nil // Use first 16 bytes\n}\n\nfunc normalizeGitURL(url string) string {\n // Convert git@github.com:user/repo.git → github.com/user/repo\n // Convert https://github.com/user/repo.git → github.com/user/repo\n url = strings.TrimSuffix(url, \".git\")\n url = strings.ReplaceAll(url, \"git@\", \"\")\n url = strings.ReplaceAll(url, \"https://\", \"\")\n url = strings.ReplaceAll(url, \"http://\", \"\")\n url = strings.ReplaceAll(url, \":\", \"/\")\n return url\n}\n\n// GetCloneID generates a unique ID for this specific clone (not shared with other clones)\nfunc GetCloneID() string {\n // Use hostname + path for uniqueness\n hostname, _ := os.Hostname()\n path, _ := os.Getwd()\n hash := sha256.Sum256([]byte(hostname + \":\" + path))\n return hex.EncodeToString(hash[:8])\n}\n```\n\n### 2. Store Fingerprint on Init\n\n**File**: cmd/bd/init.go\n\n```go\nfunc runInit(cmd *cobra.Command, args []string) error {\n // ... create database ...\n \n // Compute and store repo ID\n repoID, err := beads.ComputeRepoID()\n if err != nil {\n fmt.Fprintf(os.Stderr, \"Warning: could not compute repo ID: %v\\n\", err)\n } else {\n if err := store.SetMetadata(ctx, \"repo_id\", repoID); err != nil {\n return fmt.Errorf(\"failed to set repo_id: %w\", err)\n }\n fmt.Printf(\"Repository ID: %s\\n\", repoID[:8])\n }\n \n // Store clone ID\n cloneID := beads.GetCloneID()\n if err := store.SetMetadata(ctx, \"clone_id\", cloneID); err != nil {\n return fmt.Errorf(\"failed to set clone_id: %w\", err)\n }\n fmt.Printf(\"Clone ID: %s\\n\", cloneID)\n \n // Store creation timestamp\n if err := store.SetMetadata(ctx, \"created_at\", time.Now().Format(time.RFC3339)); err != nil {\n return fmt.Errorf(\"failed to set created_at: %w\", err)\n }\n \n return nil\n}\n```\n\n### 3. 
Validate on Database Open\n\n**File**: cmd/bd/daemon.go (in runDaemonLoop)\n\n```go\nfunc validateDatabaseFingerprint(store storage.Storage) error {\n ctx := context.Background()\n \n // Get stored repo ID\n storedRepoID, err := store.GetMetadata(ctx, \"repo_id\")\n if err != nil \u0026\u0026 err.Error() != \"metadata key not found: repo_id\" {\n return fmt.Errorf(\"failed to read repo_id: %w\", err)\n }\n \n // If no repo_id, this is a legacy database - set it now\n if storedRepoID == \"\" {\n repoID, err := beads.ComputeRepoID()\n if err != nil {\n log.log(\"Warning: could not compute repo ID: %v\", err)\n return nil // Non-fatal for backward compat\n }\n \n log.log(\"Legacy database detected, setting repo_id: %s\", repoID[:8])\n if err := store.SetMetadata(ctx, \"repo_id\", repoID); err != nil {\n return fmt.Errorf(\"failed to set repo_id: %w\", err)\n }\n return nil\n }\n \n // Validate repo ID matches\n currentRepoID, err := beads.ComputeRepoID()\n if err != nil {\n log.log(\"Warning: could not compute current repo ID: %v\", err)\n return nil // Non-fatal\n }\n \n if storedRepoID != currentRepoID {\n return fmt.Errorf(`\nDATABASE MISMATCH DETECTED!\n\nThis database belongs to a different repository:\n Database repo ID: %s\n Current repo ID: %s\n\nThis usually means:\n 1. You copied a .beads directory from another repo (don't do this!)\n 2. Git remote URL changed (run 'bd migrate' to update)\n 3. Database corruption\n\nSolutions:\n - If remote URL changed: bd migrate --update-repo-id\n - If wrong database: rm -rf .beads \u0026\u0026 bd init\n - If correct database: BEADS_IGNORE_REPO_MISMATCH=1 bd daemon\n`, storedRepoID[:8], currentRepoID[:8])\n }\n \n return nil\n}\n\n// In runDaemonLoop, after opening database:\nif err := validateDatabaseFingerprint(store); err != nil {\n if os.Getenv(\"BEADS_IGNORE_REPO_MISMATCH\") != \"1\" {\n log.log(\"Error: %v\", err)\n os.Exit(1)\n }\n log.log(\"Warning: repo mismatch ignored (BEADS_IGNORE_REPO_MISMATCH=1)\")\n}\n```\n\n### 4. 
Add Update Command for Remote Changes\n\n**File**: cmd/bd/migrate.go\n\n```go\nvar updateRepoID bool\n\nfunc init() {\n migrateCmd.Flags().BoolVar(\u0026updateRepoID, \"update-repo-id\", false, \n \"Update repository ID (use after changing git remote)\")\n}\n\n// In migrate command:\nif updateRepoID {\n newRepoID, err := beads.ComputeRepoID()\n if err != nil {\n fmt.Fprintf(os.Stderr, \"Error: %v\\n\", err)\n os.Exit(1)\n }\n \n oldRepoID, _ := store.GetMetadata(ctx, \"repo_id\")\n \n fmt.Printf(\"Updating repository ID:\\n\")\n fmt.Printf(\" Old: %s\\n\", oldRepoID[:8])\n fmt.Printf(\" New: %s\\n\", newRepoID[:8])\n \n if err := store.SetMetadata(ctx, \"repo_id\", newRepoID); err != nil {\n fmt.Fprintf(os.Stderr, \"Error: %v\\n\", err)\n os.Exit(1)\n }\n \n fmt.Println(\"✓ Repository ID updated\")\n}\n```\n\n## Metadata Schema\n\nAdd to `metadata` table:\n\n| Key | Value | Description |\n|-----|-------|-------------|\n| repo_id | sha256(git_remote)[..16] | Repository fingerprint |\n| clone_id | sha256(hostname:path)[..8] | Clone-specific ID |\n| created_at | RFC3339 timestamp | Database creation time |\n| db_name | \"beads.db\" | Canonical database name |\n| bd_version | \"v0.x.x\" | Schema version |\n\n## Testing\n\n```bash\n# Test repo ID generation\ncd /tmp/test-repo \u0026\u0026 git init\ngit remote add origin https://github.com/user/repo.git\nbd init\nbd show-meta repo_id # Should show consistent hash\n\n# Test mismatch detection\ncd /tmp/other-repo \u0026\u0026 git init\ngit remote add origin https://github.com/other/repo.git\ncp -r /tmp/test-repo/.beads /tmp/other-repo/\nbd daemon # Should error: repo mismatch\n\n# Test migration\ngit remote set-url origin https://github.com/user/new-repo.git\nbd migrate --update-repo-id # Should update successfully\n```\n\n## Success Criteria\n\n- New databases automatically get repo_id\n- Daemon validates repo_id on start\n- Clear error messages on mismatch\n- Migration path for remote URL changes\n- Legacy databases automatically fingerprinted\n\n## Estimated Effort\n\n3-4 hours\n\n## Priority\n\nP0 - Prevents accidental database mixing across repos","status":"open","priority":0,"issue_type":"task","created_at":"2025-10-26T19:56:18.53693-07:00","updated_at":"2025-10-26T19:56:18.53693-07:00","dependencies":[{"issue_id":"bd-166","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:56:18.537546-07:00","created_by":"daemon"}]} {"id":"bd-167","title":"Implement version tracking for issues","description":"## Problem\n\nWhen two clones modify the same issue concurrently, there's no way to detect or handle the conflict properly. 
Last writer wins arbitrarily, losing data.\n\nNeed version tracking to implement proper conflict detection and resolution.\n\n## Solution\n\nAdd version counter and last-modified metadata to issues for Last-Writer-Wins (LWW) conflict resolution.\n\n## Database Schema Changes\n\n**File**: internal/storage/sqlite/schema.go\n\n```sql\n-- Add version tracking columns to issues table\nALTER TABLE issues ADD COLUMN version INTEGER DEFAULT 1 NOT NULL;\nALTER TABLE issues ADD COLUMN modified_by TEXT DEFAULT '' NOT NULL;\nALTER TABLE issues ADD COLUMN modified_at DATETIME;\n\n-- Create index for version-based queries\nCREATE INDEX IF NOT EXISTS idx_issues_version ON issues(id, version);\n\n-- Store modification history\nCREATE TABLE IF NOT EXISTS issue_versions (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n issue_id TEXT NOT NULL,\n version INTEGER NOT NULL,\n modified_by TEXT NOT NULL,\n modified_at DATETIME NOT NULL,\n snapshot BLOB NOT NULL, -- JSON snapshot of issue at this version\n FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE,\n UNIQUE(issue_id, version)\n);\n\nCREATE INDEX IF NOT EXISTS idx_issue_versions_lookup \n ON issue_versions(issue_id, version);\n```\n\n## Implementation\n\n### 1. Update Issue Type\n\n**File**: internal/types/issue.go\n\n```go\ntype Issue struct {\n ID string `json:\"id\"`\n Title string `json:\"title\"`\n Description string `json:\"description\"`\n Status Status `json:\"status\"`\n Priority int `json:\"priority\"`\n CreatedAt time.Time `json:\"created_at\"`\n UpdatedAt time.Time `json:\"updated_at\"`\n \n // Version tracking (new fields)\n Version int `json:\"version\"` // Incremented on each update\n ModifiedBy string `json:\"modified_by\"` // Clone ID that made the change\n ModifiedAt time.Time `json:\"modified_at\"` // When the change was made\n \n // ... rest of fields\n}\n```\n\n### 2. 
Increment Version on Update\n\n**File**: internal/storage/sqlite/sqlite.go\n\n```go\nfunc (s *SQLiteStorage) UpdateIssue(ctx context.Context, issue *types.Issue) error {\n // Get current version from database\n var currentVersion int\n var currentModifiedAt time.Time\n err := s.db.QueryRowContext(ctx, `\n SELECT version, modified_at \n FROM issues \n WHERE id = ?\n `, issue.ID).Scan(\u0026currentVersion, \u0026currentModifiedAt)\n \n if err != nil \u0026\u0026 err != sql.ErrNoRows {\n return fmt.Errorf(\"failed to get current version: %w\", err)\n }\n \n // Detect conflict: incoming version is stale\n if issue.Version \u003e 0 \u0026\u0026 issue.Version \u003c currentVersion {\n return \u0026ConflictError{\n IssueID: issue.ID,\n LocalVersion: currentVersion,\n RemoteVersion: issue.Version,\n LocalModified: currentModifiedAt,\n RemoteModified: issue.ModifiedAt,\n }\n }\n \n // No conflict or local is newer: increment version\n issue.Version = currentVersion + 1\n issue.ModifiedBy = getCloneID() // From fingerprinting\n issue.ModifiedAt = time.Now()\n \n // Save version snapshot before updating\n if err := s.saveVersionSnapshot(ctx, issue); err != nil {\n // Non-fatal warning\n log.Printf(\"Warning: failed to save version snapshot: %v\", err)\n }\n \n // Perform update\n _, err = s.db.ExecContext(ctx, `\n UPDATE issues SET\n title = ?,\n description = ?,\n status = ?,\n priority = ?,\n updated_at = ?,\n version = ?,\n modified_by = ?,\n modified_at = ?\n WHERE id = ?\n `, issue.Title, issue.Description, issue.Status, issue.Priority,\n issue.UpdatedAt, issue.Version, issue.ModifiedBy, issue.ModifiedAt,\n issue.ID)\n \n return err\n}\n\nfunc (s *SQLiteStorage) saveVersionSnapshot(ctx context.Context, issue *types.Issue) error {\n snapshot, _ := json.Marshal(issue)\n \n _, err := s.db.ExecContext(ctx, `\n INSERT INTO issue_versions (issue_id, version, modified_by, modified_at, snapshot)\n VALUES (?, ?, ?, ?, ?)\n `, issue.ID, issue.Version, issue.ModifiedBy, issue.ModifiedAt, snapshot)\n \n return err\n}\n```\n\n### 3. Conflict Detection on Import\n\n**File**: cmd/bd/import_core.go\n\n```go\ntype ConflictError struct {\n IssueID string\n LocalVersion int\n RemoteVersion int\n LocalModified time.Time\n RemoteModified time.Time\n LocalIssue *types.Issue\n RemoteIssue *types.Issue\n}\n\nfunc (e *ConflictError) Error() string {\n return fmt.Sprintf(\"conflict on %s: local v%d (modified %s) vs remote v%d (modified %s)\",\n e.IssueID, e.LocalVersion, e.LocalModified, e.RemoteVersion, e.RemoteModified)\n}\n\nfunc detectVersionConflict(local, remote *types.Issue) *ConflictError {\n // No conflict if same version\n if local.Version == remote.Version {\n return nil\n }\n \n // Remote is newer - no conflict\n if remote.Version \u003e local.Version {\n return nil\n }\n \n // Local is newer - remote is stale\n if remote.Version \u003c local.Version {\n // Check if concurrent modification (both diverged from same base)\n if local.ModifiedAt.Sub(remote.ModifiedAt).Abs() \u003c 1*time.Minute {\n return \u0026ConflictError{\n IssueID: local.ID,\n LocalVersion: local.Version,\n RemoteVersion: remote.Version,\n LocalModified: local.ModifiedAt,\n RemoteModified: remote.ModifiedAt,\n LocalIssue: local,\n RemoteIssue: remote,\n }\n }\n }\n \n return nil\n}\n```\n\n### 4. 
Conflict Resolution Strategies\n\n```go\ntype ConflictStrategy int\n\nconst (\n StrategyLWW ConflictStrategy = iota // Last Writer Wins (use newest modified_at)\n StrategyHighestVersion // Use highest version number\n StrategyInteractive // Prompt user\n StrategyMerge // Three-way merge (future)\n)\n\nfunc resolveConflict(conflict *ConflictError, strategy ConflictStrategy) (*types.Issue, error) {\n switch strategy {\n case StrategyLWW:\n if conflict.RemoteModified.After(conflict.LocalModified) {\n return conflict.RemoteIssue, nil\n }\n return conflict.LocalIssue, nil\n \n case StrategyHighestVersion:\n if conflict.RemoteVersion \u003e conflict.LocalVersion {\n return conflict.RemoteIssue, nil\n }\n return conflict.LocalIssue, nil\n \n case StrategyInteractive:\n return promptUserForResolution(conflict)\n \n default:\n return nil, fmt.Errorf(\"unknown conflict strategy: %v\", strategy)\n }\n}\n```\n\n## Migration for Existing Databases\n\n**File**: cmd/bd/migrate.go\n\n```go\nfunc migrateToVersionTracking(store storage.Storage) error {\n ctx := context.Background()\n \n // Add columns if not exist\n _, err := db.Exec(`\n ALTER TABLE issues ADD COLUMN IF NOT EXISTS version INTEGER DEFAULT 1 NOT NULL\n `)\n if err != nil {\n return err\n }\n \n _, err = db.Exec(`\n ALTER TABLE issues ADD COLUMN IF NOT EXISTS modified_by TEXT DEFAULT ''\n `)\n if err != nil {\n return err\n }\n \n _, err = db.Exec(`\n ALTER TABLE issues ADD COLUMN IF NOT EXISTS modified_at DATETIME\n `)\n if err != nil {\n return err\n }\n \n // Backfill modified_at from updated_at\n _, err = db.Exec(`\n UPDATE issues SET modified_at = updated_at WHERE modified_at IS NULL\n `)\n \n return err\n}\n```\n\n## Testing\n\n```bash\n# Test version increment\nbd create \"Test issue\"\nbd show bd-1 --json | jq .version # Should be 1\nbd update bd-1 --title \"Updated\"\nbd show bd-1 --json | jq .version # Should be 2\n\n# Test conflict detection\n# Clone A: modify bd-1\ncd repo-a \u0026\u0026 bd update bd-1 --title \"A's version\"\n# Clone B: modify bd-1\ncd repo-b \u0026\u0026 bd update bd-1 --title \"B's version\"\n\n# Sync\ncd repo-a \u0026\u0026 bd sync # Should detect conflict\n```\n\n## Success Criteria\n\n- All issues have version numbers\n- Version increments on each update\n- Conflicts detected when importing stale versions\n- Version history preserved in issue_versions table\n- Migration works for existing databases\n\n## Estimated Effort\n\n4-5 hours\n\n## Priority\n\nP1 - Enables proper conflict detection (required before three-way merge)","status":"open","priority":1,"issue_type":"task","created_at":"2025-10-26T19:57:01.745351-07:00","updated_at":"2025-10-26T19:57:01.745351-07:00","dependencies":[{"issue_id":"bd-167","depends_on_id":"bd-160","type":"blocks","created_at":"2025-10-26T19:57:01.746071-07:00","created_by":"daemon"}]} diff --git a/cmd/bd/export.go b/cmd/bd/export.go index f8bc91fc..20d06354 100644 --- a/cmd/bd/export.go +++ b/cmd/bd/export.go @@ -2,18 +2,71 @@ package main import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" "os" "path/filepath" "sort" "strings" + "time" "github.com/spf13/cobra" + "github.com/steveyegge/beads/internal/storage" "github.com/steveyegge/beads/internal/storage/sqlite" "github.com/steveyegge/beads/internal/types" ) +// computeIssueContentHash computes a SHA256 hash of an issue's content, excluding timestamps. +// This is used for detecting timestamp-only changes during export deduplication. 
+func computeIssueContentHash(issue *types.Issue) (string, error) { + // Clone issue and zero out timestamps to exclude them from hash + normalized := *issue + normalized.CreatedAt = time.Time{} + normalized.UpdatedAt = time.Time{} + + // Also zero out ClosedAt if present + if normalized.ClosedAt != nil { + zeroTime := time.Time{} + normalized.ClosedAt = &zeroTime + } + + // Serialize to JSON + data, err := json.Marshal(normalized) + if err != nil { + return "", err + } + + // SHA256 hash + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]), nil +} + +// shouldSkipExport checks if an issue should be skipped during export because +// it only has timestamp changes (no actual content changes). +func shouldSkipExport(ctx context.Context, store storage.Storage, issue *types.Issue) (bool, error) { + // Get the stored hash from dirty_issues table + storedHash, err := store.GetDirtyIssueHash(ctx, issue.ID) + if err != nil { + return false, err + } + + // If no hash stored, we must export (first export or old data) + if storedHash == "" { + return false, nil + } + + // Compute current hash + currentHash, err := computeIssueContentHash(issue) + if err != nil { + return false, err + } + + // If hashes match, only timestamps changed - skip export + return currentHash == storedHash, nil +} + // countIssuesInJSONL counts the number of issues in a JSONL file func countIssuesInJSONL(path string) (int, error) { file, err := os.Open(path) @@ -221,16 +274,35 @@ Output to stdout by default, or use -o flag for file output.`, out = tempFile } - // Write JSONL + // Write JSONL (with timestamp-only deduplication for bd-164) encoder := json.NewEncoder(out) exportedIDs := make([]string, 0, len(issues)) + skippedCount := 0 for _, issue := range issues { + // Check if this is only a timestamp change (bd-164) + skip, err := shouldSkipExport(ctx, store, issue) + if err != nil { + // Log warning but continue - don't fail export on hash check errors + fmt.Fprintf(os.Stderr, "Warning: failed to check if %s should skip: %v\n", issue.ID, err) + skip = false + } + + if skip { + skippedCount++ + continue + } + if err := encoder.Encode(issue); err != nil { fmt.Fprintf(os.Stderr, "Error encoding issue %s: %v\n", issue.ID, err) os.Exit(1) } exportedIDs = append(exportedIDs, issue.ID) } + + // Report skipped issues if any (helps debugging bd-159) + if skippedCount > 0 && (output == "" || output == findJSONLPath()) { + fmt.Fprintf(os.Stderr, "Skipped %d issue(s) with timestamp-only changes\n", skippedCount) + } // Only clear dirty issues and auto-flush state if exporting to the default JSONL path // This prevents clearing dirty flags when exporting to custom paths (e.g., bd export -o backup.jsonl) diff --git a/internal/storage/sqlite/dependencies.go b/internal/storage/sqlite/dependencies.go index 84690346..83184d49 100644 --- a/internal/storage/sqlite/dependencies.go +++ b/internal/storage/sqlite/dependencies.go @@ -151,7 +151,7 @@ func (s *SQLiteStorage) AddDependency(ctx context.Context, dep *types.Dependency // Mark both issues as dirty for incremental export // (dependencies are exported with each issue, so both need updating) - if err := markIssuesDirtyTx(ctx, tx, []string{dep.IssueID, dep.DependsOnID}); err != nil { + if err := markIssuesDirtyTx(ctx, tx, s, []string{dep.IssueID, dep.DependsOnID}); err != nil { return err } @@ -264,7 +264,7 @@ func (s *SQLiteStorage) addDependencyUnchecked(ctx context.Context, dep *types.D } // Mark both issues as dirty - if err := markIssuesDirtyTx(ctx, tx, 
[]string{dep.IssueID, dep.DependsOnID}); err != nil { + if err := markIssuesDirtyTx(ctx, tx, s, []string{dep.IssueID, dep.DependsOnID}); err != nil { return err } @@ -305,7 +305,7 @@ func (s *SQLiteStorage) RemoveDependency(ctx context.Context, issueID, dependsOn } // Mark both issues as dirty for incremental export - if err := markIssuesDirtyTx(ctx, tx, []string{issueID, dependsOnID}); err != nil { + if err := markIssuesDirtyTx(ctx, tx, s, []string{issueID, dependsOnID}); err != nil { return err } @@ -348,7 +348,7 @@ func (s *SQLiteStorage) removeDependencyIfExists(ctx context.Context, issueID, d } // Mark both issues as dirty for incremental export - if err := markIssuesDirtyTx(ctx, tx, []string{issueID, dependsOnID}); err != nil { + if err := markIssuesDirtyTx(ctx, tx, s, []string{issueID, dependsOnID}); err != nil { return err } diff --git a/internal/storage/sqlite/dirty.go b/internal/storage/sqlite/dirty.go index 78819bb1..1e89b84a 100644 --- a/internal/storage/sqlite/dirty.go +++ b/internal/storage/sqlite/dirty.go @@ -11,11 +11,22 @@ import ( // MarkIssueDirty marks an issue as dirty (needs to be exported to JSONL) // This should be called whenever an issue is created, updated, or has dependencies changed func (s *SQLiteStorage) MarkIssueDirty(ctx context.Context, issueID string) error { - _, err := s.db.ExecContext(ctx, ` - INSERT INTO dirty_issues (issue_id, marked_at) - VALUES (?, ?) - ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at - `, issueID, time.Now()) + // Fetch the issue to compute its content hash + issue, err := s.GetIssue(ctx, issueID) + if err != nil { + return err + } + + hash, err := computeIssueContentHash(issue) + if err != nil { + return err + } + + _, err = s.db.ExecContext(ctx, ` + INSERT INTO dirty_issues (issue_id, marked_at, content_hash) + VALUES (?, ?, ?) + ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash + `, issueID, time.Now(), hash) return err } @@ -34,9 +45,9 @@ func (s *SQLiteStorage) MarkIssuesDirty(ctx context.Context, issueIDs []string) now := time.Now() stmt, err := tx.PrepareContext(ctx, ` - INSERT INTO dirty_issues (issue_id, marked_at) - VALUES (?, ?) - ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at + INSERT INTO dirty_issues (issue_id, marked_at, content_hash) + VALUES (?, ?, ?) 
+ ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash `) if err != nil { return fmt.Errorf("failed to prepare statement: %w", err) @@ -44,7 +55,18 @@ func (s *SQLiteStorage) MarkIssuesDirty(ctx context.Context, issueIDs []string) defer func() { _ = stmt.Close() }() for _, issueID := range issueIDs { - if _, err := stmt.ExecContext(ctx, issueID, now); err != nil { + // Fetch issue to compute content hash + issue, err := s.GetIssue(ctx, issueID) + if err != nil { + return fmt.Errorf("failed to get issue %s: %w", issueID, err) + } + + hash, err := computeIssueContentHash(issue) + if err != nil { + return fmt.Errorf("failed to compute hash for issue %s: %w", issueID, err) + } + + if _, err := stmt.ExecContext(ctx, issueID, now, hash); err != nil { return fmt.Errorf("failed to mark issue %s dirty: %w", issueID, err) } } @@ -75,6 +97,27 @@ func (s *SQLiteStorage) GetDirtyIssues(ctx context.Context) ([]string, error) { return issueIDs, rows.Err() } +// GetDirtyIssueHash returns the stored content hash for a dirty issue, if it exists +func (s *SQLiteStorage) GetDirtyIssueHash(ctx context.Context, issueID string) (string, error) { + var hash sql.NullString + err := s.db.QueryRowContext(ctx, ` + SELECT content_hash FROM dirty_issues WHERE issue_id = ? + `, issueID).Scan(&hash) + + if err == sql.ErrNoRows { + return "", nil // Issue not dirty + } + if err != nil { + return "", fmt.Errorf("failed to get dirty issue hash: %w", err) + } + + if !hash.Valid { + return "", nil // No hash stored yet + } + + return hash.String, nil +} + // ClearDirtyIssues removes all entries from the dirty_issues table // This should be called after a successful JSONL export // @@ -128,16 +171,16 @@ func (s *SQLiteStorage) GetDirtyIssueCount(ctx context.Context) (int, error) { // markIssuesDirtyTx marks multiple issues as dirty within an existing transaction // This is a helper for operations that need to mark issues dirty as part of a larger transaction -func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, issueIDs []string) error { +func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, store *SQLiteStorage, issueIDs []string) error { if len(issueIDs) == 0 { return nil } now := time.Now() stmt, err := tx.PrepareContext(ctx, ` - INSERT INTO dirty_issues (issue_id, marked_at) - VALUES (?, ?) - ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at + INSERT INTO dirty_issues (issue_id, marked_at, content_hash) + VALUES (?, ?, ?) 
+ ON CONFLICT (issue_id) DO UPDATE SET marked_at = excluded.marked_at, content_hash = excluded.content_hash `) if err != nil { return fmt.Errorf("failed to prepare dirty statement: %w", err) @@ -145,7 +188,18 @@ func markIssuesDirtyTx(ctx context.Context, tx *sql.Tx, issueIDs []string) error defer func() { _ = stmt.Close() }() for _, issueID := range issueIDs { - if _, err := stmt.ExecContext(ctx, issueID, now); err != nil { + // Fetch issue to compute content hash + issue, err := store.GetIssue(ctx, issueID) + if err != nil { + return fmt.Errorf("failed to get issue %s: %w", issueID, err) + } + + hash, err := computeIssueContentHash(issue) + if err != nil { + return fmt.Errorf("failed to compute hash for issue %s: %w", issueID, err) + } + + if _, err := stmt.ExecContext(ctx, issueID, now, hash); err != nil { return fmt.Errorf("failed to mark issue %s dirty: %w", issueID, err) } } diff --git a/internal/storage/sqlite/hash.go b/internal/storage/sqlite/hash.go new file mode 100644 index 00000000..70c1acdb --- /dev/null +++ b/internal/storage/sqlite/hash.go @@ -0,0 +1,35 @@ +package sqlite + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "time" + + "github.com/steveyegge/beads/internal/types" +) + +// computeIssueContentHash computes a SHA256 hash of an issue's content, excluding timestamps. +// This is used for detecting timestamp-only changes during export deduplication. +func computeIssueContentHash(issue *types.Issue) (string, error) { + // Clone issue and zero out timestamps to exclude them from hash + normalized := *issue + normalized.CreatedAt = time.Time{} + normalized.UpdatedAt = time.Time{} + + // Also zero out ClosedAt if present + if normalized.ClosedAt != nil { + zeroTime := time.Time{} + normalized.ClosedAt = &zeroTime + } + + // Serialize to JSON + data, err := json.Marshal(normalized) + if err != nil { + return "", err + } + + // SHA256 hash + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]), nil +} diff --git a/internal/storage/sqlite/schema.go b/internal/storage/sqlite/schema.go index d390e101..8905f96a 100644 --- a/internal/storage/sqlite/schema.go +++ b/internal/storage/sqlite/schema.go @@ -120,6 +120,14 @@ CREATE TABLE IF NOT EXISTS dirty_issues ( CREATE INDEX IF NOT EXISTS idx_dirty_issues_marked_at ON dirty_issues(marked_at); +-- Tracks content hash of last export for each issue (for timestamp-only dedup, bd-164) +CREATE TABLE IF NOT EXISTS export_hashes ( + issue_id TEXT PRIMARY KEY, + content_hash TEXT NOT NULL, + exported_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE +); + -- Issue counters table (for atomic ID generation) CREATE TABLE IF NOT EXISTS issue_counters ( prefix TEXT PRIMARY KEY, diff --git a/internal/storage/sqlite/sqlite.go b/internal/storage/sqlite/sqlite.go index 534f28e7..285c6e5a 100644 --- a/internal/storage/sqlite/sqlite.go +++ b/internal/storage/sqlite/sqlite.go @@ -144,6 +144,7 @@ func migrateDirtyIssuesTable(db *sql.DB) error { CREATE TABLE dirty_issues ( issue_id TEXT PRIMARY KEY, marked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + content_hash TEXT, FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE ); CREATE INDEX idx_dirty_issues_marked_at ON dirty_issues(marked_at); @@ -159,7 +160,25 @@ func migrateDirtyIssuesTable(db *sql.DB) error { return fmt.Errorf("failed to check for dirty_issues table: %w", err) } - // Table exists, no migration needed + // Table exists, check if content_hash column exists (migration for bd-164) + var 
hasContentHash bool + err = db.QueryRow(` + SELECT COUNT(*) > 0 FROM pragma_table_info('dirty_issues') + WHERE name = 'content_hash' + `).Scan(&hasContentHash) + + if err != nil { + return fmt.Errorf("failed to check for content_hash column: %w", err) + } + + if !hasContentHash { + // Add content_hash column to existing table + _, err = db.Exec(`ALTER TABLE dirty_issues ADD COLUMN content_hash TEXT`) + if err != nil { + return fmt.Errorf("failed to add content_hash column: %w", err) + } + } + return nil } diff --git a/internal/storage/storage.go b/internal/storage/storage.go index 5cf3d0b3..25a06dfc 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -52,6 +52,7 @@ type Storage interface { // Dirty tracking (for incremental JSONL export) GetDirtyIssues(ctx context.Context) ([]string, error) + GetDirtyIssueHash(ctx context.Context, issueID string) (string, error) // For timestamp-only dedup (bd-164) ClearDirtyIssues(ctx context.Context) error // WARNING: Race condition (bd-52), use ClearDirtyIssuesByID ClearDirtyIssuesByID(ctx context.Context, issueIDs []string) error
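
Sketch of the remaining export_hashes wiring (see the Remaining list in the commit message). SetExportHash and GetExportHash are named there but not implemented in this patch, so the bodies below are assumptions: they mirror GetDirtyIssueHash in dirty.go and target the export_hashes table added to schema.go, and they assume the imports the sqlite package already uses (context, database/sql, fmt, time).

```go
// Sketch of the two accessors named in the Remaining list. Bodies are
// assumptions modeled on GetDirtyIssueHash in dirty.go, against the
// export_hashes schema added in schema.go.

// GetExportHash returns the content hash recorded at the last export of
// issueID, or "" if the issue has never been exported.
func (s *SQLiteStorage) GetExportHash(ctx context.Context, issueID string) (string, error) {
	var hash string
	err := s.db.QueryRowContext(ctx, `
		SELECT content_hash FROM export_hashes WHERE issue_id = ?
	`, issueID).Scan(&hash)
	if err == sql.ErrNoRows {
		return "", nil // never exported
	}
	if err != nil {
		return "", fmt.Errorf("failed to get export hash: %w", err)
	}
	return hash, nil
}

// SetExportHash records the content hash of a successful export, upserting
// so repeated exports of the same issue keep a single row.
func (s *SQLiteStorage) SetExportHash(ctx context.Context, issueID, contentHash string) error {
	_, err := s.db.ExecContext(ctx, `
		INSERT INTO export_hashes (issue_id, content_hash, exported_at)
		VALUES (?, ?, ?)
		ON CONFLICT (issue_id) DO UPDATE SET
			content_hash = excluded.content_hash,
			exported_at = excluded.exported_at
	`, issueID, contentHash, time.Now())
	return err
}
```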
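
The "Complete export_hashes table migration" item can plausibly reuse the idempotent CREATE TABLE IF NOT EXISTS form that schema.go already relies on; a minimal sketch (function name hypothetical, column definitions copied from the schema.go hunk above):

```go
// Sketch: idempotent migration so databases created before this change gain
// the table. Mirrors the CREATE TABLE IF NOT EXISTS form used in schema.go.
func migrateExportHashesTable(db *sql.DB) error {
	_, err := db.Exec(`
		CREATE TABLE IF NOT EXISTS export_hashes (
			issue_id TEXT PRIMARY KEY,
			content_hash TEXT NOT NULL,
			exported_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
			FOREIGN KEY (issue_id) REFERENCES issues(id) ON DELETE CASCADE
		)
	`)
	if err != nil {
		return fmt.Errorf("failed to create export_hashes table: %w", err)
	}
	return nil
}
```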
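
With those in place, shouldSkipExport would compare against the last-exported hash rather than the hash captured at mark-dirty time (per the bd-164 notes), and the export loop would record a hash after each successful encode. A sketch, assuming the Storage interface gains the two methods above; the dirty_issues content_hash plumbing in this patch would then be reverted as planned:

```go
// Sketch: shouldSkipExport reworked to consult export_hashes (the hash of the
// last successful export) instead of the hash stored in dirty_issues.
func shouldSkipExport(ctx context.Context, store storage.Storage, issue *types.Issue) (bool, error) {
	lastHash, err := store.GetExportHash(ctx, issue.ID)
	if err != nil {
		return false, err
	}
	if lastHash == "" {
		return false, nil // never exported: must export
	}
	currentHash, err := computeIssueContentHash(issue)
	if err != nil {
		return false, err
	}
	// Equal hashes mean nothing but timestamps changed since the last export.
	return currentHash == lastHash, nil
}

// In the export loop, after a successful encode (sketch):
//
//	if err := encoder.Encode(issue); err != nil { ... }
//	if hash, err := computeIssueContentHash(issue); err == nil {
//		if err := store.SetExportHash(ctx, issue.ID, hash); err != nil {
//			fmt.Fprintf(os.Stderr, "Warning: failed to record export hash for %s: %v\n", issue.ID, err)
//		}
//	}
```

This keeps the two concerns separate as described in the bd-164 design note: dirty_issues stays a plain "needs export" flag cleared after export, while export_hashes persists the last-exported state for comparison across cycles.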