feat: Add sandbox escape hatches for daemon lock issues (GH #353 Phase 1)
Implements three quick fixes for users stuck in sandboxed environments (e.g., Codex) where daemon cannot be stopped: 1. **--force flag for bd import** - Forces metadata update even when DB is synced with JSONL - Fixes stuck state caused by stale daemon cache - Shows: "Metadata updated (database already in sync with JSONL)" 2. **--allow-stale global flag** - Emergency escape hatch to bypass staleness check - Shows warning: "⚠️ Staleness check skipped (--allow-stale)" - Allows operations on potentially stale data 3. **Improved error message** - Added sandbox-specific guidance to staleness error - Suggests --sandbox, --force, and --allow-stale flags - Provides clear fix steps for different scenarios Also fixed: - Removed unused import in cmd/bd/duplicates_test.go Follow-up work filed: - bd-u3t: Phase 2 - Sandbox auto-detection - bd-e0o: Phase 3 - Daemon robustness enhancements - bd-9nw: Documentation updates Fixes #353 (Phase 1) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/steveyegge/beads/internal/types"
|
"github.com/steveyegge/beads/internal/types"
|
||||||
|
|||||||
@@ -69,6 +69,7 @@ NOTE: Import requires direct database access and does not work with daemon mode.
|
|||||||
dedupeAfter, _ := cmd.Flags().GetBool("dedupe-after")
|
dedupeAfter, _ := cmd.Flags().GetBool("dedupe-after")
|
||||||
clearDuplicateExternalRefs, _ := cmd.Flags().GetBool("clear-duplicate-external-refs")
|
clearDuplicateExternalRefs, _ := cmd.Flags().GetBool("clear-duplicate-external-refs")
|
||||||
orphanHandling, _ := cmd.Flags().GetString("orphan-handling")
|
orphanHandling, _ := cmd.Flags().GetString("orphan-handling")
|
||||||
|
force, _ := cmd.Flags().GetBool("force")
|
||||||
|
|
||||||
// Open input
|
// Open input
|
||||||
in := os.Stdin
|
in := os.Stdin
|
||||||
@@ -309,7 +310,8 @@ NOTE: Import requires direct database access and does not work with daemon mode.
|
|||||||
|
|
||||||
// Update last_import_hash metadata to enable content-based staleness detection (bd-khnb fix)
|
// Update last_import_hash metadata to enable content-based staleness detection (bd-khnb fix)
|
||||||
// This prevents git operations from resurrecting deleted issues by comparing content instead of mtime
|
// This prevents git operations from resurrecting deleted issues by comparing content instead of mtime
|
||||||
if input != "" {
|
// When --force is true, ALWAYS update metadata even if no changes were made
|
||||||
|
if input != "" && (result.Created > 0 || result.Updated > 0 || len(result.IDMapping) > 0 || force) {
|
||||||
if currentHash, err := computeJSONLHash(input); err == nil {
|
if currentHash, err := computeJSONLHash(input); err == nil {
|
||||||
if err := store.SetMetadata(ctx, "last_import_hash", currentHash); err != nil {
|
if err := store.SetMetadata(ctx, "last_import_hash", currentHash); err != nil {
|
||||||
// Non-fatal warning: Metadata update failures are intentionally non-fatal to prevent blocking
|
// Non-fatal warning: Metadata update failures are intentionally non-fatal to prevent blocking
|
||||||
@@ -358,6 +360,11 @@ NOTE: Import requires direct database access and does not work with daemon mode.
|
|||||||
}
|
}
|
||||||
fmt.Fprintf(os.Stderr, "\n")
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
|
||||||
|
// Print force message if metadata was updated despite no changes
|
||||||
|
if force && result.Created == 0 && result.Updated == 0 && len(result.IDMapping) == 0 {
|
||||||
|
fmt.Fprintf(os.Stderr, "Metadata updated (database already in sync with JSONL)\n")
|
||||||
|
}
|
||||||
|
|
||||||
// Run duplicate detection if requested
|
// Run duplicate detection if requested
|
||||||
if dedupeAfter {
|
if dedupeAfter {
|
||||||
fmt.Fprintf(os.Stderr, "\n=== Post-Import Duplicate Detection ===\n")
|
fmt.Fprintf(os.Stderr, "\n=== Post-Import Duplicate Detection ===\n")
|
||||||
@@ -697,6 +704,7 @@ func init() {
|
|||||||
importCmd.Flags().Bool("rename-on-import", false, "Rename imported issues to match database prefix (updates all references)")
|
importCmd.Flags().Bool("rename-on-import", false, "Rename imported issues to match database prefix (updates all references)")
|
||||||
importCmd.Flags().Bool("clear-duplicate-external-refs", false, "Clear duplicate external_ref values (keeps first occurrence)")
|
importCmd.Flags().Bool("clear-duplicate-external-refs", false, "Clear duplicate external_ref values (keeps first occurrence)")
|
||||||
importCmd.Flags().String("orphan-handling", "", "How to handle missing parent issues: strict/resurrect/skip/allow (default: use config or 'allow')")
|
importCmd.Flags().String("orphan-handling", "", "How to handle missing parent issues: strict/resurrect/skip/allow (default: use config or 'allow')")
|
||||||
|
importCmd.Flags().Bool("force", false, "Force metadata update even when database is already in sync with JSONL")
|
||||||
importCmd.Flags().BoolVar(&jsonOutput, "json", false, "Output import statistics in JSON format")
|
importCmd.Flags().BoolVar(&jsonOutput, "json", false, "Output import statistics in JSON format")
|
||||||
rootCmd.AddCommand(importCmd)
|
rootCmd.AddCommand(importCmd)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -89,6 +89,7 @@ var (
|
|||||||
noAutoFlush bool
|
noAutoFlush bool
|
||||||
noAutoImport bool
|
noAutoImport bool
|
||||||
sandboxMode bool
|
sandboxMode bool
|
||||||
|
allowStale bool // Use --allow-stale: skip staleness check (emergency escape hatch)
|
||||||
noDb bool // Use --no-db mode: load from JSONL, write back after each command
|
noDb bool // Use --no-db mode: load from JSONL, write back after each command
|
||||||
profileEnabled bool
|
profileEnabled bool
|
||||||
profileFile *os.File
|
profileFile *os.File
|
||||||
@@ -109,6 +110,7 @@ func init() {
|
|||||||
rootCmd.PersistentFlags().BoolVar(&noAutoFlush, "no-auto-flush", false, "Disable automatic JSONL sync after CRUD operations")
|
rootCmd.PersistentFlags().BoolVar(&noAutoFlush, "no-auto-flush", false, "Disable automatic JSONL sync after CRUD operations")
|
||||||
rootCmd.PersistentFlags().BoolVar(&noAutoImport, "no-auto-import", false, "Disable automatic JSONL import when newer than DB")
|
rootCmd.PersistentFlags().BoolVar(&noAutoImport, "no-auto-import", false, "Disable automatic JSONL import when newer than DB")
|
||||||
rootCmd.PersistentFlags().BoolVar(&sandboxMode, "sandbox", false, "Sandbox mode: disables daemon and auto-sync")
|
rootCmd.PersistentFlags().BoolVar(&sandboxMode, "sandbox", false, "Sandbox mode: disables daemon and auto-sync")
|
||||||
|
rootCmd.PersistentFlags().BoolVar(&allowStale, "allow-stale", false, "Allow operations on potentially stale data (skip staleness check)")
|
||||||
rootCmd.PersistentFlags().BoolVar(&noDb, "no-db", false, "Use no-db mode: load from JSONL, no SQLite")
|
rootCmd.PersistentFlags().BoolVar(&noDb, "no-db", false, "Use no-db mode: load from JSONL, no SQLite")
|
||||||
rootCmd.PersistentFlags().BoolVar(&profileEnabled, "profile", false, "Generate CPU profile for performance analysis")
|
rootCmd.PersistentFlags().BoolVar(&profileEnabled, "profile", false, "Generate CPU profile for performance analysis")
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,11 @@ import (
|
|||||||
// Implements bd-2q6d: All read operations should validate database freshness.
|
// Implements bd-2q6d: All read operations should validate database freshness.
|
||||||
// Implements bd-c4rq: Daemon check moved to call sites to avoid function call overhead.
|
// Implements bd-c4rq: Daemon check moved to call sites to avoid function call overhead.
|
||||||
func ensureDatabaseFresh(ctx context.Context) error {
|
func ensureDatabaseFresh(ctx context.Context) error {
|
||||||
|
if allowStale {
|
||||||
|
fmt.Fprintf(os.Stderr, "⚠️ Staleness check skipped (--allow-stale), data may be out of sync\n")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Skip check if no storage available (shouldn't happen in practice)
|
// Skip check if no storage available (shouldn't happen in practice)
|
||||||
if store == nil {
|
if store == nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -43,7 +48,11 @@ func ensureDatabaseFresh(ctx context.Context) error {
|
|||||||
"The JSONL file has been updated (e.g., after 'git pull') but the database\n"+
|
"The JSONL file has been updated (e.g., after 'git pull') but the database\n"+
|
||||||
"hasn't been imported yet. This would cause you to see stale/incomplete data.\n\n"+
|
"hasn't been imported yet. This would cause you to see stale/incomplete data.\n\n"+
|
||||||
"To fix:\n"+
|
"To fix:\n"+
|
||||||
" bd import # Import JSONL updates to database\n\n"+
|
" bd import -i .beads/beads.jsonl # Import JSONL updates to database\n\n"+
|
||||||
|
"If in a sandboxed environment (e.g., Codex) where daemon can't be stopped:\n"+
|
||||||
|
" bd --sandbox ready # Use direct mode (no daemon)\n"+
|
||||||
|
" bd import --force # Force metadata update\n"+
|
||||||
|
" bd ready --allow-stale # Skip staleness check (use with caution)\n\n"+
|
||||||
"Or use daemon mode (auto-imports on every operation):\n"+
|
"Or use daemon mode (auto-imports on every operation):\n"+
|
||||||
" bd daemon start\n"+
|
" bd daemon start\n"+
|
||||||
" bd <command> # Will auto-import before executing",
|
" bd <command> # Will auto-import before executing",
|
||||||
|
|||||||
302
docs/GH353_INVESTIGATION.md
Normal file
302
docs/GH353_INVESTIGATION.md
Normal file
@@ -0,0 +1,302 @@
|
|||||||
|
# Investigation: GH #353 - Daemon Locking Issues in Codex Sandbox
|
||||||
|
|
||||||
|
## Problem Summary
|
||||||
|
|
||||||
|
When running `bd` inside the Codex sandbox (macOS host), users encounter persistent "Database out of sync with JSONL" errors that cannot be resolved through normal means (`bd import`). The root cause is a daemon process that the sandbox cannot signal or kill, creating a deadlock situation.
|
||||||
|
|
||||||
|
## Root Cause Analysis
|
||||||
|
|
||||||
|
### The Daemon Locking Mechanism
|
||||||
|
|
||||||
|
The daemon uses three mechanisms to claim a database:
|
||||||
|
|
||||||
|
1. **File lock (`flock`)** on `.beads/daemon.lock` - exclusive lock held while daemon is running
|
||||||
|
2. **PID file** at `.beads/daemon.pid` - contains daemon process ID (Windows compatibility)
|
||||||
|
3. **Lock metadata** in `daemon.lock` - JSON containing PID, database path, version, start time
|
||||||
|
|
||||||
|
**Source:** `cmd/bd/daemon_lock.go`
|
||||||
|
|
||||||
|
### Process Verification Issue
|
||||||
|
|
||||||
|
On Unix systems, `isProcessRunning()` uses `syscall.Kill(pid, 0)` to check if a process exists. In sandboxed environments:
|
||||||
|
|
||||||
|
- The daemon PID exists in the lock file
|
||||||
|
- `syscall.Kill(pid, 0)` returns EPERM (operation not permitted)
|
||||||
|
- The CLI can't verify if the daemon is actually running
|
||||||
|
- The CLI can't send signals to stop the daemon
|
||||||
|
|
||||||
|
**Source:** `cmd/bd/daemon_unix.go:26-28`
|
||||||
|
|
||||||
|
### Staleness Check Flow
|
||||||
|
|
||||||
|
When running `bd ready` or other read commands:
|
||||||
|
|
||||||
|
1. **With daemon connected:**
|
||||||
|
- Command → Daemon RPC → `checkAndAutoImportIfStale()`
|
||||||
|
- Daemon checks JSONL mtime vs `last_import_time` metadata
|
||||||
|
- Daemon auto-imports if stale (with safeguards)
|
||||||
|
- **Source:** `internal/rpc/server_export_import_auto.go:171-303`
|
||||||
|
|
||||||
|
2. **Without daemon (direct mode):**
|
||||||
|
- Command → `ensureDatabaseFresh(ctx)` check
|
||||||
|
- Compares JSONL mtime vs `last_import_time` metadata
|
||||||
|
- **Refuses to proceed** if stale, shows error message
|
||||||
|
- **Source:** `cmd/bd/staleness.go:20-51`
|
||||||
|
|
||||||
|
### The Deadlock Scenario
|
||||||
|
|
||||||
|
1. Daemon is running outside sandbox with database lock
|
||||||
|
2. User (in sandbox) runs `bd ready`
|
||||||
|
3. CLI tries to connect to daemon → connection fails or daemon is unreachable
|
||||||
|
4. CLI falls back to direct mode
|
||||||
|
5. Direct mode checks staleness → JSONL is newer than metadata
|
||||||
|
6. Error: "Database out of sync with JSONL. Run 'bd import' first."
|
||||||
|
7. User runs `bd import -i .beads/beads.jsonl`
|
||||||
|
8. Import updates metadata in database file
|
||||||
|
9. **But daemon still running with OLD metadata cached in memory**
|
||||||
|
10. User runs `bd ready` again → CLI connects to daemon
|
||||||
|
11. Daemon checks staleness using **cached metadata** → still stale!
|
||||||
|
12. **Infinite loop:** Can't fix because can't restart daemon
|
||||||
|
|
||||||
|
### Why `--no-daemon` Doesn't Always Work
|
||||||
|
|
||||||
|
The `--no-daemon` flag should work by setting `daemonClient = nil` and skipping daemon connection (**source:** `cmd/bd/main.go:287-289`). However:
|
||||||
|
|
||||||
|
1. If JSONL is genuinely newer than database (e.g., after `git pull`), the staleness check in direct mode will still fail
|
||||||
|
2. If the user doesn't specify `--no-daemon` consistently, the CLI will reconnect to the stale daemon
|
||||||
|
3. The daemon may still hold file locks that interfere with direct operations
|
||||||
|
|
||||||
|
## Existing Workarounds
|
||||||
|
|
||||||
|
### The `--sandbox` Flag
|
||||||
|
|
||||||
|
Already exists! Sets:
|
||||||
|
- `noDaemon = true` (skip daemon)
|
||||||
|
- `noAutoFlush = true` (skip auto-flush)
|
||||||
|
- `noAutoImport = true` (skip auto-import)
|
||||||
|
|
||||||
|
**Source:** `cmd/bd/main.go:201-206`
|
||||||
|
|
||||||
|
**Issue:** Still runs staleness check in direct mode, which fails if JSONL is actually newer.
|
||||||
|
|
||||||
|
## Proposed Solutions
|
||||||
|
|
||||||
|
### Solution 1: Force-Import Flag (Quick Fix) ⭐ **Recommended**
|
||||||
|
|
||||||
|
Add `--force` flag to `bd import` that:
|
||||||
|
- Updates `last_import_time` and `last_import_hash` metadata even when 0 issues imported
|
||||||
|
- Explicitly touches database file to update mtime
|
||||||
|
- Prints clear message: "Metadata updated (database already in sync)"
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Minimal code change
|
||||||
|
- Solves immediate problem
|
||||||
|
- User can manually fix stuck state
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- Requires user to know about --force flag
|
||||||
|
- Doesn't prevent the problem from occurring
|
||||||
|
|
||||||
|
**Implementation location:** `cmd/bd/import.go` around line 349
|
||||||
|
|
||||||
|
### Solution 2: Skip-Staleness Flag (Escape Hatch) ⭐ **Recommended**
|
||||||
|
|
||||||
|
Add `--allow-stale` or `--no-staleness-check` global flag that:
|
||||||
|
- Bypasses `ensureDatabaseFresh()` check entirely
|
||||||
|
- Allows operations on potentially stale data
|
||||||
|
- Prints warning: "⚠️ Staleness check skipped, data may be out of sync"
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Emergency escape hatch when stuck
|
||||||
|
- Minimal invasive change
|
||||||
|
- Works with `--sandbox` mode
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- User can accidentally work with stale data
|
||||||
|
- Should be well-documented as last resort
|
||||||
|
|
||||||
|
**Implementation location:** `cmd/bd/staleness.go:20` and callers
|
||||||
|
|
||||||
|
### Solution 3: Sandbox Detection (Automatic) ⭐⭐ **Best Long-term**
|
||||||
|
|
||||||
|
Auto-detect sandbox environment and adjust behavior:
|
||||||
|
|
||||||
|
```go
|
||||||
|
func isSandboxed() bool {
|
||||||
|
// Try to signal a known process (e.g., our own parent)
|
||||||
|
// If we get EPERM, we're likely sandboxed
|
||||||
|
if syscall.Kill(os.Getppid(), 0) != nil {
|
||||||
|
if err == syscall.EPERM {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// In PersistentPreRun:
|
||||||
|
if isSandboxed() {
|
||||||
|
sandboxMode = true // Auto-enable sandbox mode
|
||||||
|
fmt.Fprintf(os.Stderr, "ℹ️ Sandbox detected, using direct mode\n")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Additionally, when daemon connection fails with permission errors:
|
||||||
|
- Automatically set `noDaemon = true` for subsequent operations
|
||||||
|
- Skip daemon health checks that require process signals
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Zero configuration for users
|
||||||
|
- Prevents the problem entirely
|
||||||
|
- Graceful degradation
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- More complex heuristic
|
||||||
|
- May have false positives
|
||||||
|
- Requires testing in various environments
|
||||||
|
|
||||||
|
**Implementation locations:**
|
||||||
|
- `cmd/bd/main.go` (detection)
|
||||||
|
- `cmd/bd/daemon_unix.go` (process checks)
|
||||||
|
|
||||||
|
### Solution 4: Better Daemon Health Checks (Robust)
|
||||||
|
|
||||||
|
Enhance daemon health check to detect unreachable daemons:
|
||||||
|
|
||||||
|
1. When `daemonClient.Health()` fails, check why:
|
||||||
|
- Connection refused → daemon not running
|
||||||
|
- Timeout → daemon unreachable (sandbox?)
|
||||||
|
- Permission denied → sandbox detected
|
||||||
|
|
||||||
|
2. On sandbox detection, automatically:
|
||||||
|
- Set `noDaemon = true`
|
||||||
|
- Clear cached daemon client
|
||||||
|
- Proceed in direct mode
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Automatic recovery
|
||||||
|
- Better error messages
|
||||||
|
- Handles edge cases
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- Requires careful timeout tuning
|
||||||
|
- More complex state management
|
||||||
|
|
||||||
|
**Implementation location:** `cmd/bd/main.go` around lines 300-367
|
||||||
|
|
||||||
|
### Solution 5: Daemon Metadata Refresh (Prevents Staleness)
|
||||||
|
|
||||||
|
Make daemon periodically refresh metadata from disk:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// In daemon event loop, check metadata every N seconds
|
||||||
|
if time.Since(lastMetadataCheck) > 5*time.Second {
|
||||||
|
lastImportTime, _ := store.GetMetadata(ctx, "last_import_time")
|
||||||
|
// Update cached value
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Daemon picks up external import operations
|
||||||
|
- Reduces stale metadata issues
|
||||||
|
- Works for other scenarios too
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- Doesn't solve sandbox permission issues
|
||||||
|
- Adds I/O overhead
|
||||||
|
- Still requires daemon restart eventually
|
||||||
|
|
||||||
|
**Implementation location:** `cmd/bd/daemon_event_loop.go`
|
||||||
|
|
||||||
|
## Recommended Implementation Plan
|
||||||
|
|
||||||
|
### Phase 1: Immediate Relief (1-2 hours)
|
||||||
|
1. ✅ Add `--force` flag to `bd import` (Solution 1)
|
||||||
|
2. ✅ Add `--allow-stale` global flag (Solution 2)
|
||||||
|
3. ✅ Update error message to suggest these flags
|
||||||
|
|
||||||
|
### Phase 2: Better UX (3-4 hours)
|
||||||
|
1. ✅ Implement sandbox detection heuristic (Solution 3)
|
||||||
|
2. ✅ Auto-enable `--sandbox` mode when detected
|
||||||
|
3. ✅ Update docs with sandbox troubleshooting
|
||||||
|
|
||||||
|
### Phase 3: Robustness (5-6 hours)
|
||||||
|
1. Enhance daemon health checks (Solution 4)
|
||||||
|
2. Add daemon metadata refresh (Solution 5)
|
||||||
|
3. Comprehensive testing in sandbox environments
|
||||||
|
|
||||||
|
## Testing Strategy
|
||||||
|
|
||||||
|
### Manual Testing in Codex Sandbox
|
||||||
|
1. Start daemon outside sandbox
|
||||||
|
2. Run `bd ready` inside sandbox → should detect sandbox
|
||||||
|
3. Run `bd import --force` → should update metadata
|
||||||
|
4. Run `bd ready --allow-stale` → should work despite staleness
|
||||||
|
|
||||||
|
### Automated Testing
|
||||||
|
1. Mock sandboxed environment (permission denied on signals)
|
||||||
|
2. Test daemon connection failure scenarios
|
||||||
|
3. Test metadata update in import with 0 changes
|
||||||
|
4. Test staleness check bypass flag
|
||||||
|
|
||||||
|
## Documentation Updates Needed
|
||||||
|
|
||||||
|
1. **TROUBLESHOOTING.md** - Add sandbox section with:
|
||||||
|
- Symptoms of daemon lock issues
|
||||||
|
- `--sandbox` flag usage
|
||||||
|
- `--force` and `--allow-stale` as escape hatches
|
||||||
|
|
||||||
|
2. **CLI_REFERENCE.md** - Document new flags:
|
||||||
|
- `--allow-stale` / `--no-staleness-check`
|
||||||
|
- `bd import --force`
|
||||||
|
|
||||||
|
3. **Error message** in `staleness.go` - Add:
|
||||||
|
```
|
||||||
|
If you're in a sandboxed environment (e.g., Codex):
|
||||||
|
bd --sandbox ready
|
||||||
|
bd import --force -i .beads/beads.jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
## Files to Modify
|
||||||
|
|
||||||
|
### Critical Path (Phase 1)
|
||||||
|
- [ ] `cmd/bd/import.go` - Add --force flag
|
||||||
|
- [ ] `cmd/bd/staleness.go` - Add staleness bypass, update error message
|
||||||
|
- [ ] `cmd/bd/main.go` - Add --allow-stale flag
|
||||||
|
|
||||||
|
### Enhancement (Phase 2-3)
|
||||||
|
- [ ] `cmd/bd/main.go` - Sandbox detection
|
||||||
|
- [ ] `cmd/bd/daemon_unix.go` - Permission-aware process checks
|
||||||
|
- [ ] `cmd/bd/daemon_event_loop.go` - Metadata refresh
|
||||||
|
- [ ] `internal/rpc/server_export_import_auto.go` - Better import handling
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
- [ ] `docs/TROUBLESHOOTING.md`
|
||||||
|
- [ ] `docs/CLI_REFERENCE.md`
|
||||||
|
- [ ] Issue #353 comment with workaround
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. Should `--sandbox` auto-detect, or require explicit flag?
|
||||||
|
- **Recommendation:** Start with explicit, add auto-detect in Phase 2
|
||||||
|
|
||||||
|
2. Should `--allow-stale` be per-command or global?
|
||||||
|
- **Recommendation:** Global flag (less repetition)
|
||||||
|
|
||||||
|
3. What should happen to daemon lock files when daemon is unreachable?
|
||||||
|
- **Recommendation:** Leave them (don't force-break locks), use direct mode
|
||||||
|
|
||||||
|
4. Should we add a `--force-direct` that ignores daemon locks entirely?
|
||||||
|
- **Recommendation:** Not needed if sandbox detection works well
|
||||||
|
|
||||||
|
## Success Metrics
|
||||||
|
|
||||||
|
- Users in Codex can run `bd ready` without errors
|
||||||
|
- No false positives in sandbox detection
|
||||||
|
- Clear error messages guide users to solutions
|
||||||
|
- `bd import --force` always updates metadata
|
||||||
|
- `--sandbox` mode works reliably
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Investigation completed:** 2025-11-21
|
||||||
|
**Next steps:** Implement Phase 1 solutions
|
||||||
128
docs/GH353_NEXT_SESSION.md
Normal file
128
docs/GH353_NEXT_SESSION.md
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
# Next Session Prompt: Implement GH #353 Fixes
|
||||||
|
|
||||||
|
## Context
|
||||||
|
We've investigated GH #353 (daemon locking issues in Codex sandbox). Full analysis in `docs/GH353_INVESTIGATION.md`.
|
||||||
|
|
||||||
|
**TL;DR:** Users in sandboxed environments (Codex) get stuck with "Database out of sync" errors because:
|
||||||
|
1. Running daemon has cached metadata
|
||||||
|
2. `bd import` updates database but daemon never sees it
|
||||||
|
3. Sandbox can't signal/kill the daemon
|
||||||
|
4. User is stuck in infinite loop
|
||||||
|
|
||||||
|
## Task: Implement Phase 1 Solutions
|
||||||
|
|
||||||
|
Implement three quick fixes that give users escape hatches:
|
||||||
|
|
||||||
|
### 1. Add `--force` flag to `bd import`
|
||||||
|
**File:** `cmd/bd/import.go`
|
||||||
|
|
||||||
|
**What to do:**
|
||||||
|
- Add `--force` flag to importCmd.Flags() (around line 692)
|
||||||
|
- When `--force` is true, ALWAYS update metadata (lines 310-346) even if `created == 0 && updated == 0`
|
||||||
|
- Print message: "Metadata updated (database already in sync with JSONL)"
|
||||||
|
- Ensure `TouchDatabaseFile()` is called to update mtime
|
||||||
|
|
||||||
|
**Why:** Allows users to manually force metadata sync when stuck
|
||||||
|
|
||||||
|
### 2. Add `--allow-stale` global flag
|
||||||
|
**File:** `cmd/bd/main.go`
|
||||||
|
|
||||||
|
**What to do:**
|
||||||
|
- Add global var: `allowStale bool`
|
||||||
|
- Add to rootCmd.PersistentFlags(): `--allow-stale` (around line 111)
|
||||||
|
- Description: "Allow operations on potentially stale data (skip staleness check)"
|
||||||
|
|
||||||
|
**File:** `cmd/bd/staleness.go`
|
||||||
|
|
||||||
|
**What to do:**
|
||||||
|
- At top of `ensureDatabaseFresh()` function (line 20), add:
|
||||||
|
```go
|
||||||
|
if allowStale {
|
||||||
|
fmt.Fprintf(os.Stderr, "⚠️ Staleness check skipped (--allow-stale), data may be out of sync\n")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Why:** Emergency escape hatch when staleness check blocks operations
|
||||||
|
|
||||||
|
### 3. Improve error message in staleness.go
|
||||||
|
**File:** `cmd/bd/staleness.go`
|
||||||
|
|
||||||
|
**What to do:**
|
||||||
|
- Update the error message (lines 41-50) to add sandbox guidance:
|
||||||
|
```go
|
||||||
|
return fmt.Errorf(
|
||||||
|
"Database out of sync with JSONL. Run 'bd import' first.\n\n"+
|
||||||
|
"The JSONL file has been updated (e.g., after 'git pull') but the database\n"+
|
||||||
|
"hasn't been imported yet. This would cause you to see stale/incomplete data.\n\n"+
|
||||||
|
"To fix:\n"+
|
||||||
|
" bd import -i .beads/beads.jsonl # Import JSONL updates to database\n\n"+
|
||||||
|
"If in a sandboxed environment (e.g., Codex) where daemon can't be stopped:\n"+
|
||||||
|
" bd --sandbox ready # Use direct mode (no daemon)\n"+
|
||||||
|
" bd import --force # Force metadata update\n"+
|
||||||
|
" bd ready --allow-stale # Skip staleness check (use with caution)\n\n"+
|
||||||
|
"Or use daemon mode (auto-imports on every operation):\n"+
|
||||||
|
" bd daemon start\n"+
|
||||||
|
" bd <command> # Will auto-import before executing",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Why:** Guides users to the right solution based on their environment
|
||||||
|
|
||||||
|
## Testing Checklist
|
||||||
|
|
||||||
|
After implementation:
|
||||||
|
|
||||||
|
- [ ] `bd import --force -i .beads/beads.jsonl` updates metadata even with 0 changes
|
||||||
|
- [ ] `bd import --force` without `-i` flag shows appropriate error (needs input file)
|
||||||
|
- [ ] `bd ready --allow-stale` bypasses staleness check and shows warning
|
||||||
|
- [ ] Error message displays correctly and includes sandbox guidance
|
||||||
|
- [ ] `--sandbox` mode still works as before
|
||||||
|
- [ ] Flags appear in `bd --help` and `bd import --help`
|
||||||
|
|
||||||
|
## Quick Start Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Review the investigation
|
||||||
|
cat docs/GH353_INVESTIGATION.md
|
||||||
|
|
||||||
|
# 2. Check current import.go implementation
|
||||||
|
grep -A 5 "func init()" cmd/bd/import.go
|
||||||
|
|
||||||
|
# 3. Check current staleness.go
|
||||||
|
head -60 cmd/bd/staleness.go
|
||||||
|
|
||||||
|
# 4. Run existing tests to establish baseline
|
||||||
|
go test ./cmd/bd/... -run TestImport
|
||||||
|
go test ./cmd/bd/... -run TestStaleness
|
||||||
|
|
||||||
|
# 5. Implement changes (see sections above)
|
||||||
|
|
||||||
|
# 6. Test manually
|
||||||
|
bd import --help | grep force
|
||||||
|
bd --help | grep allow-stale
|
||||||
|
```
|
||||||
|
|
||||||
|
## Expected Outcome
|
||||||
|
|
||||||
|
Users stuck in Codex sandbox can:
|
||||||
|
1. Run `bd import --force -i .beads/beads.jsonl` to fix metadata
|
||||||
|
2. Run `bd --sandbox ready` to use direct mode
|
||||||
|
3. Run `bd ready --allow-stale` as last resort
|
||||||
|
4. See helpful error message explaining their options
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- **Investigation:** `docs/GH353_INVESTIGATION.md`
|
||||||
|
- **Issue:** https://github.com/steveyegge/beads/issues/353
|
||||||
|
- **Key files:**
|
||||||
|
- `cmd/bd/import.go` (import command)
|
||||||
|
- `cmd/bd/staleness.go` (staleness check)
|
||||||
|
- `cmd/bd/main.go` (global flags)
|
||||||
|
|
||||||
|
## Estimated Time
|
||||||
|
~1-2 hours for implementation + testing
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Ready to implement?** Start with adding the flags, then update the error message, then test thoroughly.
|
||||||
Reference in New Issue
Block a user