Add client self-heal for stale daemon.pid

- When socket missing and lock free, automatically remove stale daemon.pid
- Prevents stale artifacts from accumulating after daemon crashes
- Includes comprehensive test coverage
- Fixes bd-1mzt

Amp-Thread-ID: https://ampcode.com/threads/T-3f606a8a-d591-4412-b994-ea790889a04d
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-11-07 21:21:24 -08:00
parent 00f045a972
commit a236558a7a
2 changed files with 124 additions and 0 deletions

View File

@@ -44,6 +44,8 @@ func TryConnectWithTimeout(socketPath string, dialTimeout time.Duration) (*Clien
running, _ := lockfile.TryDaemonLock(beadsDir)
if !running {
debug.Logf("daemon lock not held and socket missing (no daemon running)")
// Self-heal: clean up stale artifacts when lock is free and socket is missing
cleanupStaleDaemonArtifacts(beadsDir)
return nil, nil
}
}
@@ -333,3 +335,24 @@ func (c *Client) Export(args *ExportArgs) (*Response, error) {
func (c *Client) EpicStatus(args *EpicStatusArgs) (*Response, error) {
return c.Execute(OpEpicStatus, args)
}
// cleanupStaleDaemonArtifacts removes stale daemon.pid file when socket is missing and lock is free.
// This prevents stale artifacts from accumulating after daemon crashes.
// Only removes pid file - lock file is managed by OS (released on process exit).
func cleanupStaleDaemonArtifacts(beadsDir string) {
pidFile := filepath.Join(beadsDir, "daemon.pid")
// Check if pid file exists
if _, err := os.Stat(pidFile); err != nil {
// No pid file to clean up
return
}
// Remove stale pid file
if err := os.Remove(pidFile); err != nil {
debug.Logf("failed to remove stale pid file: %v", err)
return
}
debug.Logf("removed stale daemon.pid file (lock free, socket missing)")
}

View File

@@ -0,0 +1,101 @@
package rpc
import (
"os"
"path/filepath"
"testing"
)
func TestCleanupStaleDaemonArtifacts(t *testing.T) {
// Create temp directory for test
tmpDir := t.TempDir()
beadsDir := filepath.Join(tmpDir, ".beads")
if err := os.MkdirAll(beadsDir, 0755); err != nil {
t.Fatalf("failed to create beads dir: %v", err)
}
pidFile := filepath.Join(beadsDir, "daemon.pid")
// Test 1: No pid file - should not error
t.Run("no_pid_file", func(t *testing.T) {
cleanupStaleDaemonArtifacts(beadsDir)
// Should not panic or error
})
// Test 2: Pid file exists - should be removed
t.Run("removes_pid_file", func(t *testing.T) {
// Create stale pid file
if err := os.WriteFile(pidFile, []byte("12345\n"), 0644); err != nil {
t.Fatalf("failed to create pid file: %v", err)
}
// Verify it exists
if _, err := os.Stat(pidFile); err != nil {
t.Fatalf("pid file should exist before cleanup: %v", err)
}
// Clean up
cleanupStaleDaemonArtifacts(beadsDir)
// Verify it was removed
if _, err := os.Stat(pidFile); err == nil {
t.Errorf("pid file should have been removed")
}
})
// Test 3: Permission denied - should not panic
t.Run("permission_denied", func(t *testing.T) {
// Create pid file
if err := os.WriteFile(pidFile, []byte("12345\n"), 0644); err != nil {
t.Fatalf("failed to create pid file: %v", err)
}
// Make directory read-only (on Unix-like systems)
if err := os.Chmod(beadsDir, 0555); err != nil {
t.Fatalf("failed to make directory read-only: %v", err)
}
defer func() {
// Restore permissions for cleanup
_ = os.Chmod(beadsDir, 0755)
}()
// Should not panic even if removal fails
cleanupStaleDaemonArtifacts(beadsDir)
})
}
func TestTryConnectWithTimeout_SelfHeal(t *testing.T) {
// Create temp directory for test
tmpDir := t.TempDir()
beadsDir := filepath.Join(tmpDir, ".beads")
if err := os.MkdirAll(beadsDir, 0755); err != nil {
t.Fatalf("failed to create beads dir: %v", err)
}
socketPath := filepath.Join(beadsDir, "bd.sock")
pidFile := filepath.Join(beadsDir, "daemon.pid")
// Create stale pid file (no socket, no lock)
if err := os.WriteFile(pidFile, []byte("99999\n"), 0644); err != nil {
t.Fatalf("failed to create pid file: %v", err)
}
// Verify pid file exists
if _, err := os.Stat(pidFile); err != nil {
t.Fatalf("pid file should exist before test: %v", err)
}
// Try to connect (should fail but clean up stale pid file)
client, err := TryConnectWithTimeout(socketPath, 100)
if client != nil {
t.Errorf("expected nil client (no daemon running)")
}
if err != nil {
t.Errorf("expected nil error, got: %v", err)
}
// Verify pid file was cleaned up
if _, err := os.Stat(pidFile); err == nil {
t.Errorf("pid file should have been removed during self-heal")
}
}