From a236558a7add3a2a987e62ceb20c4f7232095bed Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Fri, 7 Nov 2025 21:21:24 -0800 Subject: [PATCH] Add client self-heal for stale daemon.pid - When socket missing and lock free, automatically remove stale daemon.pid - Prevents stale artifacts from accumulating after daemon crashes - Includes comprehensive test coverage - Fixes bd-1mzt Amp-Thread-ID: https://ampcode.com/threads/T-3f606a8a-d591-4412-b994-ea790889a04d Co-authored-by: Amp --- internal/rpc/client.go | 23 ++++++ internal/rpc/client_selfheal_test.go | 101 +++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 internal/rpc/client_selfheal_test.go diff --git a/internal/rpc/client.go b/internal/rpc/client.go index 81e4b384..5af9bcd0 100644 --- a/internal/rpc/client.go +++ b/internal/rpc/client.go @@ -44,6 +44,8 @@ func TryConnectWithTimeout(socketPath string, dialTimeout time.Duration) (*Clien running, _ := lockfile.TryDaemonLock(beadsDir) if !running { debug.Logf("daemon lock not held and socket missing (no daemon running)") + // Self-heal: clean up stale artifacts when lock is free and socket is missing + cleanupStaleDaemonArtifacts(beadsDir) return nil, nil } } @@ -333,3 +335,24 @@ func (c *Client) Export(args *ExportArgs) (*Response, error) { func (c *Client) EpicStatus(args *EpicStatusArgs) (*Response, error) { return c.Execute(OpEpicStatus, args) } + +// cleanupStaleDaemonArtifacts removes stale daemon.pid file when socket is missing and lock is free. +// This prevents stale artifacts from accumulating after daemon crashes. +// Only removes pid file - lock file is managed by OS (released on process exit). +func cleanupStaleDaemonArtifacts(beadsDir string) { + pidFile := filepath.Join(beadsDir, "daemon.pid") + + // Check if pid file exists + if _, err := os.Stat(pidFile); err != nil { + // No pid file to clean up + return + } + + // Remove stale pid file + if err := os.Remove(pidFile); err != nil { + debug.Logf("failed to remove stale pid file: %v", err) + return + } + + debug.Logf("removed stale daemon.pid file (lock free, socket missing)") +} diff --git a/internal/rpc/client_selfheal_test.go b/internal/rpc/client_selfheal_test.go new file mode 100644 index 00000000..0dad9c1f --- /dev/null +++ b/internal/rpc/client_selfheal_test.go @@ -0,0 +1,101 @@ +package rpc + +import ( + "os" + "path/filepath" + "testing" +) + +func TestCleanupStaleDaemonArtifacts(t *testing.T) { + // Create temp directory for test + tmpDir := t.TempDir() + beadsDir := filepath.Join(tmpDir, ".beads") + if err := os.MkdirAll(beadsDir, 0755); err != nil { + t.Fatalf("failed to create beads dir: %v", err) + } + + pidFile := filepath.Join(beadsDir, "daemon.pid") + + // Test 1: No pid file - should not error + t.Run("no_pid_file", func(t *testing.T) { + cleanupStaleDaemonArtifacts(beadsDir) + // Should not panic or error + }) + + // Test 2: Pid file exists - should be removed + t.Run("removes_pid_file", func(t *testing.T) { + // Create stale pid file + if err := os.WriteFile(pidFile, []byte("12345\n"), 0644); err != nil { + t.Fatalf("failed to create pid file: %v", err) + } + + // Verify it exists + if _, err := os.Stat(pidFile); err != nil { + t.Fatalf("pid file should exist before cleanup: %v", err) + } + + // Clean up + cleanupStaleDaemonArtifacts(beadsDir) + + // Verify it was removed + if _, err := os.Stat(pidFile); err == nil { + t.Errorf("pid file should have been removed") + } + }) + + // Test 3: Permission denied - should not panic + t.Run("permission_denied", func(t *testing.T) { + // Create pid file + if err := os.WriteFile(pidFile, []byte("12345\n"), 0644); err != nil { + t.Fatalf("failed to create pid file: %v", err) + } + + // Make directory read-only (on Unix-like systems) + if err := os.Chmod(beadsDir, 0555); err != nil { + t.Fatalf("failed to make directory read-only: %v", err) + } + defer func() { + // Restore permissions for cleanup + _ = os.Chmod(beadsDir, 0755) + }() + + // Should not panic even if removal fails + cleanupStaleDaemonArtifacts(beadsDir) + }) +} + +func TestTryConnectWithTimeout_SelfHeal(t *testing.T) { + // Create temp directory for test + tmpDir := t.TempDir() + beadsDir := filepath.Join(tmpDir, ".beads") + if err := os.MkdirAll(beadsDir, 0755); err != nil { + t.Fatalf("failed to create beads dir: %v", err) + } + + socketPath := filepath.Join(beadsDir, "bd.sock") + pidFile := filepath.Join(beadsDir, "daemon.pid") + + // Create stale pid file (no socket, no lock) + if err := os.WriteFile(pidFile, []byte("99999\n"), 0644); err != nil { + t.Fatalf("failed to create pid file: %v", err) + } + + // Verify pid file exists + if _, err := os.Stat(pidFile); err != nil { + t.Fatalf("pid file should exist before test: %v", err) + } + + // Try to connect (should fail but clean up stale pid file) + client, err := TryConnectWithTimeout(socketPath, 100) + if client != nil { + t.Errorf("expected nil client (no daemon running)") + } + if err != nil { + t.Errorf("expected nil error, got: %v", err) + } + + // Verify pid file was cleaned up + if _, err := os.Stat(pidFile); err == nil { + t.Errorf("pid file should have been removed during self-heal") + } +}