From 1465f7bb1d38fbaf215848694feb3a5190725361 Mon Sep 17 00:00:00 2001 From: Ross Gardler Date: Tue, 16 Dec 2025 21:57:29 -0800 Subject: [PATCH 1/2] hooks: kill descendant processes on timeout; add test and comment Ensure RunSync kills the hook process group on timeout so descendant processes (e.g. scripts that ) cannot keep the caller blocked. Add explanatory comment in and a Linux-only unit test in . Trim pid parsing newline in the test. All tests pass locally. --- internal/hooks/hooks.go | 38 ++++++++++++++++++++++---- internal/hooks/hooks_test.go | 53 ++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/internal/hooks/hooks.go b/internal/hooks/hooks.go index b21c46a8..ba72316b 100644 --- a/internal/hooks/hooks.go +++ b/internal/hooks/hooks.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "syscall" "time" "github.com/steveyegge/beads/internal/types" @@ -118,15 +119,40 @@ func (r *Runner) runHook(hookPath, event string, issue *types.Issue) error { cmd.Stdout = &stdout cmd.Stderr = &stderr - // Run the hook - err = cmd.Run() - if err != nil { - // Log error but don't fail - hooks shouldn't break beads - // In production, this could go to a log file + // Start the hook so we can manage its process group and kill children on timeout. + // + // Rationale: scripts may spawn child processes (backgrounded or otherwise). + // If we only kill the immediate process, descendants may survive and keep + // the test (or caller) blocked — see TestRunSync_Timeout which previously + // observed a `sleep 60` still running after the parent process was killed. + // Creating a process group (Setpgid) and sending a negative PID to + // `syscall.Kill` ensures the entire group (parent + children) are killed + // reliably on timeout. + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + + if err := cmd.Start(); err != nil { return err } - return nil + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + select { + case <-ctx.Done(): + // Kill the whole process group to ensure any children (e.g., sleep) + // are also terminated. + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + // Wait for process to exit + <-done + return ctx.Err() + case err := <-done: + if err != nil { + return err + } + return nil + } } // HookExists checks if a hook exists for an event diff --git a/internal/hooks/hooks_test.go b/internal/hooks/hooks_test.go index cc056b37..09a484a1 100644 --- a/internal/hooks/hooks_test.go +++ b/internal/hooks/hooks_test.go @@ -3,6 +3,9 @@ package hooks import ( "os" "path/filepath" + "runtime" + "strconv" + "strings" "testing" "time" @@ -252,6 +255,56 @@ sleep 60` } } +func TestRunSync_KillsDescendants(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("TestRunSync_KillsDescendants requires Linux /proc") + } + + tmpDir := t.TempDir() + hookPath := filepath.Join(tmpDir, HookOnCreate) + pidFile := filepath.Join(tmpDir, "child.pid") + + // Hook starts a background sleep, writes its pid, and waits for it. + // Parent will remain alive until the child exits, so killing the + // process group should terminate both. + hookScript := `#!/bin/sh +(sleep 60 & echo $! > ` + pidFile + ` ; wait)` + if err := os.WriteFile(hookPath, []byte(hookScript), 0755); err != nil { + t.Fatalf("Failed to create hook file: %v", err) + } + + runner := &Runner{ + hooksDir: tmpDir, + timeout: 500 * time.Millisecond, + } + issue := &types.Issue{ID: "bd-test", Title: "Test"} + + if testing.Short() { + t.Skip("Skipping long-running descendant kill test in short mode") + } + + err := runner.RunSync(EventCreate, issue) + if err == nil { + t.Fatal("Expected RunSync to return an error on timeout") + } + + // Read the child PID and ensure it's not running anymore. + data, err := os.ReadFile(pidFile) + if err != nil { + t.Fatalf("Failed to read pid file: %v", err) + } + pidStr := strings.TrimSpace(string(data)) + pid, err := strconv.Atoi(pidStr) + if err != nil { + t.Fatalf("Invalid pid in pid file: %v", err) + } + + // Check /proc/ does not exist + if _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid))); err == nil { + t.Fatalf("Child process %d still exists after timeout", pid) + } +} + func TestRunSync_HookFailure(t *testing.T) { tmpDir := t.TempDir() hookPath := filepath.Join(tmpDir, HookOnUpdate) From 6b0d0901d06a945749a7e30f75c82cb74489ebc0 Mon Sep 17 00:00:00 2001 From: Ross Gardler Date: Tue, 16 Dec 2025 22:38:45 -0800 Subject: [PATCH 2/2] hooks: harden timeout kill and add platform split - Move runHook into build-tagged implementations (unix/windows) to keep unix syscalls off Windows builds. - In unix implementation, guard nil Process, return wrapped kill errors except ESRCH, and document linkage to TestRunSync_KillsDescendants. - On Windows, best-effort kill on timeout retains prior behavior. - In tests, move testing.Short earlier and keep descendant-kill coverage on Linux. --- internal/hooks/hooks.go | 61 --------------------------- internal/hooks/hooks_test.go | 8 ++-- internal/hooks/hooks_unix.go | 74 +++++++++++++++++++++++++++++++++ internal/hooks/hooks_windows.go | 53 +++++++++++++++++++++++ 4 files changed, 131 insertions(+), 65 deletions(-) create mode 100644 internal/hooks/hooks_unix.go create mode 100644 internal/hooks/hooks_windows.go diff --git a/internal/hooks/hooks.go b/internal/hooks/hooks.go index ba72316b..d4f94328 100644 --- a/internal/hooks/hooks.go +++ b/internal/hooks/hooks.go @@ -3,13 +3,8 @@ package hooks import ( - "bytes" - "context" - "encoding/json" "os" - "os/exec" "path/filepath" - "syscall" "time" "github.com/steveyegge/beads/internal/types" @@ -99,62 +94,6 @@ func (r *Runner) RunSync(event string, issue *types.Issue) error { return r.runHook(hookPath, event, issue) } -func (r *Runner) runHook(hookPath, event string, issue *types.Issue) error { - ctx, cancel := context.WithTimeout(context.Background(), r.timeout) - defer cancel() - - // Prepare JSON data for stdin - issueJSON, err := json.Marshal(issue) - if err != nil { - return err - } - - // Create command: hook_script - // #nosec G204 -- hookPath is from controlled .beads/hooks directory - cmd := exec.CommandContext(ctx, hookPath, issue.ID, event) - cmd.Stdin = bytes.NewReader(issueJSON) - - // Capture output for debugging (but don't block on it) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - // Start the hook so we can manage its process group and kill children on timeout. - // - // Rationale: scripts may spawn child processes (backgrounded or otherwise). - // If we only kill the immediate process, descendants may survive and keep - // the test (or caller) blocked — see TestRunSync_Timeout which previously - // observed a `sleep 60` still running after the parent process was killed. - // Creating a process group (Setpgid) and sending a negative PID to - // `syscall.Kill` ensures the entire group (parent + children) are killed - // reliably on timeout. - cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} - - if err := cmd.Start(); err != nil { - return err - } - - done := make(chan error, 1) - go func() { - done <- cmd.Wait() - }() - - select { - case <-ctx.Done(): - // Kill the whole process group to ensure any children (e.g., sleep) - // are also terminated. - _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) - // Wait for process to exit - <-done - return ctx.Err() - case err := <-done: - if err != nil { - return err - } - return nil - } -} - // HookExists checks if a hook exists for an event func (r *Runner) HookExists(event string) bool { hookName := eventToHook(event) diff --git a/internal/hooks/hooks_test.go b/internal/hooks/hooks_test.go index 09a484a1..aed1439f 100644 --- a/internal/hooks/hooks_test.go +++ b/internal/hooks/hooks_test.go @@ -260,6 +260,10 @@ func TestRunSync_KillsDescendants(t *testing.T) { t.Skip("TestRunSync_KillsDescendants requires Linux /proc") } + if testing.Short() { + t.Skip("Skipping long-running descendant kill test in short mode") + } + tmpDir := t.TempDir() hookPath := filepath.Join(tmpDir, HookOnCreate) pidFile := filepath.Join(tmpDir, "child.pid") @@ -279,10 +283,6 @@ func TestRunSync_KillsDescendants(t *testing.T) { } issue := &types.Issue{ID: "bd-test", Title: "Test"} - if testing.Short() { - t.Skip("Skipping long-running descendant kill test in short mode") - } - err := runner.RunSync(EventCreate, issue) if err == nil { t.Fatal("Expected RunSync to return an error on timeout") diff --git a/internal/hooks/hooks_unix.go b/internal/hooks/hooks_unix.go new file mode 100644 index 00000000..8c424ea2 --- /dev/null +++ b/internal/hooks/hooks_unix.go @@ -0,0 +1,74 @@ +//go:build unix + +package hooks + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "os/exec" + "syscall" + + "github.com/steveyegge/beads/internal/types" +) + +// runHook executes the hook and enforces a timeout, killing the process group +// on expiration to ensure descendant processes are terminated. +func (r *Runner) runHook(hookPath, event string, issue *types.Issue) error { + ctx, cancel := context.WithTimeout(context.Background(), r.timeout) + defer cancel() + + // Prepare JSON data for stdin + issueJSON, err := json.Marshal(issue) + if err != nil { + return err + } + + // Create command: hook_script + // #nosec G204 -- hookPath is from controlled .beads/hooks directory + cmd := exec.CommandContext(ctx, hookPath, issue.ID, event) + cmd.Stdin = bytes.NewReader(issueJSON) + + // Capture output for debugging (but don't block on it) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + // Start the hook so we can manage its process group and kill children on timeout. + // + // Rationale: scripts may spawn child processes (backgrounded or otherwise). + // If we only kill the immediate process, descendants may survive and keep + // the test (or caller) blocked — this was exposed by TestRunSync_Timeout and + // validated by TestRunSync_KillsDescendants. Creating a process group (Setpgid) + // and sending a negative PID to syscall.Kill ensures the entire group + // (parent + children) are killed reliably on timeout. + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + + if err := cmd.Start(); err != nil { + return err + } + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + select { + case <-ctx.Done(): + if cmd.Process != nil { + if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil && !errors.Is(err, syscall.ESRCH) { + return fmt.Errorf("kill process group: %w", err) + } + } + // Wait for process to exit after the kill attempt + <-done + return ctx.Err() + case err := <-done: + if err != nil { + return err + } + return nil + } +} diff --git a/internal/hooks/hooks_windows.go b/internal/hooks/hooks_windows.go new file mode 100644 index 00000000..7bfd8a76 --- /dev/null +++ b/internal/hooks/hooks_windows.go @@ -0,0 +1,53 @@ +//go:build windows + +package hooks + +import ( + "bytes" + "context" + "encoding/json" + "os/exec" + + "github.com/steveyegge/beads/internal/types" +) + +// runHook executes the hook and enforces a timeout on Windows. +// Windows lacks Unix-style process groups; on timeout we best-effort kill +// the started process. Descendant processes may survive if they detach, +// but this preserves previous behavior while keeping tests green on Windows. +func (r *Runner) runHook(hookPath, event string, issue *types.Issue) error { + ctx, cancel := context.WithTimeout(context.Background(), r.timeout) + defer cancel() + + issueJSON, err := json.Marshal(issue) + if err != nil { + return err + } + + cmd := exec.CommandContext(ctx, hookPath, issue.ID, event) + cmd.Stdin = bytes.NewReader(issueJSON) + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Start(); err != nil { + return err + } + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + select { + case <-ctx.Done(): + if cmd.Process != nil { + _ = cmd.Process.Kill() + } + <-done + return ctx.Err() + case err := <-done: + return err + } +}