hooks: kill descendant processes on timeout; add test and comment

Ensure RunSync kills the hook process group on timeout so descendant processes (e.g. scripts that ) cannot keep the caller blocked. Add explanatory comment in  and a Linux-only unit test  in . Trim pid parsing newline in the test. All tests pass locally.
This commit is contained in:
Ross Gardler
2025-12-16 21:57:29 -08:00
parent 8d73a86f7a
commit 1465f7bb1d
2 changed files with 85 additions and 6 deletions

View File

@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"syscall"
"time"
"github.com/steveyegge/beads/internal/types"
@@ -118,15 +119,40 @@ func (r *Runner) runHook(hookPath, event string, issue *types.Issue) error {
cmd.Stdout = &stdout
cmd.Stderr = &stderr
// Run the hook
err = cmd.Run()
if err != nil {
// Log error but don't fail - hooks shouldn't break beads
// In production, this could go to a log file
// Start the hook so we can manage its process group and kill children on timeout.
//
// Rationale: scripts may spawn child processes (backgrounded or otherwise).
// If we only kill the immediate process, descendants may survive and keep
// the test (or caller) blocked — see TestRunSync_Timeout which previously
// observed a `sleep 60` still running after the parent process was killed.
// Creating a process group (Setpgid) and sending a negative PID to
// `syscall.Kill` ensures the entire group (parent + children) are killed
// reliably on timeout.
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
if err := cmd.Start(); err != nil {
return err
}
return nil
done := make(chan error, 1)
go func() {
done <- cmd.Wait()
}()
select {
case <-ctx.Done():
// Kill the whole process group to ensure any children (e.g., sleep)
// are also terminated.
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
// Wait for process to exit
<-done
return ctx.Err()
case err := <-done:
if err != nil {
return err
}
return nil
}
}
// HookExists checks if a hook exists for an event

View File

@@ -3,6 +3,9 @@ package hooks
import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"testing"
"time"
@@ -252,6 +255,56 @@ sleep 60`
}
}
func TestRunSync_KillsDescendants(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("TestRunSync_KillsDescendants requires Linux /proc")
}
tmpDir := t.TempDir()
hookPath := filepath.Join(tmpDir, HookOnCreate)
pidFile := filepath.Join(tmpDir, "child.pid")
// Hook starts a background sleep, writes its pid, and waits for it.
// Parent will remain alive until the child exits, so killing the
// process group should terminate both.
hookScript := `#!/bin/sh
(sleep 60 & echo $! > ` + pidFile + ` ; wait)`
if err := os.WriteFile(hookPath, []byte(hookScript), 0755); err != nil {
t.Fatalf("Failed to create hook file: %v", err)
}
runner := &Runner{
hooksDir: tmpDir,
timeout: 500 * time.Millisecond,
}
issue := &types.Issue{ID: "bd-test", Title: "Test"}
if testing.Short() {
t.Skip("Skipping long-running descendant kill test in short mode")
}
err := runner.RunSync(EventCreate, issue)
if err == nil {
t.Fatal("Expected RunSync to return an error on timeout")
}
// Read the child PID and ensure it's not running anymore.
data, err := os.ReadFile(pidFile)
if err != nil {
t.Fatalf("Failed to read pid file: %v", err)
}
pidStr := strings.TrimSpace(string(data))
pid, err := strconv.Atoi(pidStr)
if err != nil {
t.Fatalf("Invalid pid in pid file: %v", err)
}
// Check /proc/<pid> does not exist
if _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid))); err == nil {
t.Fatalf("Child process %d still exists after timeout", pid)
}
}
func TestRunSync_HookFailure(t *testing.T) {
tmpDir := t.TempDir()
hookPath := filepath.Join(tmpDir, HookOnUpdate)