hooks: kill descendant processes on timeout; add test and comment

Ensure RunSync kills the hook process group on timeout so descendant processes (e.g. scripts that ) cannot keep the caller blocked. Add explanatory comment in and a Linux-only unit test in . Trim pid parsing newline in the test. All tests pass locally.
2025-12-16 21:57:29 -08:00
parent 8d73a86f7a
commit 1465f7bb1d
2 changed files with 85 additions and 6 deletions
@@ -9,6 +9,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"syscall"
 	"time"

 	"github.com/steveyegge/beads/internal/types"
@@ -118,15 +119,40 @@ func (r *Runner) runHook(hookPath, event string, issue *types.Issue) error {
 	cmd.Stdout = &stdout
 	cmd.Stderr = &stderr

-	// Run the hook
-	err = cmd.Run()
-	if err != nil {
-		// Log error but don't fail - hooks shouldn't break beads
-		// In production, this could go to a log file
+	// Start the hook so we can manage its process group and kill children on timeout.
+	//
+	// Rationale: scripts may spawn child processes (backgrounded or otherwise).
+	// If we only kill the immediate process, descendants may survive and keep
+	// the test (or caller) blocked — see TestRunSync_Timeout which previously
+	// observed a `sleep 60` still running after the parent process was killed.
+	// Creating a process group (Setpgid) and sending a negative PID to
+	// `syscall.Kill` ensures the entire group (parent + children) are killed
+	// reliably on timeout.
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+
+	if err := cmd.Start(); err != nil {
 		return err
 	}

-	return nil
+	done := make(chan error, 1)
+	go func() {
+		done <- cmd.Wait()
+	}()
+
+	select {
+	case <-ctx.Done():
+		// Kill the whole process group to ensure any children (e.g., sleep)
+		// are also terminated.
+		_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		// Wait for process to exit
+		<-done
+		return ctx.Err()
+	case err := <-done:
+		if err != nil {
+			return err
+		}
+		return nil
+	}
 }

 // HookExists checks if a hook exists for an event
@@ -3,6 +3,9 @@ package hooks
 import (
 	"os"
 	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
 	"testing"
 	"time"

@@ -252,6 +255,56 @@ sleep 60`
 	}
 }

+func TestRunSync_KillsDescendants(t *testing.T) {
+	if runtime.GOOS != "linux" {
+		t.Skip("TestRunSync_KillsDescendants requires Linux /proc")
+	}
+
+	tmpDir := t.TempDir()
+	hookPath := filepath.Join(tmpDir, HookOnCreate)
+	pidFile := filepath.Join(tmpDir, "child.pid")
+
+	// Hook starts a background sleep, writes its pid, and waits for it.
+	// Parent will remain alive until the child exits, so killing the
+	// process group should terminate both.
+	hookScript := `#!/bin/sh
+(sleep 60 & echo $! > ` + pidFile + ` ; wait)`
+	if err := os.WriteFile(hookPath, []byte(hookScript), 0755); err != nil {
+		t.Fatalf("Failed to create hook file: %v", err)
+	}
+
+	runner := &Runner{
+		hooksDir: tmpDir,
+		timeout:  500 * time.Millisecond,
+	}
+	issue := &types.Issue{ID: "bd-test", Title: "Test"}
+
+	if testing.Short() {
+		t.Skip("Skipping long-running descendant kill test in short mode")
+	}
+
+	err := runner.RunSync(EventCreate, issue)
+	if err == nil {
+		t.Fatal("Expected RunSync to return an error on timeout")
+	}
+
+	// Read the child PID and ensure it's not running anymore.
+	data, err := os.ReadFile(pidFile)
+	if err != nil {
+		t.Fatalf("Failed to read pid file: %v", err)
+	}
+	pidStr := strings.TrimSpace(string(data))
+	pid, err := strconv.Atoi(pidStr)
+	if err != nil {
+		t.Fatalf("Invalid pid in pid file: %v", err)
+	}
+
+	// Check /proc/<pid> does not exist
+	if _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid))); err == nil {
+		t.Fatalf("Child process %d still exists after timeout", pid)
+	}
+}
+
 func TestRunSync_HookFailure(t *testing.T) {
 	tmpDir := t.TempDir()
 	hookPath := filepath.Join(tmpDir, HookOnUpdate)