Fix deacon patrol process leak by killing pane processes before respawn (#745)

## Problem
The deacon patrol was leaking claude processes. Every patrol cycle (1-3 minutes),
a new claude process was spawned under the hq-deacon tmux session, but old processes
were never terminated. This resulted in 12+ accumulated claude processes consuming
resources.

## Root Cause
In molecule_step.go:331, handleStepContinue() used tmux respawn-pane -k to restart
the pane between patrol steps. The -k flag sends SIGHUP to the shell but does not
kill all descendant processes (claude and its node children).

## Solution
Added KillPaneProcesses() function in tmux.go that explicitly kills all descendant
processes before respawning the pane. This function:
- Gets all descendant PIDs recursively
- Sends SIGTERM to each of them (deepest first)
- Waits 100ms for graceful shutdown
- Sends SIGKILL to survivors

Updated handleStepContinue() to call KillPaneProcesses() before RespawnPane().

Co-authored-by: Roland Tritsch <roland@ailtir.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Roland Tritsch
2026-01-21 06:30:23 +00:00
committed by John Ogle
parent d562828449
commit 858782d657
2 changed files with 48 additions and 0 deletions

View File

@@ -322,6 +322,12 @@ func handleStepContinue(cwd, townRoot, _ string, nextStep *beads.Issue, dryRun b
t := tmux.NewTmux()
// Kill all processes in the pane before respawning to prevent process leaks
if err := t.KillPaneProcesses(pane); err != nil {
// Non-fatal but log the warning
style.PrintWarning("could not kill pane processes: %v", err)
}
// Clear history before respawn
if err := t.ClearHistory(pane); err != nil {
// Non-fatal

View File

@@ -284,6 +284,48 @@ func getAllDescendants(pid string) []string {
return result
}
// KillPaneProcesses explicitly kills all descendant processes of a tmux pane.
// This prevents orphan processes that survive pane respawn due to SIGHUP being ignored.
//
// Process:
//  1. Get the pane's main process PID
//  2. Find all descendant processes recursively (not just direct children)
//  3. Send SIGTERM to all descendants (deepest first)
//  4. Wait 100ms for graceful shutdown
//  5. Send SIGKILL to any remaining descendants
//
// Only the descendants are signalled; the pane's main process itself is left
// alone. If the pane has no descendants the function returns immediately
// without waiting.
//
// This ensures Claude processes and all their children are properly terminated
// before respawning the pane.
//
// It returns an error only when the pane PID cannot be obtained or is empty.
// Failures of the individual kill commands are deliberately ignored.
func (t *Tmux) KillPaneProcesses(pane string) error {
	pid, err := t.GetPanePID(pane)
	if err != nil {
		return fmt.Errorf("getting pane PID: %w", err)
	}
	if pid == "" {
		return fmt.Errorf("pane PID is empty")
	}

	// getAllDescendants is expected to return PIDs deepest-first, so children
	// are signalled before their parents.
	descendants := getAllDescendants(pid)
	if len(descendants) == 0 {
		// Nothing to terminate, so there is no shutdown to wait for either.
		return nil
	}

	for _, dpid := range descendants {
		// Best effort: the process may already have exited.
		_ = exec.Command("kill", "-TERM", dpid).Run()
	}

	// Give the processes a short grace period to shut down cleanly.
	time.Sleep(100 * time.Millisecond)

	for _, dpid := range descendants {
		// Best effort: kill fails for processes that exited after SIGTERM,
		// which is the expected outcome.
		_ = exec.Command("kill", "-KILL", dpid).Run()
	}
	return nil
}
// KillServer terminates the entire tmux server and all sessions.
func (t *Tmux) KillServer() error {
_, err := t.run("kill-server")