Fix deacon patrol process leak by killing pane processes before respawn (#745)
## Problem

The deacon patrol was leaking claude processes. Every patrol cycle (1-3 minutes), a new claude process was spawned under the hq-deacon tmux session, but old processes were never terminated. This resulted in 12+ accumulated claude processes consuming resources.

## Root Cause

In molecule_step.go:331, handleStepContinue() used `tmux respawn-pane -k` to restart the pane between patrol steps. The -k flag sends SIGHUP to the shell but does not kill all descendant processes (claude and its node children).

## Solution

Added a KillPaneProcesses() function in tmux.go that explicitly kills all descendant processes before respawning the pane. This function:

- Gets all descendant PIDs recursively
- Sends SIGTERM to all (deepest first)
- Waits 100ms for graceful shutdown
- Sends SIGKILL to survivors

Updated handleStepContinue() to call KillPaneProcesses() before RespawnPane().

Co-authored-by: Roland Tritsch <roland@ailtir.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -322,6 +322,12 @@ func handleStepContinue(cwd, townRoot, _ string, nextStep *beads.Issue, dryRun b
|
||||
|
||||
t := tmux.NewTmux()
|
||||
|
||||
// Kill all processes in the pane before respawning to prevent process leaks
|
||||
if err := t.KillPaneProcesses(pane); err != nil {
|
||||
// Non-fatal but log the warning
|
||||
style.PrintWarning("could not kill pane processes: %v", err)
|
||||
}
|
||||
|
||||
// Clear history before respawn
|
||||
if err := t.ClearHistory(pane); err != nil {
|
||||
// Non-fatal
|
||||
|
||||
@@ -284,6 +284,48 @@ func getAllDescendants(pid string) []string {
|
||||
return result
|
||||
}
|
||||
|
||||
// KillPaneProcesses explicitly kills all processes associated with a tmux pane.
|
||||
// This prevents orphan processes that survive pane respawn due to SIGHUP being ignored.
|
||||
//
|
||||
// Process:
|
||||
// 1. Get the pane's main process PID
|
||||
// 2. Find all descendant processes recursively (not just direct children)
|
||||
// 3. Send SIGTERM to all descendants (deepest first)
|
||||
// 4. Wait 100ms for graceful shutdown
|
||||
// 5. Send SIGKILL to any remaining descendants
|
||||
//
|
||||
// This ensures Claude processes and all their children are properly terminated
|
||||
// before respawning the pane.
|
||||
func (t *Tmux) KillPaneProcesses(pane string) error {
|
||||
// Get the pane PID
|
||||
pid, err := t.GetPanePID(pane)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting pane PID: %w", err)
|
||||
}
|
||||
|
||||
if pid == "" {
|
||||
return fmt.Errorf("pane PID is empty")
|
||||
}
|
||||
|
||||
// Get all descendant PIDs recursively (returns deepest-first order)
|
||||
descendants := getAllDescendants(pid)
|
||||
|
||||
// Send SIGTERM to all descendants (deepest first to avoid orphaning)
|
||||
for _, dpid := range descendants {
|
||||
_ = exec.Command("kill", "-TERM", dpid).Run()
|
||||
}
|
||||
|
||||
// Wait for graceful shutdown
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// Send SIGKILL to any remaining descendants
|
||||
for _, dpid := range descendants {
|
||||
_ = exec.Command("kill", "-KILL", dpid).Run()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// KillServer terminates the entire tmux server and all sessions.
|
||||
func (t *Tmux) KillServer() error {
|
||||
_, err := t.run("kill-server")
|
||||
|
||||
Reference in New Issue
Block a user