fix(polecat): kill orphaned Claude processes when nuking polecats
When polecats are nuked, Claude child processes could survive and become orphans, leading to memory exhaustion (observed: 142 orphaned processes consuming ~56GB RAM).

This commit:
1. Increases the SIGTERM→SIGKILL grace period from 100ms to 2s to give processes time to clean up gracefully.
2. Adds orphan cleanup to `gt polecat nuke` that runs after session termination to catch any processes that escaped.
3. Adds a new `gt cleanup` command for manual orphan removal.

The orphan detection uses aggressive tmux session verification to find ALL Claude processes not in any active session, not just those with PPID=1.

Fixes: gh-736

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/steveyegge/gastown/internal/runtime"
|
||||
"github.com/steveyegge/gastown/internal/style"
|
||||
"github.com/steveyegge/gastown/internal/tmux"
|
||||
"github.com/steveyegge/gastown/internal/util"
|
||||
)
|
||||
|
||||
// Polecat command flags
|
||||
@@ -1268,6 +1269,12 @@ func runPolecatNuke(cmd *cobra.Command, args []string) error {
|
||||
fmt.Printf("\n%s Nuked %d polecat(s).\n", style.SuccessPrefix, nuked)
|
||||
}
|
||||
|
||||
// Final cleanup: Kill any orphaned Claude processes that escaped the session termination.
|
||||
// This catches processes that called setsid() or were reparented during session shutdown.
|
||||
if !polecatNukeDryRun {
|
||||
cleanupOrphanedProcesses()
|
||||
}
|
||||
|
||||
if len(nukeErrors) > 0 {
|
||||
return fmt.Errorf("%d nuke(s) failed", len(nukeErrors))
|
||||
}
|
||||
@@ -1275,6 +1282,39 @@ func runPolecatNuke(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupOrphanedProcesses kills Claude processes that survived session termination.
|
||||
// Uses aggressive zombie detection via tmux session verification.
|
||||
func cleanupOrphanedProcesses() {
|
||||
results, err := util.CleanupZombieClaudeProcesses()
|
||||
if err != nil {
|
||||
// Non-fatal: log and continue
|
||||
fmt.Printf(" %s orphan cleanup check failed: %v\n", style.Dim.Render("○"), err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(results) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Report what was cleaned up
|
||||
var killed, escalated int
|
||||
for _, r := range results {
|
||||
switch r.Signal {
|
||||
case "SIGTERM", "SIGKILL":
|
||||
killed++
|
||||
case "UNKILLABLE":
|
||||
escalated++
|
||||
}
|
||||
}
|
||||
|
||||
if killed > 0 {
|
||||
fmt.Printf(" %s cleaned up %d orphaned process(es)\n", style.Success.Render("✓"), killed)
|
||||
}
|
||||
if escalated > 0 {
|
||||
fmt.Printf(" %s %d process(es) survived SIGKILL (unkillable)\n", style.Warning.Render("⚠"), escalated)
|
||||
}
|
||||
}
|
||||
|
||||
func runPolecatStale(cmd *cobra.Command, args []string) error {
|
||||
rigName := args[0]
|
||||
mgr, r, err := getPolecatManager(rigName)
|
||||
|
||||
Reference in New Issue
Block a user