Files
gastown/internal/cmd/cleanup.go
capable bebf425ac5 fix(polecat): kill orphaned Claude processes when nuking polecats
When polecats are nuked, Claude child processes could survive and become
orphans, leading to memory exhaustion (observed: 142 orphaned processes
consuming ~56GB RAM).

This commit:
1. Increases the SIGTERM→SIGKILL grace period from 100ms to 2s to give
   processes time to clean up gracefully
2. Adds orphan cleanup to `gt polecat nuke` that runs after session
   termination to catch any processes that escaped
3. Adds a new `gt cleanup` command for manual orphan removal

The orphan detection uses aggressive tmux session verification to find
ALL Claude processes not in any active session, not just those with
PPID=1.

Fixes: gh-736

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 21:43:23 -08:00

128 lines
3.6 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package cmd
import (
"fmt"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/util"
)
// Flag state for the cleanup command; both values are bound to command-line
// flags in init and read by runCleanup.
//
//   - cleanupDryRun: set by --dry-run; list orphans without killing anything.
//   - cleanupForce:  set by --force / -f; skip the confirmation prompt.
var (
	cleanupDryRun, cleanupForce bool
)
// cleanupCmd is the `gt cleanup` subcommand definition. It is placed in the
// GroupWork command group and delegates all work to runCleanup.
var cleanupCmd = &cobra.Command{
	Use:   "cleanup",
	Short: "Clean up orphaned Claude processes",
	Long: `Clean up orphaned Claude processes that survived session termination.
This command finds and kills Claude processes that are not associated with
any active Gas Town tmux session. These orphans can accumulate when:
- Polecat sessions are killed without proper cleanup
- Claude spawns subagent processes that outlive their parent
- Network or system issues interrupt normal shutdown
Uses aggressive tmux session verification to detect ALL orphaned processes,
not just those with PPID=1.
Examples:
gt cleanup # Clean up orphans with confirmation
gt cleanup --dry-run # Show what would be killed
gt cleanup --force # Kill without confirmation`,
	GroupID: GroupWork,
	RunE:    runCleanup,
}
// init binds the --dry-run and --force/-f flags to their package-level
// variables and attaches cleanupCmd to rootCmd.
func init() {
	flags := cleanupCmd.Flags()
	flags.BoolVar(&cleanupDryRun, "dry-run", false, "Show what would be killed without killing")
	flags.BoolVarP(&cleanupForce, "force", "f", false, "Kill without confirmation")

	rootCmd.AddCommand(cleanupCmd)
}
// runCleanup implements `gt cleanup`. It lists the orphaned Claude processes
// reported by util.FindZombieClaudeProcesses, optionally asks for
// confirmation, invokes util.CleanupZombieClaudeProcesses, and prints one
// line per result followed by a summary.
//
// It reads two package-level flags: cleanupDryRun (list only, kill nothing)
// and cleanupForce (skip the confirmation prompt). The cmd and args
// parameters are unused.
//
// An error is returned only when discovery or the cleanup call itself fails.
// Declining the prompt, a dry run, and processes that survive SIGKILL are all
// reported on stdout and return nil.
func runCleanup(cmd *cobra.Command, args []string) error {
	orphans, err := util.FindZombieClaudeProcesses()
	if err != nil {
		return fmt.Errorf("finding orphaned processes: %w", err)
	}
	if len(orphans) == 0 {
		fmt.Printf("%s No orphaned Claude processes found\n", style.Bold.Render("✓"))
		return nil
	}

	// Show what was found so the user can make an informed decision.
	fmt.Printf("%s Found %d orphaned Claude process(es):\n\n", style.Warning.Render("⚠"), len(orphans))
	for _, p := range orphans {
		fmt.Printf(" %s %s (age: %s, tty: %s)\n",
			style.Bold.Render(fmt.Sprintf("PID %d", p.PID)),
			p.Cmd,
			style.Dim.Render(formatProcessAgeCleanup(p.Age)),
			p.TTY)
	}
	fmt.Println()

	if cleanupDryRun {
		// Give the line a glyph like every other status line; rendering an
		// empty string left a stray leading space in the output.
		fmt.Printf("%s Dry run - no processes killed\n", style.Dim.Render("ℹ"))
		return nil
	}

	// Confirm unless --force.
	if !cleanupForce {
		fmt.Printf("Kill these %d process(es)? [y/N] ", len(orphans))
		var response string
		// The Scanln error is deliberately ignored: the decision is based
		// solely on the token that was read. EOF or an empty line leaves
		// response empty, which falls through to the abort below.
		_, _ = fmt.Scanln(&response)
		switch response {
		case "y", "Y", "yes", "Yes":
			// Confirmed; continue to the kill step.
		default:
			fmt.Println("Aborted")
			return nil
		}
	}

	// The list displayed above is not passed to the cleanup function.
	// NOTE(review): presumably CleanupZombieClaudeProcesses re-scans on its
	// own, so the set it kills may differ from what the user confirmed —
	// verify against its implementation.
	results, err := util.CleanupZombieClaudeProcesses()
	if err != nil {
		return fmt.Errorf("cleaning up processes: %w", err)
	}

	// Report per-process results.
	var killed, unkillable int
	for _, r := range results {
		switch r.Signal {
		case "SIGTERM":
			fmt.Printf(" %s PID %d sent SIGTERM\n", style.Success.Render("✓"), r.Process.PID)
			killed++
		case "SIGKILL":
			fmt.Printf(" %s PID %d sent SIGKILL (didn't respond to SIGTERM)\n", style.Warning.Render("⚠"), r.Process.PID)
			killed++
		case "UNKILLABLE":
			fmt.Printf(" %s PID %d survived SIGKILL\n", style.Error.Render("✗"), r.Process.PID)
			unkillable++
		default:
			// Surface outcomes this command does not recognize instead of
			// dropping them from the report silently.
			fmt.Printf(" %s PID %d unrecognized result %q\n", style.Warning.Render("⚠"), r.Process.PID, r.Signal)
		}
	}

	// Do not present the run as a clean success when something survived.
	summaryGlyph := style.Bold.Render("✓")
	if unkillable > 0 {
		summaryGlyph = style.Warning.Render("⚠")
	}
	fmt.Printf("\n%s Cleaned up %d process(es)", summaryGlyph, killed)
	if unkillable > 0 {
		fmt.Printf(", %d unkillable", unkillable)
	}
	fmt.Println()
	return nil
}
// formatProcessAgeCleanup renders an age given in whole seconds as a compact
// string: "<s>s" below one minute, "<m>m<s>s" below one hour, and "<h>h<m>m"
// from one hour upward (leftover seconds are dropped in the last form).
func formatProcessAgeCleanup(seconds int) string {
	const (
		perMinute = 60
		perHour   = 60 * perMinute
	)

	switch {
	case seconds < perMinute:
		return fmt.Sprintf("%ds", seconds)
	case seconds < perHour:
		m, s := seconds/perMinute, seconds%perMinute
		return fmt.Sprintf("%dm%ds", m, s)
	default:
		h := seconds / perHour
		m := (seconds % perHour) / perMinute
		return fmt.Sprintf("%dh%dm", h, m)
	}
}