* feat: Add automatic orphaned claude process cleanup Claude Code's Task tool spawns subagent processes that sometimes don't clean up properly after completion. These accumulate and consume significant memory (observed: 17 processes using ~6GB RAM). This change adds automatic cleanup in two places: 1. **Deacon patrol** (primary): New patrol step "orphan-process-cleanup" runs `gt deacon cleanup-orphans` early in each cycle. More responsive (~30s). 2. **Daemon heartbeat** (fallback): Runs cleanup every 3 minutes as safety net when deacon is down. Detection uses TTY column - processes with TTY "?" have no controlling terminal. This is safe because: - Processes in terminals (user sessions) have a TTY like "pts/0" - untouched - Only kills processes with no controlling terminal - Orphaned subagents are children of tmux server with no TTY New files: - internal/util/orphan.go: FindOrphanedClaudeProcesses, CleanupOrphanedClaudeProcesses - internal/util/orphan_test.go: Tests for orphan detection New command: - `gt deacon cleanup-orphans`: Manual/patrol-triggered cleanup Fixes #587 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(orphan): add Windows build tag and minimum age check Addresses review feedback on PR #588: 1. Add //go:build !windows to orphan.go and orphan_test.go - The code uses Unix-specific syscalls (SIGTERM, ESRCH) and ps command options that don't exist on Windows 2. Add minimum age check (60 seconds) to prevent false positives - Prevents race conditions with newly spawned subagents - Addresses reviewer concern about cron/systemd processes - Uses portable etime format instead of Linux-only etimes 3. Add parseEtime helper with comprehensive tests - Parses [[DD-]HH:]MM:SS format (works on both Linux and macOS) - etimes (seconds) is Linux-specific, etime is portable Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(orphan): add proper SIGTERM→SIGKILL escalation with state tracking Previous approach used process age which doesn't work: a Task subagent runs without TTY from birth, so a long-running legitimate subagent that later fails to exit would be immediately SIGKILLed without trying SIGTERM. New approach uses a state file to track signal history: 1. First encounter → SIGTERM, record PID + timestamp in state file 2. Next cycle (after 60s grace period) → if still alive, SIGKILL 3. Next cycle → if survived SIGKILL, log as unkillable and remove State file: $XDG_RUNTIME_DIR/gastown-orphan-state (or /tmp/) Format: "<pid> <signal> <unix_timestamp>" per line The state file is automatically cleaned up: - Dead processes removed on load - Unkillable processes removed after logging Also updates callers to use new CleanupResult type which includes the signal sent (SIGTERM, SIGKILL, or UNKILLABLE). Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
82 lines
2.2 KiB
Go
82 lines
2.2 KiB
Go
//go:build !windows
|
|
|
|
package util
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestParseEtime(t *testing.T) {
|
|
tests := []struct {
|
|
input string
|
|
expected int
|
|
wantErr bool
|
|
}{
|
|
// MM:SS format
|
|
{"00:30", 30, false},
|
|
{"01:00", 60, false},
|
|
{"01:23", 83, false},
|
|
{"59:59", 3599, false},
|
|
|
|
// HH:MM:SS format
|
|
{"00:01:00", 60, false},
|
|
{"01:00:00", 3600, false},
|
|
{"01:02:03", 3723, false},
|
|
{"23:59:59", 86399, false},
|
|
|
|
// DD-HH:MM:SS format
|
|
{"1-00:00:00", 86400, false},
|
|
{"2-01:02:03", 176523, false},
|
|
{"7-12:30:45", 649845, false},
|
|
|
|
// Edge cases
|
|
{"00:00", 0, false},
|
|
{"0-00:00:00", 0, false},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.input, func(t *testing.T) {
|
|
got, err := parseEtime(tt.input)
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("parseEtime(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
|
|
return
|
|
}
|
|
if got != tt.expected {
|
|
t.Errorf("parseEtime(%q) = %d, want %d", tt.input, got, tt.expected)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestFindOrphanedClaudeProcesses(t *testing.T) {
|
|
// This is a live test that checks for orphaned processes on the current system.
|
|
// It should not fail - just return whatever orphans exist (likely none in CI).
|
|
orphans, err := FindOrphanedClaudeProcesses()
|
|
if err != nil {
|
|
t.Fatalf("FindOrphanedClaudeProcesses() error = %v", err)
|
|
}
|
|
|
|
// Log what we found (useful for debugging)
|
|
t.Logf("Found %d orphaned claude processes", len(orphans))
|
|
for _, o := range orphans {
|
|
t.Logf(" PID %d: %s", o.PID, o.Cmd)
|
|
}
|
|
}
|
|
|
|
func TestFindOrphanedClaudeProcesses_IgnoresTerminalProcesses(t *testing.T) {
|
|
// This test verifies that the function only returns processes without TTY.
|
|
// We can't easily mock ps output, but we can verify that if we're running
|
|
// this test in a terminal, our own process tree isn't flagged.
|
|
orphans, err := FindOrphanedClaudeProcesses()
|
|
if err != nil {
|
|
t.Fatalf("FindOrphanedClaudeProcesses() error = %v", err)
|
|
}
|
|
|
|
// If we're running in a terminal (typical test scenario), verify that
|
|
// any orphans found genuinely have no TTY. We can't verify they're NOT
|
|
// in the list since we control the test process, but we can log for inspection.
|
|
for _, o := range orphans {
|
|
t.Logf("Orphan found: PID %d (%s) - verify this has TTY=? in 'ps aux'", o.PID, o.Cmd)
|
|
}
|
|
}
|