fix(doctor): make orphan process check informational only
The orphan-processes check previously killed any Claude process without a tmux ancestor, which incorrectly targeted user's personal Claude sessions running in regular terminals. Now the check is informational only: - Changed from FixableCheck to BaseCheck (no auto-fix) - Returns StatusOK with details listing processes outside tmux - Message advises user to verify processes are expected - Removed Fix method and related helpers The orphan-sessions check remains fixable since it only targets gt-* sessions that don't match valid Gas Town patterns. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -225,26 +225,26 @@ func (c *OrphanSessionCheck) isValidSession(sess string, validRigs []string, may
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// OrphanProcessCheck detects orphaned Claude/claude-code processes
|
// OrphanProcessCheck detects Claude/claude-code processes that are not
|
||||||
// that are not associated with a Gas Town tmux session.
|
// running inside a tmux session. These may be user's personal Claude sessions
|
||||||
|
// or legitimately orphaned processes from crashed Gas Town sessions.
|
||||||
|
// This check is informational only - it does not auto-fix since we cannot
|
||||||
|
// distinguish user sessions from orphaned Gas Town processes.
|
||||||
type OrphanProcessCheck struct {
|
type OrphanProcessCheck struct {
|
||||||
FixableCheck
|
BaseCheck
|
||||||
orphanPIDs []int // Cached during Run for use in Fix
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewOrphanProcessCheck creates a new orphan process check.
|
// NewOrphanProcessCheck creates a new orphan process check.
|
||||||
func NewOrphanProcessCheck() *OrphanProcessCheck {
|
func NewOrphanProcessCheck() *OrphanProcessCheck {
|
||||||
return &OrphanProcessCheck{
|
return &OrphanProcessCheck{
|
||||||
FixableCheck: FixableCheck{
|
BaseCheck: BaseCheck{
|
||||||
BaseCheck: BaseCheck{
|
CheckName: "orphan-processes",
|
||||||
CheckName: "orphan-processes",
|
CheckDescription: "Detect Claude processes outside tmux",
|
||||||
CheckDescription: "Detect orphaned Claude processes",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run checks for orphaned Claude processes.
|
// Run checks for Claude processes running outside tmux.
|
||||||
func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
|
func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
|
||||||
// Get list of tmux session PIDs
|
// Get list of tmux session PIDs
|
||||||
tmuxPIDs, err := c.getTmuxSessionPIDs()
|
tmuxPIDs, err := c.getTmuxSessionPIDs()
|
||||||
@@ -276,145 +276,41 @@ func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check which Claude processes are orphaned
|
// Check which Claude processes are outside tmux
|
||||||
var orphans []processInfo
|
var outsideTmux []processInfo
|
||||||
var validCount int
|
var insideTmux int
|
||||||
|
|
||||||
for _, proc := range claudeProcs {
|
for _, proc := range claudeProcs {
|
||||||
if c.isOrphanProcess(proc, tmuxPIDs) {
|
if c.isOrphanProcess(proc, tmuxPIDs) {
|
||||||
orphans = append(orphans, proc)
|
outsideTmux = append(outsideTmux, proc)
|
||||||
} else {
|
} else {
|
||||||
validCount++
|
insideTmux++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cache orphan PIDs for Fix
|
if len(outsideTmux) == 0 {
|
||||||
c.orphanPIDs = make([]int, len(orphans))
|
|
||||||
for i, p := range orphans {
|
|
||||||
c.orphanPIDs[i] = p.pid
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(orphans) == 0 {
|
|
||||||
return &CheckResult{
|
return &CheckResult{
|
||||||
Name: c.Name(),
|
Name: c.Name(),
|
||||||
Status: StatusOK,
|
Status: StatusOK,
|
||||||
Message: fmt.Sprintf("All %d Claude processes have valid parents", validCount),
|
Message: fmt.Sprintf("All %d Claude processes are inside tmux", insideTmux),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
details := make([]string, len(orphans))
|
details := make([]string, 0, len(outsideTmux)+2)
|
||||||
for i, proc := range orphans {
|
details = append(details, "These may be your personal Claude sessions or orphaned Gas Town processes.")
|
||||||
details[i] = fmt.Sprintf("PID %d: %s (parent: %d)", proc.pid, proc.cmd, proc.ppid)
|
details = append(details, "Verify these are expected before manually killing any:")
|
||||||
|
for _, proc := range outsideTmux {
|
||||||
|
details = append(details, fmt.Sprintf(" PID %d: %s (parent: %d)", proc.pid, proc.cmd, proc.ppid))
|
||||||
}
|
}
|
||||||
|
|
||||||
return &CheckResult{
|
return &CheckResult{
|
||||||
Name: c.Name(),
|
Name: c.Name(),
|
||||||
Status: StatusWarning,
|
Status: StatusWarning,
|
||||||
Message: fmt.Sprintf("Found %d orphaned Claude process(es)", len(orphans)),
|
Message: fmt.Sprintf("Found %d Claude process(es) running outside tmux", len(outsideTmux)),
|
||||||
Details: details,
|
Details: details,
|
||||||
FixHint: "Run 'gt doctor --fix' to kill orphaned processes",
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fix kills orphaned processes, with safeguards for crew sessions.
|
|
||||||
func (c *OrphanProcessCheck) Fix(ctx *CheckContext) error {
|
|
||||||
if len(c.orphanPIDs) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SAFEGUARD: Get crew session pane PIDs to avoid killing crew processes.
|
|
||||||
// Even if a process appears orphaned, if its parent is a crew session pane,
|
|
||||||
// we should not kill it (the detection might be wrong).
|
|
||||||
crewPanePIDs := c.getCrewSessionPanePIDs()
|
|
||||||
|
|
||||||
var lastErr error
|
|
||||||
for _, pid := range c.orphanPIDs {
|
|
||||||
// Check if this process has a crew session ancestor
|
|
||||||
if c.hasCrewAncestor(pid, crewPanePIDs) {
|
|
||||||
// Skip - this process might belong to a crew session
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
proc, err := os.FindProcess(pid)
|
|
||||||
if err != nil {
|
|
||||||
lastErr = err
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := proc.Signal(os.Interrupt); err != nil {
|
|
||||||
// Try SIGKILL if SIGINT fails
|
|
||||||
if killErr := proc.Kill(); killErr != nil {
|
|
||||||
lastErr = killErr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return lastErr
|
|
||||||
}
|
|
||||||
|
|
||||||
// getCrewSessionPanePIDs returns pane PIDs for all crew sessions.
|
|
||||||
func (c *OrphanProcessCheck) getCrewSessionPanePIDs() map[int]bool {
|
|
||||||
pids := make(map[int]bool)
|
|
||||||
|
|
||||||
t := tmux.NewTmux()
|
|
||||||
sessions, err := t.ListSessions()
|
|
||||||
if err != nil {
|
|
||||||
return pids
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, session := range sessions {
|
|
||||||
if !isCrewSession(session) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Get pane PIDs for this crew session
|
|
||||||
out, err := exec.Command("tmux", "list-panes", "-t", session, "-F", "#{pane_pid}").Output()
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
|
||||||
var pid int
|
|
||||||
if _, err := fmt.Sscanf(line, "%d", &pid); err == nil {
|
|
||||||
pids[pid] = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return pids
|
|
||||||
}
|
|
||||||
|
|
||||||
// hasCrewAncestor checks if a process has a crew session pane as an ancestor.
|
|
||||||
func (c *OrphanProcessCheck) hasCrewAncestor(pid int, crewPanePIDs map[int]bool) bool {
|
|
||||||
if len(crewPanePIDs) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walk up the process tree
|
|
||||||
currentPID := pid
|
|
||||||
visited := make(map[int]bool)
|
|
||||||
|
|
||||||
for currentPID > 1 && !visited[currentPID] {
|
|
||||||
visited[currentPID] = true
|
|
||||||
|
|
||||||
// Check if this PID is a crew pane
|
|
||||||
if crewPanePIDs[currentPID] {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get parent PID
|
|
||||||
out, err := exec.Command("ps", "-p", fmt.Sprintf("%d", currentPID), "-o", "ppid=").Output() //nolint:gosec // G204: PID is numeric from internal state
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
var ppid int
|
|
||||||
if _, err := fmt.Sscanf(strings.TrimSpace(string(out)), "%d", &ppid); err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
currentPID = ppid
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
type processInfo struct {
|
type processInfo struct {
|
||||||
pid int
|
pid int
|
||||||
ppid int
|
ppid int
|
||||||
|
|||||||
134
internal/doctor/orphan_check_test.go
Normal file
134
internal/doctor/orphan_check_test.go
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
package doctor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewOrphanSessionCheck(t *testing.T) {
|
||||||
|
check := NewOrphanSessionCheck()
|
||||||
|
|
||||||
|
if check.Name() != "orphan-sessions" {
|
||||||
|
t.Errorf("expected name 'orphan-sessions', got %q", check.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
if !check.CanFix() {
|
||||||
|
t.Error("expected CanFix to return true for session check")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewOrphanProcessCheck(t *testing.T) {
|
||||||
|
check := NewOrphanProcessCheck()
|
||||||
|
|
||||||
|
if check.Name() != "orphan-processes" {
|
||||||
|
t.Errorf("expected name 'orphan-processes', got %q", check.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
// OrphanProcessCheck should NOT be fixable - it's informational only
|
||||||
|
if check.CanFix() {
|
||||||
|
t.Error("expected CanFix to return false for process check (informational only)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrphanProcessCheck_Run(t *testing.T) {
|
||||||
|
// This test verifies the check runs without error.
|
||||||
|
// Results depend on whether Claude processes exist in the test environment.
|
||||||
|
check := NewOrphanProcessCheck()
|
||||||
|
ctx := &CheckContext{TownRoot: t.TempDir()}
|
||||||
|
|
||||||
|
result := check.Run(ctx)
|
||||||
|
|
||||||
|
// Should return OK (no processes or all inside tmux) or Warning (processes outside tmux)
|
||||||
|
// Both are valid depending on test environment
|
||||||
|
if result.Status != StatusOK && result.Status != StatusWarning {
|
||||||
|
t.Errorf("expected StatusOK or StatusWarning, got %v: %s", result.Status, result.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If warning, should have informational details
|
||||||
|
if result.Status == StatusWarning {
|
||||||
|
if len(result.Details) < 3 {
|
||||||
|
t.Errorf("expected at least 3 detail lines (2 info + 1 process), got %d", len(result.Details))
|
||||||
|
}
|
||||||
|
// Should NOT have a FixHint since this is informational only
|
||||||
|
if result.FixHint != "" {
|
||||||
|
t.Errorf("expected no FixHint for informational check, got %q", result.FixHint)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrphanProcessCheck_MessageContent(t *testing.T) {
|
||||||
|
// Verify the check description is correct
|
||||||
|
check := NewOrphanProcessCheck()
|
||||||
|
|
||||||
|
expectedDesc := "Detect Claude processes outside tmux"
|
||||||
|
if check.Description() != expectedDesc {
|
||||||
|
t.Errorf("expected description %q, got %q", expectedDesc, check.Description())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsCrewSession(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
session string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"gt-gastown-crew-joe", true},
|
||||||
|
{"gt-beads-crew-max", true},
|
||||||
|
{"gt-rig-crew-a", true},
|
||||||
|
{"gt-gastown-witness", false},
|
||||||
|
{"gt-gastown-refinery", false},
|
||||||
|
{"gt-gastown-polecat1", false},
|
||||||
|
{"hq-deacon", false},
|
||||||
|
{"hq-mayor", false},
|
||||||
|
{"other-session", false},
|
||||||
|
{"gt-crew", false}, // Not enough parts
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.session, func(t *testing.T) {
|
||||||
|
got := isCrewSession(tt.session)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("isCrewSession(%q) = %v, want %v", tt.session, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOrphanSessionCheck_IsValidSession(t *testing.T) {
|
||||||
|
check := NewOrphanSessionCheck()
|
||||||
|
validRigs := []string{"gastown", "beads"}
|
||||||
|
mayorSession := "hq-mayor"
|
||||||
|
deaconSession := "hq-deacon"
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
session string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
// Town-level sessions
|
||||||
|
{"hq-mayor", true},
|
||||||
|
{"hq-deacon", true},
|
||||||
|
|
||||||
|
// Valid rig sessions
|
||||||
|
{"gt-gastown-witness", true},
|
||||||
|
{"gt-gastown-refinery", true},
|
||||||
|
{"gt-gastown-polecat1", true},
|
||||||
|
{"gt-beads-witness", true},
|
||||||
|
{"gt-beads-refinery", true},
|
||||||
|
{"gt-beads-crew-max", true},
|
||||||
|
|
||||||
|
// Invalid rig sessions (rig doesn't exist)
|
||||||
|
{"gt-unknown-witness", false},
|
||||||
|
{"gt-foo-refinery", false},
|
||||||
|
|
||||||
|
// Non-gt sessions (should not be checked by this function,
|
||||||
|
// but if called, they'd fail format validation)
|
||||||
|
{"other-session", false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.session, func(t *testing.T) {
|
||||||
|
got := check.isValidSession(tt.session, validRigs, mayorSession, deaconSession)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("isValidSession(%q) = %v, want %v", tt.session, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user