Merge pull request #272 from julianknutsen/fix/100-doctor-process-check-informational

fix(doctor): gt doctor --fix kills user's personal Claude sessions
This commit is contained in:
Steve Yegge
2026-01-08 17:22:02 -08:00
committed by GitHub
2 changed files with 157 additions and 127 deletions

View File

@@ -225,26 +225,26 @@ func (c *OrphanSessionCheck) isValidSession(sess string, validRigs []string, may
return true
}
// OrphanProcessCheck detects orphaned Claude/claude-code processes
// that are not associated with a Gas Town tmux session.
// OrphanProcessCheck detects Claude/claude-code processes that are not
// running inside a tmux session. These may be user's personal Claude sessions
// or legitimately orphaned processes from crashed Gas Town sessions.
// This check is informational only - it does not auto-fix since we cannot
// distinguish user sessions from orphaned Gas Town processes.
type OrphanProcessCheck struct {
FixableCheck
orphanPIDs []int // Cached during Run for use in Fix
BaseCheck
}
// NewOrphanProcessCheck creates a new orphan process check.
func NewOrphanProcessCheck() *OrphanProcessCheck {
return &OrphanProcessCheck{
FixableCheck: FixableCheck{
BaseCheck: BaseCheck{
CheckName: "orphan-processes",
CheckDescription: "Detect orphaned Claude processes",
},
BaseCheck: BaseCheck{
CheckName: "orphan-processes",
CheckDescription: "Detect Claude processes outside tmux",
},
}
}
// Run checks for orphaned Claude processes.
// Run checks for Claude processes running outside tmux.
func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
// Get list of tmux session PIDs
tmuxPIDs, err := c.getTmuxSessionPIDs()
@@ -276,145 +276,41 @@ func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
}
}
// Check which Claude processes are orphaned
var orphans []processInfo
var validCount int
// Check which Claude processes are outside tmux
var outsideTmux []processInfo
var insideTmux int
for _, proc := range claudeProcs {
if c.isOrphanProcess(proc, tmuxPIDs) {
orphans = append(orphans, proc)
outsideTmux = append(outsideTmux, proc)
} else {
validCount++
insideTmux++
}
}
// Cache orphan PIDs for Fix
c.orphanPIDs = make([]int, len(orphans))
for i, p := range orphans {
c.orphanPIDs[i] = p.pid
}
if len(orphans) == 0 {
if len(outsideTmux) == 0 {
return &CheckResult{
Name: c.Name(),
Status: StatusOK,
Message: fmt.Sprintf("All %d Claude processes have valid parents", validCount),
Message: fmt.Sprintf("All %d Claude processes are inside tmux", insideTmux),
}
}
details := make([]string, len(orphans))
for i, proc := range orphans {
details[i] = fmt.Sprintf("PID %d: %s (parent: %d)", proc.pid, proc.cmd, proc.ppid)
details := make([]string, 0, len(outsideTmux)+2)
details = append(details, "These may be your personal Claude sessions or orphaned Gas Town processes.")
details = append(details, "Verify these are expected before manually killing any:")
for _, proc := range outsideTmux {
details = append(details, fmt.Sprintf(" PID %d: %s (parent: %d)", proc.pid, proc.cmd, proc.ppid))
}
return &CheckResult{
Name: c.Name(),
Status: StatusWarning,
Message: fmt.Sprintf("Found %d orphaned Claude process(es)", len(orphans)),
Message: fmt.Sprintf("Found %d Claude process(es) running outside tmux", len(outsideTmux)),
Details: details,
FixHint: "Run 'gt doctor --fix' to kill orphaned processes",
}
}
// Fix kills orphaned processes, with safeguards for crew sessions.
func (c *OrphanProcessCheck) Fix(ctx *CheckContext) error {
if len(c.orphanPIDs) == 0 {
return nil
}
// SAFEGUARD: Get crew session pane PIDs to avoid killing crew processes.
// Even if a process appears orphaned, if its parent is a crew session pane,
// we should not kill it (the detection might be wrong).
crewPanePIDs := c.getCrewSessionPanePIDs()
var lastErr error
for _, pid := range c.orphanPIDs {
// Check if this process has a crew session ancestor
if c.hasCrewAncestor(pid, crewPanePIDs) {
// Skip - this process might belong to a crew session
continue
}
proc, err := os.FindProcess(pid)
if err != nil {
lastErr = err
continue
}
if err := proc.Signal(os.Interrupt); err != nil {
// Try SIGKILL if SIGINT fails
if killErr := proc.Kill(); killErr != nil {
lastErr = killErr
}
}
}
return lastErr
}
// getCrewSessionPanePIDs returns pane PIDs for all crew sessions.
func (c *OrphanProcessCheck) getCrewSessionPanePIDs() map[int]bool {
pids := make(map[int]bool)
t := tmux.NewTmux()
sessions, err := t.ListSessions()
if err != nil {
return pids
}
for _, session := range sessions {
if !isCrewSession(session) {
continue
}
// Get pane PIDs for this crew session
out, err := exec.Command("tmux", "list-panes", "-t", session, "-F", "#{pane_pid}").Output()
if err != nil {
continue
}
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
var pid int
if _, err := fmt.Sscanf(line, "%d", &pid); err == nil {
pids[pid] = true
}
}
}
return pids
}
// hasCrewAncestor checks if a process has a crew session pane as an ancestor.
func (c *OrphanProcessCheck) hasCrewAncestor(pid int, crewPanePIDs map[int]bool) bool {
if len(crewPanePIDs) == 0 {
return false
}
// Walk up the process tree
currentPID := pid
visited := make(map[int]bool)
for currentPID > 1 && !visited[currentPID] {
visited[currentPID] = true
// Check if this PID is a crew pane
if crewPanePIDs[currentPID] {
return true
}
// Get parent PID
out, err := exec.Command("ps", "-p", fmt.Sprintf("%d", currentPID), "-o", "ppid=").Output() //nolint:gosec // G204: PID is numeric from internal state
if err != nil {
break
}
var ppid int
if _, err := fmt.Sscanf(strings.TrimSpace(string(out)), "%d", &ppid); err != nil {
break
}
currentPID = ppid
}
return false
}
type processInfo struct {
pid int
ppid int

View File

@@ -0,0 +1,134 @@
package doctor
import (
"testing"
)
func TestNewOrphanSessionCheck(t *testing.T) {
check := NewOrphanSessionCheck()
if check.Name() != "orphan-sessions" {
t.Errorf("expected name 'orphan-sessions', got %q", check.Name())
}
if !check.CanFix() {
t.Error("expected CanFix to return true for session check")
}
}
func TestNewOrphanProcessCheck(t *testing.T) {
check := NewOrphanProcessCheck()
if check.Name() != "orphan-processes" {
t.Errorf("expected name 'orphan-processes', got %q", check.Name())
}
// OrphanProcessCheck should NOT be fixable - it's informational only
if check.CanFix() {
t.Error("expected CanFix to return false for process check (informational only)")
}
}
func TestOrphanProcessCheck_Run(t *testing.T) {
// This test verifies the check runs without error.
// Results depend on whether Claude processes exist in the test environment.
check := NewOrphanProcessCheck()
ctx := &CheckContext{TownRoot: t.TempDir()}
result := check.Run(ctx)
// Should return OK (no processes or all inside tmux) or Warning (processes outside tmux)
// Both are valid depending on test environment
if result.Status != StatusOK && result.Status != StatusWarning {
t.Errorf("expected StatusOK or StatusWarning, got %v: %s", result.Status, result.Message)
}
// If warning, should have informational details
if result.Status == StatusWarning {
if len(result.Details) < 3 {
t.Errorf("expected at least 3 detail lines (2 info + 1 process), got %d", len(result.Details))
}
// Should NOT have a FixHint since this is informational only
if result.FixHint != "" {
t.Errorf("expected no FixHint for informational check, got %q", result.FixHint)
}
}
}
func TestOrphanProcessCheck_MessageContent(t *testing.T) {
// Verify the check description is correct
check := NewOrphanProcessCheck()
expectedDesc := "Detect Claude processes outside tmux"
if check.Description() != expectedDesc {
t.Errorf("expected description %q, got %q", expectedDesc, check.Description())
}
}
func TestIsCrewSession(t *testing.T) {
tests := []struct {
session string
want bool
}{
{"gt-gastown-crew-joe", true},
{"gt-beads-crew-max", true},
{"gt-rig-crew-a", true},
{"gt-gastown-witness", false},
{"gt-gastown-refinery", false},
{"gt-gastown-polecat1", false},
{"hq-deacon", false},
{"hq-mayor", false},
{"other-session", false},
{"gt-crew", false}, // Not enough parts
}
for _, tt := range tests {
t.Run(tt.session, func(t *testing.T) {
got := isCrewSession(tt.session)
if got != tt.want {
t.Errorf("isCrewSession(%q) = %v, want %v", tt.session, got, tt.want)
}
})
}
}
func TestOrphanSessionCheck_IsValidSession(t *testing.T) {
check := NewOrphanSessionCheck()
validRigs := []string{"gastown", "beads"}
mayorSession := "hq-mayor"
deaconSession := "hq-deacon"
tests := []struct {
session string
want bool
}{
// Town-level sessions
{"hq-mayor", true},
{"hq-deacon", true},
// Valid rig sessions
{"gt-gastown-witness", true},
{"gt-gastown-refinery", true},
{"gt-gastown-polecat1", true},
{"gt-beads-witness", true},
{"gt-beads-refinery", true},
{"gt-beads-crew-max", true},
// Invalid rig sessions (rig doesn't exist)
{"gt-unknown-witness", false},
{"gt-foo-refinery", false},
// Non-gt sessions (should not be checked by this function,
// but if called, they'd fail format validation)
{"other-session", false},
}
for _, tt := range tests {
t.Run(tt.session, func(t *testing.T) {
got := check.isValidSession(tt.session, validRigs, mayorSession, deaconSession)
if got != tt.want {
t.Errorf("isValidSession(%q) = %v, want %v", tt.session, got, tt.want)
}
})
}
}