Merge pull request #272 from julianknutsen/fix/100-doctor-process-check-informational
fix(doctor): gt doctor --fix kills user's personal Claude sessions
This commit is contained in:
@@ -225,26 +225,26 @@ func (c *OrphanSessionCheck) isValidSession(sess string, validRigs []string, may
|
||||
return true
|
||||
}
|
||||
|
||||
// OrphanProcessCheck detects orphaned Claude/claude-code processes
|
||||
// that are not associated with a Gas Town tmux session.
|
||||
// OrphanProcessCheck detects Claude/claude-code processes that are not
|
||||
// running inside a tmux session. These may be user's personal Claude sessions
|
||||
// or legitimately orphaned processes from crashed Gas Town sessions.
|
||||
// This check is informational only - it does not auto-fix since we cannot
|
||||
// distinguish user sessions from orphaned Gas Town processes.
|
||||
type OrphanProcessCheck struct {
|
||||
FixableCheck
|
||||
orphanPIDs []int // Cached during Run for use in Fix
|
||||
BaseCheck
|
||||
}
|
||||
|
||||
// NewOrphanProcessCheck creates a new orphan process check.
|
||||
func NewOrphanProcessCheck() *OrphanProcessCheck {
|
||||
return &OrphanProcessCheck{
|
||||
FixableCheck: FixableCheck{
|
||||
BaseCheck: BaseCheck{
|
||||
CheckName: "orphan-processes",
|
||||
CheckDescription: "Detect orphaned Claude processes",
|
||||
},
|
||||
BaseCheck: BaseCheck{
|
||||
CheckName: "orphan-processes",
|
||||
CheckDescription: "Detect Claude processes outside tmux",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Run checks for orphaned Claude processes.
|
||||
// Run checks for Claude processes running outside tmux.
|
||||
func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
|
||||
// Get list of tmux session PIDs
|
||||
tmuxPIDs, err := c.getTmuxSessionPIDs()
|
||||
@@ -276,145 +276,41 @@ func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult {
|
||||
}
|
||||
}
|
||||
|
||||
// Check which Claude processes are orphaned
|
||||
var orphans []processInfo
|
||||
var validCount int
|
||||
// Check which Claude processes are outside tmux
|
||||
var outsideTmux []processInfo
|
||||
var insideTmux int
|
||||
|
||||
for _, proc := range claudeProcs {
|
||||
if c.isOrphanProcess(proc, tmuxPIDs) {
|
||||
orphans = append(orphans, proc)
|
||||
outsideTmux = append(outsideTmux, proc)
|
||||
} else {
|
||||
validCount++
|
||||
insideTmux++
|
||||
}
|
||||
}
|
||||
|
||||
// Cache orphan PIDs for Fix
|
||||
c.orphanPIDs = make([]int, len(orphans))
|
||||
for i, p := range orphans {
|
||||
c.orphanPIDs[i] = p.pid
|
||||
}
|
||||
|
||||
if len(orphans) == 0 {
|
||||
if len(outsideTmux) == 0 {
|
||||
return &CheckResult{
|
||||
Name: c.Name(),
|
||||
Status: StatusOK,
|
||||
Message: fmt.Sprintf("All %d Claude processes have valid parents", validCount),
|
||||
Message: fmt.Sprintf("All %d Claude processes are inside tmux", insideTmux),
|
||||
}
|
||||
}
|
||||
|
||||
details := make([]string, len(orphans))
|
||||
for i, proc := range orphans {
|
||||
details[i] = fmt.Sprintf("PID %d: %s (parent: %d)", proc.pid, proc.cmd, proc.ppid)
|
||||
details := make([]string, 0, len(outsideTmux)+2)
|
||||
details = append(details, "These may be your personal Claude sessions or orphaned Gas Town processes.")
|
||||
details = append(details, "Verify these are expected before manually killing any:")
|
||||
for _, proc := range outsideTmux {
|
||||
details = append(details, fmt.Sprintf(" PID %d: %s (parent: %d)", proc.pid, proc.cmd, proc.ppid))
|
||||
}
|
||||
|
||||
return &CheckResult{
|
||||
Name: c.Name(),
|
||||
Status: StatusWarning,
|
||||
Message: fmt.Sprintf("Found %d orphaned Claude process(es)", len(orphans)),
|
||||
Message: fmt.Sprintf("Found %d Claude process(es) running outside tmux", len(outsideTmux)),
|
||||
Details: details,
|
||||
FixHint: "Run 'gt doctor --fix' to kill orphaned processes",
|
||||
}
|
||||
}
|
||||
|
||||
// Fix kills orphaned processes, with safeguards for crew sessions.
|
||||
func (c *OrphanProcessCheck) Fix(ctx *CheckContext) error {
|
||||
if len(c.orphanPIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SAFEGUARD: Get crew session pane PIDs to avoid killing crew processes.
|
||||
// Even if a process appears orphaned, if its parent is a crew session pane,
|
||||
// we should not kill it (the detection might be wrong).
|
||||
crewPanePIDs := c.getCrewSessionPanePIDs()
|
||||
|
||||
var lastErr error
|
||||
for _, pid := range c.orphanPIDs {
|
||||
// Check if this process has a crew session ancestor
|
||||
if c.hasCrewAncestor(pid, crewPanePIDs) {
|
||||
// Skip - this process might belong to a crew session
|
||||
continue
|
||||
}
|
||||
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
if err := proc.Signal(os.Interrupt); err != nil {
|
||||
// Try SIGKILL if SIGINT fails
|
||||
if killErr := proc.Kill(); killErr != nil {
|
||||
lastErr = killErr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lastErr
|
||||
}
|
||||
|
||||
// getCrewSessionPanePIDs returns pane PIDs for all crew sessions.
|
||||
func (c *OrphanProcessCheck) getCrewSessionPanePIDs() map[int]bool {
|
||||
pids := make(map[int]bool)
|
||||
|
||||
t := tmux.NewTmux()
|
||||
sessions, err := t.ListSessions()
|
||||
if err != nil {
|
||||
return pids
|
||||
}
|
||||
|
||||
for _, session := range sessions {
|
||||
if !isCrewSession(session) {
|
||||
continue
|
||||
}
|
||||
// Get pane PIDs for this crew session
|
||||
out, err := exec.Command("tmux", "list-panes", "-t", session, "-F", "#{pane_pid}").Output()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||
var pid int
|
||||
if _, err := fmt.Sscanf(line, "%d", &pid); err == nil {
|
||||
pids[pid] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return pids
|
||||
}
|
||||
|
||||
// hasCrewAncestor checks if a process has a crew session pane as an ancestor.
|
||||
func (c *OrphanProcessCheck) hasCrewAncestor(pid int, crewPanePIDs map[int]bool) bool {
|
||||
if len(crewPanePIDs) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Walk up the process tree
|
||||
currentPID := pid
|
||||
visited := make(map[int]bool)
|
||||
|
||||
for currentPID > 1 && !visited[currentPID] {
|
||||
visited[currentPID] = true
|
||||
|
||||
// Check if this PID is a crew pane
|
||||
if crewPanePIDs[currentPID] {
|
||||
return true
|
||||
}
|
||||
|
||||
// Get parent PID
|
||||
out, err := exec.Command("ps", "-p", fmt.Sprintf("%d", currentPID), "-o", "ppid=").Output() //nolint:gosec // G204: PID is numeric from internal state
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
var ppid int
|
||||
if _, err := fmt.Sscanf(strings.TrimSpace(string(out)), "%d", &ppid); err != nil {
|
||||
break
|
||||
}
|
||||
currentPID = ppid
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type processInfo struct {
|
||||
pid int
|
||||
ppid int
|
||||
|
||||
134
internal/doctor/orphan_check_test.go
Normal file
134
internal/doctor/orphan_check_test.go
Normal file
@@ -0,0 +1,134 @@
|
||||
package doctor
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewOrphanSessionCheck(t *testing.T) {
|
||||
check := NewOrphanSessionCheck()
|
||||
|
||||
if check.Name() != "orphan-sessions" {
|
||||
t.Errorf("expected name 'orphan-sessions', got %q", check.Name())
|
||||
}
|
||||
|
||||
if !check.CanFix() {
|
||||
t.Error("expected CanFix to return true for session check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewOrphanProcessCheck(t *testing.T) {
|
||||
check := NewOrphanProcessCheck()
|
||||
|
||||
if check.Name() != "orphan-processes" {
|
||||
t.Errorf("expected name 'orphan-processes', got %q", check.Name())
|
||||
}
|
||||
|
||||
// OrphanProcessCheck should NOT be fixable - it's informational only
|
||||
if check.CanFix() {
|
||||
t.Error("expected CanFix to return false for process check (informational only)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOrphanProcessCheck_Run(t *testing.T) {
|
||||
// This test verifies the check runs without error.
|
||||
// Results depend on whether Claude processes exist in the test environment.
|
||||
check := NewOrphanProcessCheck()
|
||||
ctx := &CheckContext{TownRoot: t.TempDir()}
|
||||
|
||||
result := check.Run(ctx)
|
||||
|
||||
// Should return OK (no processes or all inside tmux) or Warning (processes outside tmux)
|
||||
// Both are valid depending on test environment
|
||||
if result.Status != StatusOK && result.Status != StatusWarning {
|
||||
t.Errorf("expected StatusOK or StatusWarning, got %v: %s", result.Status, result.Message)
|
||||
}
|
||||
|
||||
// If warning, should have informational details
|
||||
if result.Status == StatusWarning {
|
||||
if len(result.Details) < 3 {
|
||||
t.Errorf("expected at least 3 detail lines (2 info + 1 process), got %d", len(result.Details))
|
||||
}
|
||||
// Should NOT have a FixHint since this is informational only
|
||||
if result.FixHint != "" {
|
||||
t.Errorf("expected no FixHint for informational check, got %q", result.FixHint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestOrphanProcessCheck_MessageContent(t *testing.T) {
|
||||
// Verify the check description is correct
|
||||
check := NewOrphanProcessCheck()
|
||||
|
||||
expectedDesc := "Detect Claude processes outside tmux"
|
||||
if check.Description() != expectedDesc {
|
||||
t.Errorf("expected description %q, got %q", expectedDesc, check.Description())
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsCrewSession(t *testing.T) {
|
||||
tests := []struct {
|
||||
session string
|
||||
want bool
|
||||
}{
|
||||
{"gt-gastown-crew-joe", true},
|
||||
{"gt-beads-crew-max", true},
|
||||
{"gt-rig-crew-a", true},
|
||||
{"gt-gastown-witness", false},
|
||||
{"gt-gastown-refinery", false},
|
||||
{"gt-gastown-polecat1", false},
|
||||
{"hq-deacon", false},
|
||||
{"hq-mayor", false},
|
||||
{"other-session", false},
|
||||
{"gt-crew", false}, // Not enough parts
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.session, func(t *testing.T) {
|
||||
got := isCrewSession(tt.session)
|
||||
if got != tt.want {
|
||||
t.Errorf("isCrewSession(%q) = %v, want %v", tt.session, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOrphanSessionCheck_IsValidSession(t *testing.T) {
|
||||
check := NewOrphanSessionCheck()
|
||||
validRigs := []string{"gastown", "beads"}
|
||||
mayorSession := "hq-mayor"
|
||||
deaconSession := "hq-deacon"
|
||||
|
||||
tests := []struct {
|
||||
session string
|
||||
want bool
|
||||
}{
|
||||
// Town-level sessions
|
||||
{"hq-mayor", true},
|
||||
{"hq-deacon", true},
|
||||
|
||||
// Valid rig sessions
|
||||
{"gt-gastown-witness", true},
|
||||
{"gt-gastown-refinery", true},
|
||||
{"gt-gastown-polecat1", true},
|
||||
{"gt-beads-witness", true},
|
||||
{"gt-beads-refinery", true},
|
||||
{"gt-beads-crew-max", true},
|
||||
|
||||
// Invalid rig sessions (rig doesn't exist)
|
||||
{"gt-unknown-witness", false},
|
||||
{"gt-foo-refinery", false},
|
||||
|
||||
// Non-gt sessions (should not be checked by this function,
|
||||
// but if called, they'd fail format validation)
|
||||
{"other-session", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.session, func(t *testing.T) {
|
||||
got := check.isValidSession(tt.session, validRigs, mayorSession, deaconSession)
|
||||
if got != tt.want {
|
||||
t.Errorf("isValidSession(%q) = %v, want %v", tt.session, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user