From 3981e46688a327a52ab52744821f7ad147dc9965 Mon Sep 17 00:00:00 2001 From: Steve Yegge Date: Sat, 27 Dec 2025 01:00:16 -0800 Subject: [PATCH] feat: Add safeguards to protect crew sessions from auto-cleanup Crew workers are human-managed and should never be auto-killed by gt doctor --fix. This adds defense-in-depth protection: 1. OrphanSessionCheck.Fix() now skips any session matching the gt--crew- pattern 2. OrphanProcessCheck.Fix() now checks if a process has a crew session pane as an ancestor before killing it Even if detection fails (like the pgrep bug we just fixed), crew sessions and their processes will be protected. Generated with Claude Code Co-Authored-By: Claude Opus 4.5 --- internal/doctor/orphan_check.go | 96 ++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/internal/doctor/orphan_check.go b/internal/doctor/orphan_check.go index 91face23..2d8f572c 100644 --- a/internal/doctor/orphan_check.go +++ b/internal/doctor/orphan_check.go @@ -101,7 +101,7 @@ func (c *OrphanSessionCheck) Run(ctx *CheckContext) *CheckResult { } } -// Fix kills all orphaned sessions. +// Fix kills all orphaned sessions, except crew sessions which are protected. func (c *OrphanSessionCheck) Fix(ctx *CheckContext) error { if len(c.orphanSessions) == 0 { return nil @@ -111,6 +111,11 @@ func (c *OrphanSessionCheck) Fix(ctx *CheckContext) error { var lastErr error for _, session := range c.orphanSessions { + // SAFEGUARD: Never auto-kill crew sessions. + // Crew workers are human-managed and require explicit action. + if isCrewSession(session) { + continue + } if err := t.KillSession(session); err != nil { lastErr = err } @@ -119,6 +124,18 @@ func (c *OrphanSessionCheck) Fix(ctx *CheckContext) error { return lastErr } +// isCrewSession returns true if the session name matches the crew pattern. +// Crew sessions are gt--crew- and are protected from auto-cleanup. +func isCrewSession(session string) bool { + // Pattern: gt--crew- + // Example: gt-gastown-crew-joe + parts := strings.Split(session, "-") + if len(parts) >= 4 && parts[0] == "gt" && parts[2] == "crew" { + return true + } + return false +} + // getValidRigs returns a list of valid rig names from the workspace. func (c *OrphanSessionCheck) getValidRigs(townRoot string) []string { var rigs []string @@ -294,14 +311,25 @@ func (c *OrphanProcessCheck) Run(ctx *CheckContext) *CheckResult { } } -// Fix kills all orphaned processes. +// Fix kills orphaned processes, with safeguards for crew sessions. func (c *OrphanProcessCheck) Fix(ctx *CheckContext) error { if len(c.orphanPIDs) == 0 { return nil } + // SAFEGUARD: Get crew session pane PIDs to avoid killing crew processes. + // Even if a process appears orphaned, if its parent is a crew session pane, + // we should not kill it (the detection might be wrong). + crewPanePIDs := c.getCrewSessionPanePIDs() + var lastErr error for _, pid := range c.orphanPIDs { + // Check if this process has a crew session ancestor + if c.hasCrewAncestor(pid, crewPanePIDs) { + // Skip - this process might belong to a crew session + continue + } + proc, err := os.FindProcess(pid) if err != nil { lastErr = err @@ -318,6 +346,70 @@ func (c *OrphanProcessCheck) Fix(ctx *CheckContext) error { return lastErr } +// getCrewSessionPanePIDs returns pane PIDs for all crew sessions. +func (c *OrphanProcessCheck) getCrewSessionPanePIDs() map[int]bool { + pids := make(map[int]bool) + + t := tmux.NewTmux() + sessions, err := t.ListSessions() + if err != nil { + return pids + } + + for _, session := range sessions { + if !isCrewSession(session) { + continue + } + // Get pane PIDs for this crew session + out, err := exec.Command("tmux", "list-panes", "-t", session, "-F", "#{pane_pid}").Output() + if err != nil { + continue + } + for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { + var pid int + if _, err := fmt.Sscanf(line, "%d", &pid); err == nil { + pids[pid] = true + } + } + } + + return pids +} + +// hasCrewAncestor checks if a process has a crew session pane as an ancestor. +func (c *OrphanProcessCheck) hasCrewAncestor(pid int, crewPanePIDs map[int]bool) bool { + if len(crewPanePIDs) == 0 { + return false + } + + // Walk up the process tree + currentPID := pid + visited := make(map[int]bool) + + for currentPID > 1 && !visited[currentPID] { + visited[currentPID] = true + + // Check if this PID is a crew pane + if crewPanePIDs[currentPID] { + return true + } + + // Get parent PID + out, err := exec.Command("ps", "-p", fmt.Sprintf("%d", currentPID), "-o", "ppid=").Output() + if err != nil { + break + } + + var ppid int + if _, err := fmt.Sscanf(strings.TrimSpace(string(out)), "%d", &ppid); err != nil { + break + } + currentPID = ppid + } + + return false +} + type processInfo struct { pid int ppid int