Merge upstream/main into fix/fresh-install-fixes

2026-01-21 22:48:06 +07:00
parent 035b7775ea a8be623eeb
commit b2b9cbc836
148 changed files with 5464 additions and 2027 deletions
--- a/internal/doctor/agent_beads_check.go
+++ b/internal/doctor/agent_beads_check.go
@@ -170,7 +170,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error {
 			RoleType:   "deacon",
 			Rig:        "",
 			AgentState: "idle",
-			RoleBead:   beads.DeaconRoleBeadIDTown(),
 		}
 		desc := "Deacon (daemon beacon) - receives mechanical heartbeats, runs town plugins and monitoring."
 		if _, err := townBd.CreateAgentBead(deaconID, desc, fields); err != nil {
@@ -184,7 +183,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error {
 			RoleType:   "mayor",
 			Rig:        "",
 			AgentState: "idle",
-			RoleBead:   beads.MayorRoleBeadIDTown(),
 		}
 		desc := "Mayor - global coordinator, handles cross-rig communication and escalations."
 		if _, err := townBd.CreateAgentBead(mayorID, desc, fields); err != nil {
@@ -231,7 +229,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error {
 				RoleType:   "witness",
 				Rig:        rigName,
 				AgentState: "idle",
-				RoleBead:   beads.RoleBeadIDTown("witness"),
 			}
 			desc := fmt.Sprintf("Witness for %s - monitors polecat health and progress.", rigName)
 			if _, err := bd.CreateAgentBead(witnessID, desc, fields); err != nil {
@@ -245,7 +242,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error {
 				RoleType:   "refinery",
 				Rig:        rigName,
 				AgentState: "idle",
-				RoleBead:   beads.RoleBeadIDTown("refinery"),
 			}
 			desc := fmt.Sprintf("Refinery for %s - processes merge queue.", rigName)
 			if _, err := bd.CreateAgentBead(refineryID, desc, fields); err != nil {
@@ -262,7 +258,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error {
 					RoleType:   "crew",
 					Rig:        rigName,
 					AgentState: "idle",
-					RoleBead:   beads.RoleBeadIDTown("crew"),
 				}
 				desc := fmt.Sprintf("Crew worker %s in %s - human-managed persistent workspace.", workerName, rigName)
 				if _, err := bd.CreateAgentBead(crewID, desc, fields); err != nil {
--- a/internal/doctor/claude_settings_check.go
+++ b/internal/doctor/claude_settings_check.go
@@ -510,8 +510,9 @@ func (c *ClaudeSettingsCheck) Fix(ctx *CheckContext) error {
 				sf.agentType == "deacon" || sf.agentType == "mayor" {
 				running, _ := t.HasSession(sf.sessionName)
 				if running {
-					// Cycle the agent by killing and letting gt up restart it
-					_ = t.KillSession(sf.sessionName)
+					// Cycle the agent by killing and letting gt up restart it.
+					// Use KillSessionWithProcesses to ensure all descendant processes are killed.
+					_ = t.KillSessionWithProcesses(sf.sessionName)
 				}
 			}
 		}
--- a/internal/doctor/misclassified_wisp_check.go
+++ b/internal/doctor/misclassified_wisp_check.go
@@ -0,0 +1,206 @@
+package doctor
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/steveyegge/gastown/internal/beads"
+)
+
+// CheckMisclassifiedWisps detects issues that should be marked as wisps but aren't.
+// Wisps are ephemeral issues for operational workflows (patrols, MRs, mail).
+// This check finds non-closed issues that have wisp characteristics but lack the wisp:true flag.
+type CheckMisclassifiedWisps struct {
+	FixableCheck
+	misclassified     []misclassifiedWisp
+	misclassifiedRigs map[string]int // rig name (or "town") -> number of findings
+}
+
+type misclassifiedWisp struct {
+	rigName string // rig the issue was found in, or "town" for town-level beads
+	id      string // issue ID as read from issues.jsonl
+	title   string // issue title as read from issues.jsonl
+	reason  string // explanation returned by shouldBeWisp
+}
+
+// NewCheckMisclassifiedWisps creates the "misclassified-wisps" check in CategoryCleanup with an empty per-rig count map.
+func NewCheckMisclassifiedWisps() *CheckMisclassifiedWisps {
+	return &CheckMisclassifiedWisps{
+		FixableCheck: FixableCheck{
+			BaseCheck: BaseCheck{
+				CheckName:        "misclassified-wisps",
+				CheckDescription: "Detect issues that should be wisps but aren't marked as ephemeral",
+				CheckCategory:    CategoryCleanup,
+			},
+		},
+		misclassifiedRigs: make(map[string]int),
+	}
+}
+
+// Run scans each discovered rig and the town-level beads, and warns when non-closed issues look like wisps but aren't flagged as such.
+func (c *CheckMisclassifiedWisps) Run(ctx *CheckContext) *CheckResult {
+	c.misclassified = nil // reset findings from any previous run
+	c.misclassifiedRigs = make(map[string]int)
+
+	rigs, err := discoverRigs(ctx.TownRoot)
+	if err != nil {
+		return &CheckResult{
+			Name:    c.Name(),
+			Status:  StatusError,
+			Message: "Failed to discover rigs",
+			Details: []string{err.Error()},
+		}
+	}
+
+	if len(rigs) == 0 {
+		return &CheckResult{
+			Name:    c.Name(),
+			Status:  StatusOK,
+			Message: "No rigs configured",
+		}
+	}
+
+	var details []string
+
+	for _, rigName := range rigs {
+		rigPath := filepath.Join(ctx.TownRoot, rigName)
+		found := c.findMisclassifiedWisps(rigPath, rigName)
+		if len(found) > 0 {
+			c.misclassified = append(c.misclassified, found...)
+			c.misclassifiedRigs[rigName] = len(found)
+			details = append(details, fmt.Sprintf("%s: %d misclassified wisp(s)", rigName, len(found)))
+		}
+	}
+
+	// Also check town-level beads; findings are recorded under the name "town"
+	townFound := c.findMisclassifiedWisps(ctx.TownRoot, "town")
+	if len(townFound) > 0 {
+		c.misclassified = append(c.misclassified, townFound...)
+		c.misclassifiedRigs["town"] = len(townFound)
+		details = append(details, fmt.Sprintf("town: %d misclassified wisp(s)", len(townFound)))
+	}
+
+	total := len(c.misclassified)
+	if total > 0 {
+		return &CheckResult{
+			Name:    c.Name(),
+			Status:  StatusWarning,
+			Message: fmt.Sprintf("%d issue(s) should be marked as wisps", total),
+			Details: details,
+			FixHint: "Run 'gt doctor --fix' to mark these issues as ephemeral",
+		}
+	}
+
+	return &CheckResult{
+		Name:    c.Name(),
+		Status:  StatusOK,
+		Message: "No misclassified wisps found",
+	}
+}
+
+// findMisclassifiedWisps scans issues.jsonl in the beads directory resolved from
+// path and returns every non-closed, non-wisp issue that shouldBeWisp flags. rigName
+// only labels the findings. An issues file that cannot be opened yields nil.
+func (c *CheckMisclassifiedWisps) findMisclassifiedWisps(path string, rigName string) []misclassifiedWisp {
+	beadsDir := beads.ResolveBeadsDir(path)
+	issuesPath := filepath.Join(beadsDir, "issues.jsonl")
+	file, err := os.Open(issuesPath)
+	if err != nil {
+		return nil // No issues file
+	}
+	defer file.Close()
+
+	var found []misclassifiedWisp
+	scanner := bufio.NewScanner(file)
+	// A JSONL record can exceed bufio.Scanner's 64 KiB default token limit, which
+	// would silently end the scan early; allow lines of up to 16 MiB instead.
+	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if line == "" {
+			continue
+		}
+
+		var issue struct {
+			ID     string   `json:"id"`
+			Title  string   `json:"title"`
+			Status string   `json:"status"`
+			Type   string   `json:"issue_type"`
+			Labels []string `json:"labels"`
+			Wisp   bool     `json:"wisp"`
+		}
+		if err := json.Unmarshal([]byte(line), &issue); err != nil {
+			continue // Skip records that are not valid JSON
+		}
+
+		// Skip issues already marked as wisps, and closed issues - they're done,
+		// no need to reclassify.
+		if issue.Wisp || issue.Status == "closed" {
+			continue
+		}
+
+		// Check for wisp characteristics
+		if reason := c.shouldBeWisp(issue.ID, issue.Title, issue.Type, issue.Labels); reason != "" {
+			found = append(found, misclassifiedWisp{
+				rigName: rigName,
+				id:      issue.ID,
+				title:   issue.Title,
+				reason:  reason,
+			})
+		}
+	}
+
+	return found
+}
+
+// shouldBeWisp checks if an issue has characteristics indicating it should be a wisp.
+// Returns the reason for the first matching check (type, labels, ID, then title), or "" if none match.
+func (c *CheckMisclassifiedWisps) shouldBeWisp(id, title, issueType string, labels []string) string {
+	// Check for merge-request type - these should always be wisps
+	if issueType == "merge-request" {
+		return "merge-request type should be ephemeral"
+	}
+
+	// Check for patrol-related labels (any label containing "patrol") and mail/handoff labels
+	for _, label := range labels {
+		if strings.Contains(label, "patrol") {
+			return "patrol label indicates ephemeral workflow"
+		}
+		if label == "gt:mail" || label == "gt:handoff" {
+			return "mail/handoff label indicates ephemeral message"
+		}
+	}
+
+	// Check for patrol molecule IDs: the ID must start with "mol-" and contain "-patrol".
+	// (Formula instances typically have IDs like "mol-<formula>-<hash>" or "<formula>.<step>".)
+	if strings.HasPrefix(id, "mol-") && strings.Contains(id, "-patrol") {
+		return "patrol molecule ID pattern"
+	}
+
+	// Check for specific title patterns indicating operational work (case-insensitive)
+	lowerTitle := strings.ToLower(title)
+	if strings.Contains(lowerTitle, "patrol cycle") ||
+		strings.Contains(lowerTitle, "witness patrol") ||
+		strings.Contains(lowerTitle, "deacon patrol") ||
+		strings.Contains(lowerTitle, "refinery patrol") {
+		return "patrol title indicates ephemeral workflow"
+	}
+
+	return ""
+}
+
+// Fix is currently a no-op: it returns nil without changing any issue (see the note below).
+func (c *CheckMisclassifiedWisps) Fix(ctx *CheckContext) error {
+	// Note: bd doesn't have a direct flag to set wisp:true on existing issues.
+	// The proper fix is to ensure issues are created with --ephemeral flag.
+	// For now, we just report the issues - they'll be cleaned up by wisp-gc
+	// if they become abandoned, or manually closed.
+	//
+	// A true fix would require bd to support: bd update <id> --ephemeral
+	// Until then, this check serves as a diagnostic.
+	return nil
+}
--- a/internal/doctor/orphan_check.go
+++ b/internal/doctor/orphan_check.go
@@ -5,7 +5,6 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
-	"regexp"
 	"strings"

 	"github.com/steveyegge/gastown/internal/events"
@@ -95,8 +94,8 @@ func (c *OrphanSessionCheck) Run(ctx *CheckContext) *CheckResult {
 			continue
 		}

-		// Only check gt-* sessions (Gas Town sessions)
-		if !strings.HasPrefix(sess, "gt-") {
+		// Only check gt-* and hq-* sessions (Gas Town sessions)
+		if !strings.HasPrefix(sess, "gt-") && !strings.HasPrefix(sess, "hq-") {
 			continue
 		}

@@ -150,7 +149,8 @@ func (c *OrphanSessionCheck) Fix(ctx *CheckContext) error {
 		// Log pre-death event for crash investigation (before killing)
 		_ = events.LogFeed(events.TypeSessionDeath, sess,
 			events.SessionDeathPayload(sess, "unknown", "orphan cleanup", "gt doctor"))
-		if err := t.KillSession(sess); err != nil {
+		// Use KillSessionWithProcesses to ensure all descendant processes are killed.
+		if err := t.KillSessionWithProcesses(sess); err != nil {
 			lastErr = err
 		}
 	}
@@ -200,8 +200,8 @@ func (c *OrphanSessionCheck) getValidRigs(townRoot string) []string {

 // isValidSession checks if a session name matches expected Gas Town patterns.
 // Valid patterns:
-//   - gt-{town}-mayor (dynamic based on town name)
-//   - gt-{town}-deacon (dynamic based on town name)
+//   - hq-mayor (headquarters mayor session)
+//   - hq-deacon (headquarters deacon session)
 //   - gt-<rig>-witness
 //   - gt-<rig>-refinery
 //   - gt-<rig>-<polecat> (where polecat is any name)
@@ -354,8 +354,9 @@ func (c *OrphanProcessCheck) getTmuxSessionPIDs() (map[int]bool, error) { //noli

 	// Find tmux server processes using ps instead of pgrep.
 	// pgrep -x tmux is unreliable on macOS - it often misses the actual server.
-	// We use ps with awk to find processes where comm is exactly "tmux".
-	out, err := exec.Command("sh", "-c", `ps ax -o pid,comm | awk '$2 == "tmux" || $2 ~ /\/tmux$/ { print $1 }'`).Output()
+	// We use ps with awk to find processes where comm is exactly "tmux", is a path ending in "/tmux", or starts with "tmux:".
+	// On Linux, tmux servers show as "tmux: server" in the comm field.
+	out, err := exec.Command("sh", "-c", `ps ax -o pid,comm | awk '$2 == "tmux" || $2 ~ /\/tmux$/ || $2 ~ /^tmux:/ { print $1 }'`).Output()
 	if err != nil {
 		// No tmux server running
 		return pids, nil
@@ -388,40 +389,41 @@ func (c *OrphanProcessCheck) getTmuxSessionPIDs() (map[int]bool, error) { //noli
 	return pids, nil
 }

-// findRuntimeProcesses finds all running runtime CLI processes.
-// Excludes Claude.app desktop application and its helpers.
+// findRuntimeProcesses finds Gas Town runtime processes: claude, claude-code, or codex commands
+// whose arguments include --dangerously-skip-permissions. Sessions started without that flag (e.g. a user's personal Claude sessions) are ignored.
 func (c *OrphanProcessCheck) findRuntimeProcesses() ([]processInfo, error) {
 	var procs []processInfo

-	// Use ps to find runtime processes
-	out, err := exec.Command("ps", "-eo", "pid,ppid,comm").Output()
+	// Use ps with args to get full command line (needed to check for Gas Town signature)
+	out, err := exec.Command("ps", "-eo", "pid,ppid,args").Output()
 	if err != nil {
 		return nil, err
 	}

-	// Regex to match runtime CLI processes (not Claude.app)
-	// Match: "claude", "claude-code", or "codex" (or paths ending in those)
-	runtimePattern := regexp.MustCompile(`(?i)(^claude$|/claude$|^claude-code$|/claude-code$|^codex$|/codex$)`)
-
-	// Pattern to exclude Claude.app and related desktop processes
-	excludePattern := regexp.MustCompile(`(?i)(Claude\.app|claude-native|chrome-native)`)
-
 	for _, line := range strings.Split(string(out), "\n") {
 		fields := strings.Fields(line)
 		if len(fields) < 3 {
 			continue
 		}

-		// Check if command matches runtime CLI
-		cmd := strings.Join(fields[2:], " ")
+		// Extract command name (without path)
+		cmd := fields[2]
+		if idx := strings.LastIndex(cmd, "/"); idx >= 0 {
+			cmd = cmd[idx+1:]
+		}

-		// Skip desktop app processes
-		if excludePattern.MatchString(cmd) {
+		// Only match claude/codex processes, not tmux or other launchers
+		// (tmux command line may contain --dangerously-skip-permissions as part of the launched command)
+		if cmd != "claude" && cmd != "claude-code" && cmd != "codex" {
 			continue
 		}

-		// Only match CLI runtime processes
-		if !runtimePattern.MatchString(cmd) {
+		// Get full args
+		args := strings.Join(fields[2:], " ")
+
+		// Only match Gas Town Claude processes (have --dangerously-skip-permissions)
+		// This excludes user's personal Claude sessions
+		if !strings.Contains(args, "--dangerously-skip-permissions") {
 			continue
 		}

@@ -436,7 +438,7 @@ func (c *OrphanProcessCheck) findRuntimeProcesses() ([]processInfo, error) {
 		procs = append(procs, processInfo{
 			pid:  pid,
 			ppid: ppid,
-			cmd:  cmd,
+			cmd:  args,
 		})
 	}

--- a/internal/doctor/orphan_check_test.go
+++ b/internal/doctor/orphan_check_test.go
@@ -4,6 +4,7 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"runtime"
 	"testing"
 )

@@ -43,6 +44,10 @@ func TestNewOrphanProcessCheck(t *testing.T) {
 }

 func TestOrphanProcessCheck_Run(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("orphan process detection is not supported on Windows")
+	}
+
 	// This test verifies the check runs without error.
 	// Results depend on whether Claude processes exist in the test environment.
 	check := NewOrphanProcessCheck()
@@ -353,6 +358,37 @@ func TestIsCrewSession_ComprehensivePatterns(t *testing.T) {
 	}
 }

+// TestOrphanSessionCheck_HQSessions tests that hq-* sessions are properly recognized as valid.
+func TestOrphanSessionCheck_HQSessions(t *testing.T) {
+	townRoot := t.TempDir()
+	mayorDir := filepath.Join(townRoot, "mayor")
+	if err := os.MkdirAll(mayorDir, 0o755); err != nil {
+		t.Fatalf("create mayor dir: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(mayorDir, "rigs.json"), []byte("{}"), 0o644); err != nil {
+		t.Fatalf("create rigs.json: %v", err)
+	}
+
+	lister := &mockSessionLister{
+		sessions: []string{
+			"hq-mayor",   // valid: headquarters mayor session
+			"hq-deacon",  // valid: headquarters deacon session
+		},
+	}
+	check := NewOrphanSessionCheckWithSessionLister(lister)
+	result := check.Run(&CheckContext{TownRoot: townRoot})
+
+	if result.Status != StatusOK {
+		t.Fatalf("expected StatusOK for valid hq sessions, got %v: %s", result.Status, result.Message)
+	}
+	if result.Message != "All 2 Gas Town sessions are valid" {
+		t.Fatalf("unexpected message: %q", result.Message)
+	}
+	if len(check.orphanSessions) != 0 {
+		t.Fatalf("expected no orphan sessions, got %v", check.orphanSessions)
+	}
+}
+
 // TestOrphanSessionCheck_Run_Deterministic tests the full Run path with a mock session
 // lister, ensuring deterministic behavior without depending on real tmux state.
 func TestOrphanSessionCheck_Run_Deterministic(t *testing.T) {
@@ -378,9 +414,11 @@ func TestOrphanSessionCheck_Run_Deterministic(t *testing.T) {
 			"gt-gastown-witness",      // valid: gastown rig exists
 			"gt-gastown-polecat1",     // valid: gastown rig exists
 			"gt-beads-refinery",       // valid: beads rig exists
+			"hq-mayor",                // valid: hq-mayor is recognized
+			"hq-deacon",               // valid: hq-deacon is recognized
 			"gt-unknown-witness",      // orphan: unknown rig doesn't exist
 			"gt-missing-crew-joe",     // orphan: missing rig doesn't exist
-			"random-session",          // ignored: doesn't match gt-* pattern
+			"random-session",          // ignored: doesn't match gt-*/hq-* pattern
 		},
 	}
 	check := NewOrphanSessionCheckWithSessionLister(lister)
--- a/internal/doctor/role_beads_check.go
+++ b/internal/doctor/role_beads_check.go
@@ -2,119 +2,116 @@ package doctor

 import (
 	"fmt"
-	"os/exec"
-	"strings"
+	"os"
+	"path/filepath"

-	"github.com/steveyegge/gastown/internal/beads"
+	"github.com/BurntSushi/toml"
+	"github.com/steveyegge/gastown/internal/config"
 )

-// RoleBeadsCheck verifies that role definition beads exist.
-// Role beads are templates that define role characteristics and lifecycle hooks.
-// They are stored in town beads (~/.beads/) with hq- prefix:
-//   - hq-mayor-role, hq-deacon-role, hq-dog-role
-//   - hq-witness-role, hq-refinery-role, hq-polecat-role, hq-crew-role
-//
-// Role beads are created by gt install, but creation may fail silently.
-// Without role beads, agents fall back to defaults which may differ from
-// user expectations.
-type RoleBeadsCheck struct {
-	FixableCheck
-	missing []string // Track missing role beads for fix
+// RoleConfigCheck verifies that role configuration is valid.
+// Role definitions are now config-based (internal/config/roles/*.toml),
+// not stored as beads. Built-in defaults are embedded in the binary.
+// This check validates any user-provided overrides at:
+//   - <town>/roles/<role>.toml (town-level overrides)
+//   - <rig>/roles/<role>.toml (rig-level overrides)
+type RoleConfigCheck struct {
+	BaseCheck
 }

-// NewRoleBeadsCheck creates a new role beads check.
-func NewRoleBeadsCheck() *RoleBeadsCheck {
-	return &RoleBeadsCheck{
-		FixableCheck: FixableCheck{
-			BaseCheck: BaseCheck{
-				CheckName:        "role-beads-exist",
-				CheckDescription: "Verify role definition beads exist",
-				CheckCategory:    CategoryConfig,
-			},
+// NewRoleBeadsCheck creates a new role config check.
+// Note: Function name kept as NewRoleBeadsCheck for backward compatibility
+// with existing doctor.go registration code.
+func NewRoleBeadsCheck() *RoleConfigCheck {
+	return &RoleConfigCheck{
+		BaseCheck: BaseCheck{
+			CheckName:        "role-config-valid",
+			CheckDescription: "Verify role configuration is valid",
+			CheckCategory:    CategoryConfig,
 		},
 	}
 }

-// Run checks if role beads exist.
-func (c *RoleBeadsCheck) Run(ctx *CheckContext) *CheckResult {
-	c.missing = nil // Reset
+// Run checks if role config is valid.
+func (c *RoleConfigCheck) Run(ctx *CheckContext) *CheckResult {
+	var warnings []string
+	var overrideCount int

-	townBeadsPath := beads.GetTownBeadsPath(ctx.TownRoot)
-	bd := beads.New(townBeadsPath)
-
-	var missing []string
-	roleDefs := beads.AllRoleBeadDefs()
-
-	for _, role := range roleDefs {
-		if _, err := bd.Show(role.ID); err != nil {
-			missing = append(missing, role.ID)
+	// Check town-level overrides
+	townRolesDir := filepath.Join(ctx.TownRoot, "roles")
+	if entries, err := os.ReadDir(townRolesDir); err == nil {
+		for _, entry := range entries {
+			if !entry.IsDir() && filepath.Ext(entry.Name()) == ".toml" {
+				overrideCount++
+				path := filepath.Join(townRolesDir, entry.Name())
+				if err := validateRoleOverride(path); err != nil {
+					warnings = append(warnings, fmt.Sprintf("town override %s: %v", entry.Name(), err))
+				}
+			}
 		}
 	}

-	c.missing = missing
+	// Check rig-level overrides for each rig
+	// Discover rigs by looking for directories with rig.json
+	if entries, err := os.ReadDir(ctx.TownRoot); err == nil {
+		for _, entry := range entries {
+			if !entry.IsDir() {
+				continue
+			}
+			rigName := entry.Name()
+			// Check if this is a rig (has rig.json)
+			if _, err := os.Stat(filepath.Join(ctx.TownRoot, rigName, "rig.json")); err != nil {
+				continue
+			}
+			rigRolesDir := filepath.Join(ctx.TownRoot, rigName, "roles")
+			if roleEntries, err := os.ReadDir(rigRolesDir); err == nil {
+				for _, roleEntry := range roleEntries {
+					if !roleEntry.IsDir() && filepath.Ext(roleEntry.Name()) == ".toml" {
+						overrideCount++
+						path := filepath.Join(rigRolesDir, roleEntry.Name())
+						if err := validateRoleOverride(path); err != nil {
+							warnings = append(warnings, fmt.Sprintf("rig %s override %s: %v", rigName, roleEntry.Name(), err))
+						}
+					}
+				}
+			}
+		}
+	}

-	if len(missing) == 0 {
+	if len(warnings) > 0 {
 		return &CheckResult{
 			Name:     c.Name(),
-			Status:   StatusOK,
-			Message:  fmt.Sprintf("All %d role beads exist", len(roleDefs)),
+			Status:   StatusWarning,
+			Message:  fmt.Sprintf("%d role config override(s) have issues", len(warnings)),
+			Details:  warnings,
+			FixHint:  "Check TOML syntax in role override files",
 			Category: c.Category(),
 		}
 	}

+	msg := "Role config uses built-in defaults"
+	if overrideCount > 0 {
+		msg = fmt.Sprintf("Role config valid (%d override file(s))", overrideCount)
+	}
+
 	return &CheckResult{
 		Name:     c.Name(),
-		Status:   StatusWarning, // Warning, not error - agents work without role beads
-		Message:  fmt.Sprintf("%d role bead(s) missing (agents will use defaults)", len(missing)),
-		Details:  missing,
-		FixHint:  "Run 'gt doctor --fix' to create missing role beads",
+		Status:   StatusOK,
+		Message:  msg,
 		Category: c.Category(),
 	}
 }

-// Fix creates missing role beads.
-func (c *RoleBeadsCheck) Fix(ctx *CheckContext) error {
-	// Re-run check to populate missing if needed
-	if c.missing == nil {
-		result := c.Run(ctx)
-		if result.Status == StatusOK {
-			return nil // Nothing to fix
-		}
+// validateRoleOverride checks that a role override file can be read and decodes as TOML into config.RoleDefinition.
+func validateRoleOverride(path string) error {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return err
 	}

-	if len(c.missing) == 0 {
-		return nil
-	}
-
-	// Build lookup map for role definitions
-	roleDefMap := make(map[string]beads.RoleBeadDef)
-	for _, role := range beads.AllRoleBeadDefs() {
-		roleDefMap[role.ID] = role
-	}
-
-	// Create missing role beads
-	for _, id := range c.missing {
-		role, ok := roleDefMap[id]
-		if !ok {
-			continue // Shouldn't happen
-		}
-
-		// Create role bead using bd create --type=role
-		args := []string{
-			"create",
-			"--type=role",
-			"--id=" + role.ID,
-			"--title=" + role.Title,
-			"--description=" + role.Desc,
-		}
-		if beads.NeedsForceForID(role.ID) {
-			args = append(args, "--force")
-		}
-		cmd := exec.Command("bd", args...)
-		cmd.Dir = ctx.TownRoot
-		if output, err := cmd.CombinedOutput(); err != nil {
-			return fmt.Errorf("creating %s: %s", role.ID, strings.TrimSpace(string(output)))
-		}
+	var def config.RoleDefinition
+	if err := toml.Unmarshal(data, &def); err != nil {
+		return fmt.Errorf("invalid TOML: %w", err)
 	}

 	return nil
--- a/internal/doctor/role_beads_check_test.go
+++ b/internal/doctor/role_beads_check_test.go
@@ -4,15 +4,64 @@ import (
 	"os"
 	"path/filepath"
 	"testing"
-
-	"github.com/steveyegge/gastown/internal/beads"
 )

-func TestRoleBeadsCheck_Run(t *testing.T) {
-	t.Run("no town beads returns warning", func(t *testing.T) {
+func TestRoleConfigCheck_Run(t *testing.T) {
+	t.Run("no overrides returns OK with defaults message", func(t *testing.T) {
 		tmpDir := t.TempDir()
-		// Create minimal town structure without .beads
-		if err := os.MkdirAll(filepath.Join(tmpDir, "mayor"), 0755); err != nil {
+
+		check := NewRoleBeadsCheck()
+		ctx := &CheckContext{TownRoot: tmpDir}
+		result := check.Run(ctx)
+
+		if result.Status != StatusOK {
+			t.Errorf("expected StatusOK, got %v: %s", result.Status, result.Message)
+		}
+		if result.Message != "Role config uses built-in defaults" {
+			t.Errorf("unexpected message: %s", result.Message)
+		}
+	})
+
+	t.Run("valid town override returns OK", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		rolesDir := filepath.Join(tmpDir, "roles")
+		if err := os.MkdirAll(rolesDir, 0755); err != nil {
+			t.Fatal(err)
+		}
+
+		// Create a valid TOML override
+		override := `
+role = "witness"
+scope = "rig"
+
+[session]
+start_command = "exec echo test"
+`
+		if err := os.WriteFile(filepath.Join(rolesDir, "witness.toml"), []byte(override), 0644); err != nil {
+			t.Fatal(err)
+		}
+
+		check := NewRoleBeadsCheck()
+		ctx := &CheckContext{TownRoot: tmpDir}
+		result := check.Run(ctx)
+
+		if result.Status != StatusOK {
+			t.Errorf("expected StatusOK, got %v: %s", result.Status, result.Message)
+		}
+		if result.Message != "Role config valid (1 override file(s))" {
+			t.Errorf("unexpected message: %s", result.Message)
+		}
+	})
+
+	t.Run("invalid town override returns warning", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		rolesDir := filepath.Join(tmpDir, "roles")
+		if err := os.MkdirAll(rolesDir, 0755); err != nil {
+			t.Fatal(err)
+		}
+
+		// Create an invalid TOML file
+		if err := os.WriteFile(filepath.Join(rolesDir, "witness.toml"), []byte("invalid { toml"), 0644); err != nil {
 			t.Fatal(err)
 		}

@@ -20,49 +69,53 @@ func TestRoleBeadsCheck_Run(t *testing.T) {
 		ctx := &CheckContext{TownRoot: tmpDir}
 		result := check.Run(ctx)

-		// Without .beads directory, all role beads are "missing"
-		expectedCount := len(beads.AllRoleBeadDefs())
 		if result.Status != StatusWarning {
 			t.Errorf("expected StatusWarning, got %v: %s", result.Status, result.Message)
 		}
-		if len(result.Details) != expectedCount {
-			t.Errorf("expected %d missing role beads, got %d: %v", expectedCount, len(result.Details), result.Details)
+		if len(result.Details) != 1 {
+			t.Errorf("expected 1 warning detail, got %d", len(result.Details))
 		}
 	})

-	t.Run("check is fixable", func(t *testing.T) {
+	t.Run("valid rig override returns OK", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		rigName := "testrig"
+		rigDir := filepath.Join(tmpDir, rigName)
+		rigRolesDir := filepath.Join(rigDir, "roles")
+		if err := os.MkdirAll(rigRolesDir, 0755); err != nil {
+			t.Fatal(err)
+		}
+
+		// Create rig.json to mark this as a rig
+		if err := os.WriteFile(filepath.Join(rigDir, "rig.json"), []byte(`{"name": "testrig"}`), 0644); err != nil {
+			t.Fatal(err)
+		}
+
+		// Create a valid TOML override
+		override := `
+role = "refinery"
+scope = "rig"
+
+[session]
+needs_pre_sync = true
+`
+		if err := os.WriteFile(filepath.Join(rigRolesDir, "refinery.toml"), []byte(override), 0644); err != nil {
+			t.Fatal(err)
+		}
+
 		check := NewRoleBeadsCheck()
-		if !check.CanFix() {
-			t.Error("RoleBeadsCheck should be fixable")
+		ctx := &CheckContext{TownRoot: tmpDir}
+		result := check.Run(ctx)
+
+		if result.Status != StatusOK {
+			t.Errorf("expected StatusOK, got %v: %s", result.Status, result.Message)
+		}
+	})
+
+	t.Run("check is not fixable", func(t *testing.T) {
+		check := NewRoleBeadsCheck()
+		if check.CanFix() {
+			t.Error("RoleConfigCheck should not be fixable (config issues need manual fix)")
 		}
 	})
 }
-
-func TestRoleBeadsCheck_usesSharedDefs(t *testing.T) {
-	// Verify the check uses beads.AllRoleBeadDefs()
-	roleDefs := beads.AllRoleBeadDefs()
-
-	if len(roleDefs) < 7 {
-		t.Errorf("expected at least 7 role beads, got %d", len(roleDefs))
-	}
-
-	// Verify key roles are present
-	expectedIDs := map[string]bool{
-		"hq-mayor-role":    false,
-		"hq-deacon-role":   false,
-		"hq-witness-role":  false,
-		"hq-refinery-role": false,
-	}
-
-	for _, role := range roleDefs {
-		if _, exists := expectedIDs[role.ID]; exists {
-			expectedIDs[role.ID] = true
-		}
-	}
-
-	for id, found := range expectedIDs {
-		if !found {
-			t.Errorf("expected role %s not found in AllRoleBeadDefs()", id)
-		}
-	}
-}
--- a/internal/doctor/routing_mode_check.go
+++ b/internal/doctor/routing_mode_check.go
@@ -0,0 +1,147 @@
+package doctor
+
+import (
+	"bytes"
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+// RoutingModeCheck warns when beads routing.mode is unset or is anything other than
+// "explicit". An unset mode defaults to "auto", which can cause issues to be unexpectedly
+// routed to ~/.beads-planning instead of the local .beads directory: auto mode uses the git
+// remote URL to detect user role, and non-SSH URLs are interpreted as "contributor" mode.
+//
+// See: https://github.com/steveyegge/beads/issues/1165
+type RoutingModeCheck struct {
+	FixableCheck
+}
+
+// NewRoutingModeCheck creates the "routing-mode" check in CategoryConfig.
+func NewRoutingModeCheck() *RoutingModeCheck {
+	return &RoutingModeCheck{
+		FixableCheck: FixableCheck{
+			BaseCheck: BaseCheck{
+				CheckName:        "routing-mode",
+				CheckDescription: "Check beads routing.mode is explicit (prevents .beads-planning routing)",
+				CheckCategory:    CategoryConfig,
+			},
+		},
+	}
+}
+
+// Run checks the town-level beads config and, when ctx.RigName is set, the rig-level config; it returns the first non-OK result.
+func (c *RoutingModeCheck) Run(ctx *CheckContext) *CheckResult {
+	// Check town-level beads config
+	townBeadsDir := filepath.Join(ctx.TownRoot, ".beads")
+	result := c.checkRoutingMode(townBeadsDir, "town")
+	if result.Status != StatusOK {
+		return result
+	}
+
+	// Also check rig-level beads if specified
+	if ctx.RigName != "" {
+		rigBeadsDir := filepath.Join(ctx.RigPath(), ".beads")
+		rigResult := c.checkRoutingMode(rigBeadsDir, fmt.Sprintf("rig '%s'", ctx.RigName))
+		if rigResult.Status != StatusOK {
+			return rigResult
+		}
+	}
+
+	return &CheckResult{
+		Name:    c.Name(),
+		Status:  StatusOK,
+		Message: "Beads routing.mode is explicit",
+	}
+}
+
+// checkRoutingMode queries routing.mode via bd for beadsDir and warns unless it is "explicit"; location only labels the messages.
+func (c *RoutingModeCheck) checkRoutingMode(beadsDir, location string) *CheckResult {
+	// Run bd config get routing.mode from beadsDir's parent, with BEADS_DIR pointing at beadsDir
+	cmd := exec.Command("bd", "config", "get", "routing.mode")
+	cmd.Dir = filepath.Dir(beadsDir)
+	cmd.Env = append(cmd.Environ(), "BEADS_DIR="+beadsDir)
+
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	if err := cmd.Run(); err != nil {
+		// If the config key doesn't exist, that means it defaults to "auto"
+		if strings.Contains(stderr.String(), "not found") || strings.Contains(stderr.String(), "not set") {
+			return &CheckResult{
+				Name:   c.Name(),
+				Status: StatusWarning,
+				Message: fmt.Sprintf("routing.mode not set at %s (defaults to auto)", location),
+				Details: []string{
+					"Auto routing mode uses git remote URL to detect user role",
+					"Non-SSH URLs (HTTPS or file paths) trigger routing to ~/.beads-planning",
+					"This causes mail and issues to be stored in the wrong location",
+					"See: https://github.com/steveyegge/beads/issues/1165",
+				},
+				FixHint: "Run 'gt doctor --fix' or 'bd config set routing.mode explicit'",
+			}
+		}
+		// Other error (e.g. bd could not be run) - report as warning, without a fix hint
+		return &CheckResult{
+			Name:    c.Name(),
+			Status:  StatusWarning,
+			Message: fmt.Sprintf("Could not check routing.mode at %s: %v", location, err),
+		}
+	}
+
+	mode := strings.TrimSpace(stdout.String())
+	if mode != "explicit" {
+		return &CheckResult{
+			Name:   c.Name(),
+			Status: StatusWarning,
+			Message: fmt.Sprintf("routing.mode is '%s' at %s (should be 'explicit')", mode, location),
+			Details: []string{
+				"Auto routing mode uses git remote URL to detect user role",
+				"Non-SSH URLs (HTTPS or file paths) trigger routing to ~/.beads-planning",
+				"This causes mail and issues to be stored in the wrong location",
+				"See: https://github.com/steveyegge/beads/issues/1165",
+			},
+			FixHint: "Run 'gt doctor --fix' or 'bd config set routing.mode explicit'",
+		}
+	}
+
+	return &CheckResult{
+		Name:    c.Name(),
+		Status:  StatusOK,
+		Message: fmt.Sprintf("routing.mode is explicit at %s", location),
+	}
+}
+
+// Fix sets routing.mode to "explicit" in the town beads and, when ctx.RigName is set, in that rig's beads.
+func (c *RoutingModeCheck) Fix(ctx *CheckContext) error {
+	// Fix town-level beads
+	townBeadsDir := filepath.Join(ctx.TownRoot, ".beads")
+	if err := c.setRoutingMode(townBeadsDir); err != nil {
+		return fmt.Errorf("fixing town beads: %w", err)
+	}
+
+	// Also fix rig-level beads if specified
+	if ctx.RigName != "" {
+		rigBeadsDir := filepath.Join(ctx.RigPath(), ".beads")
+		if err := c.setRoutingMode(rigBeadsDir); err != nil {
+			return fmt.Errorf("fixing rig %s beads: %w", ctx.RigName, err)
+		}
+	}
+
+	return nil
+}
+
+// setRoutingMode runs `bd config set routing.mode explicit` from beadsDir's parent with BEADS_DIR set to beadsDir.
+// On failure the returned error carries bd's combined output and wraps the underlying exec error.
+func (c *RoutingModeCheck) setRoutingMode(beadsDir string) error {
+	cmd := exec.Command("bd", "config", "set", "routing.mode", "explicit")
+	cmd.Dir = filepath.Dir(beadsDir)
+	cmd.Env = append(cmd.Environ(), "BEADS_DIR="+beadsDir)
+	if output, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("bd config set failed: %s: %w", strings.TrimSpace(string(output)), err)
+	}
+
+	return nil
+}
--- a/internal/doctor/sparse_checkout_check_test.go
+++ b/internal/doctor/sparse_checkout_check_test.go
@@ -120,7 +120,7 @@ func TestSparseCheckoutCheck_MayorRigMissingSparseCheckout(t *testing.T) {
 	if !strings.Contains(result.Message, "1 repo(s) missing") {
 		t.Errorf("expected message about missing config, got %q", result.Message)
 	}
-	if len(result.Details) != 1 || !strings.Contains(result.Details[0], "mayor/rig") {
+	if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "mayor/rig") {
 		t.Errorf("expected details to contain mayor/rig, got %v", result.Details)
 	}
 }
@@ -164,7 +164,7 @@ func TestSparseCheckoutCheck_CrewMissingSparseCheckout(t *testing.T) {
 	if result.Status != StatusError {
 		t.Errorf("expected StatusError for missing sparse checkout, got %v", result.Status)
 	}
-	if len(result.Details) != 1 || !strings.Contains(result.Details[0], "crew/agent1") {
+	if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "crew/agent1") {
 		t.Errorf("expected details to contain crew/agent1, got %v", result.Details)
 	}
 }
@@ -186,7 +186,7 @@ func TestSparseCheckoutCheck_PolecatMissingSparseCheckout(t *testing.T) {
 	if result.Status != StatusError {
 		t.Errorf("expected StatusError for missing sparse checkout, got %v", result.Status)
 	}
-	if len(result.Details) != 1 || !strings.Contains(result.Details[0], "polecats/pc1") {
+	if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "polecats/pc1") {
 		t.Errorf("expected details to contain polecats/pc1, got %v", result.Details)
 	}
 }
@@ -244,7 +244,7 @@ func TestSparseCheckoutCheck_MixedConfigured(t *testing.T) {
 	if !strings.Contains(result.Message, "1 repo(s) missing") {
 		t.Errorf("expected message about 1 missing repo, got %q", result.Message)
 	}
-	if len(result.Details) != 1 || !strings.Contains(result.Details[0], "crew/agent1") {
+	if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "crew/agent1") {
 		t.Errorf("expected details to contain only crew/agent1, got %v", result.Details)
 	}
 }
--- a/internal/doctor/tmux_check.go
+++ b/internal/doctor/tmux_check.go
@@ -123,7 +123,8 @@ func (c *LinkedPaneCheck) Fix(ctx *CheckContext) error {
 	var lastErr error

 	for _, session := range c.linkedSessions {
-		if err := t.KillSession(session); err != nil {
+		// Use KillSessionWithProcesses to ensure all descendant processes are killed.
+		if err := t.KillSessionWithProcesses(session); err != nil {
 			lastErr = err
 		}
 	}
--- a/internal/doctor/zombie_check.go
+++ b/internal/doctor/zombie_check.go
@@ -128,7 +128,8 @@ func (c *ZombieSessionCheck) Fix(ctx *CheckContext) error {
 		_ = events.LogFeed(events.TypeSessionDeath, sess,
 			events.SessionDeathPayload(sess, "unknown", "zombie cleanup", "gt doctor"))

-		if err := t.KillSession(sess); err != nil {
+		// Use KillSessionWithProcesses to ensure all descendant processes are killed.
+		if err := t.KillSessionWithProcesses(sess); err != nil {
 			lastErr = err
 		}
 	}