diff --git a/.beads/formulas/gastown-release.formula.toml b/.beads/formulas/gastown-release.formula.toml index 9f5b53fb..5c57f55d 100644 --- a/.beads/formulas/gastown-release.formula.toml +++ b/.beads/formulas/gastown-release.formula.toml @@ -47,7 +47,7 @@ Check all crew workspaces and the mayor rig: ```bash # Check each workspace -for dir in ~/gt/gastown/crew/* ~/gt/gastown/mayor; do +for dir in "$GT_ROOT"/gastown/crew/* "$GT_ROOT/gastown/mayor"; do if [ -d "$dir/.git" ] || [ -d "$dir" ]; then echo "=== Checking $dir ===" cd "$dir" 2>/dev/null || continue diff --git a/.beads/formulas/mol-boot-triage.formula.toml b/.beads/formulas/mol-boot-triage.formula.toml index 38e5d248..66a472af 100644 --- a/.beads/formulas/mol-boot-triage.formula.toml +++ b/.beads/formulas/mol-boot-triage.formula.toml @@ -47,7 +47,7 @@ bd show hq-deacon 2>/dev/null gt feed --since 10m --plain | head -20 # Recent wisps (operational state) -ls -lt ~/gt/.beads-wisp/*.wisp.json 2>/dev/null | head -5 +ls -lt "$GT_ROOT"/.beads-wisp/*.wisp.json 2>/dev/null | head -5 ``` **Step 4: Check Deacon mail** @@ -221,7 +221,7 @@ Then exit. The next daemon tick will spawn a fresh Boot. **Update status file** ```bash # The gt boot command handles this automatically -# Status is written to ~/gt/deacon/dogs/boot/.boot-status.json +# Status is written to $GT_ROOT/deacon/dogs/boot/.boot-status.json ``` Boot is ephemeral by design. Each instance runs fresh. diff --git a/.beads/formulas/mol-deacon-patrol.formula.toml b/.beads/formulas/mol-deacon-patrol.formula.toml index 1c357490..f293c2b3 100644 --- a/.beads/formulas/mol-deacon-patrol.formula.toml +++ b/.beads/formulas/mol-deacon-patrol.formula.toml @@ -480,7 +480,7 @@ needs = ["zombie-scan"] description = """ Execute registered plugins. -Scan ~/gt/plugins/ for plugin directories. Each plugin has a plugin.md with TOML frontmatter defining its gate (when to run) and instructions (what to do). +Scan $GT_ROOT/plugins/ for plugin directories. 
Each plugin has a plugin.md with TOML frontmatter defining its gate (when to run) and instructions (what to do). See docs/deacon-plugins.md for full documentation. @@ -497,7 +497,7 @@ For each plugin: Plugins marked parallel: true can run concurrently using Task tool subagents. Sequential plugins run one at a time in directory order. -Skip this step if ~/gt/plugins/ does not exist or is empty.""" +Skip this step if $GT_ROOT/plugins/ does not exist or is empty.""" [[steps]] id = "dog-pool-maintenance" @@ -665,59 +665,84 @@ Skip dispatch - system is healthy. [[steps]] id = "costs-digest" -title = "Aggregate daily costs" +title = "Aggregate daily costs [DISABLED]" needs = ["session-gc"] description = """ -**DAILY DIGEST** - Aggregate yesterday's session cost wisps. +**⚠️ DISABLED** - Skip this step entirely. -Session costs are recorded as ephemeral wisps (not exported to JSONL) to avoid -log-in-database pollution. This step aggregates them into a permanent daily -"Cost Report YYYY-MM-DD" bead for audit purposes. +Cost tracking is temporarily disabled because Claude Code does not expose +session costs in a way that can be captured programmatically. + +**Why disabled:** +- The `gt costs` command uses tmux capture-pane to find costs +- Claude Code displays costs in the TUI status bar, not in scrollback +- All sessions show $0.00 because capture-pane can't see TUI chrome +- The infrastructure is sound but has no data source + +**What we need from Claude Code:** +- Stop hook env var (e.g., `$CLAUDE_SESSION_COST`) +- Or queryable file/API endpoint + +**Re-enable when:** Claude Code exposes cost data via API or environment. + +See: GH#24, gt-7awfj + +**Exit criteria:** Skip this step - proceed to next.""" + +[[steps]] +id = "patrol-digest" +title = "Aggregate daily patrol digests" +needs = ["costs-digest"] +description = """ +**DAILY DIGEST** - Aggregate yesterday's patrol cycle digests. 
+ +Patrol cycles (Deacon, Witness, Refinery) create ephemeral per-cycle digests +to avoid JSONL pollution. This step aggregates them into a single permanent +"Patrol Report YYYY-MM-DD" bead for audit purposes. **Step 1: Check if digest is needed** ```bash -# Preview yesterday's costs (dry run) -gt costs digest --yesterday --dry-run +# Preview yesterday's patrol digests (dry run) +gt patrol digest --yesterday --dry-run ``` -If output shows "No session cost wisps found", skip to Step 3. +If output shows "No patrol digests found", skip to Step 3. **Step 2: Create the digest** ```bash -gt costs digest --yesterday +gt patrol digest --yesterday ``` This: -- Queries all session.ended wisps from yesterday -- Creates a single "Cost Report YYYY-MM-DD" bead with aggregated data -- Deletes the source wisps +- Queries all ephemeral patrol digests from yesterday +- Creates a single "Patrol Report YYYY-MM-DD" bead with aggregated data +- Deletes the source digests **Step 3: Verify** -The digest appears in `gt costs --week` queries. -Daily digests preserve audit trail without per-session pollution. +Daily patrol digests preserve audit trail without per-cycle pollution. **Timing**: Run once per morning patrol cycle. The --yesterday flag ensures we don't try to digest today's incomplete data. -**Exit criteria:** Yesterday's costs digested (or no wisps to digest).""" +**Exit criteria:** Yesterday's patrol digests aggregated (or none to aggregate).""" [[steps]] id = "log-maintenance" title = "Rotate logs and prune state" -needs = ["costs-digest"] +needs = ["patrol-digest"] description = """ Maintain daemon logs and state files. 
**Step 1: Check daemon.log size** ```bash # Get log file size -ls -la ~/.beads/daemon*.log 2>/dev/null || ls -la ~/gt/.beads/daemon*.log 2>/dev/null +ls -la ~/.beads/daemon*.log 2>/dev/null || ls -la "$GT_ROOT"/.beads/daemon*.log 2>/dev/null ``` If daemon.log exceeds 10MB: ```bash # Rotate with date suffix and gzip -LOGFILE="$HOME/gt/.beads/daemon.log" +LOGFILE="$GT_ROOT/.beads/daemon.log" if [ -f "$LOGFILE" ] && [ $(stat -f%z "$LOGFILE" 2>/dev/null || stat -c%s "$LOGFILE") -gt 10485760 ]; then DATE=$(date +%Y-%m-%dT%H-%M-%S) mv "$LOGFILE" "${LOGFILE%.log}-${DATE}.log" @@ -729,7 +754,7 @@ fi Clean up daemon logs older than 7 days: ```bash -find ~/gt/.beads/ -name "daemon-*.log.gz" -mtime +7 -delete +find "$GT_ROOT/.beads/" -name "daemon-*.log.gz" -mtime +7 -delete ``` **Step 3: Prune state.json of dead sessions** diff --git a/.beads/formulas/mol-shutdown-dance.formula.toml b/.beads/formulas/mol-shutdown-dance.formula.toml index 6f044db0..35ca1264 100644 --- a/.beads/formulas/mol-shutdown-dance.formula.toml +++ b/.beads/formulas/mol-shutdown-dance.formula.toml @@ -8,7 +8,7 @@ goroutine (NOT a Claude session) that runs the interrogation state machine. Dogs are lightweight workers in Boot's pool (see dog-pool-architecture.md): - Fixed pool of 5 goroutines (configurable via GT_DOG_POOL_SIZE) -- State persisted to ~/gt/deacon/dogs/active/.json +- State persisted to $GT_ROOT/deacon/dogs/active/.json - Recovery on Boot restart via orphan state files ## State Machine @@ -151,7 +151,7 @@ If target doesn't exist: - Skip to EPITAPH with outcome=already_dead **3. Initialize state file:** -Write initial state to ~/gt/deacon/dogs/active/{dog-id}.json +Write initial state to $GT_ROOT/deacon/dogs/active/{dog-id}.json **4. Set initial attempt counter:** attempt = 1 @@ -477,11 +477,11 @@ bd close {warrant_id} --reason "{epitaph_summary}" **3. 
Move state file to completed:** ```bash -mv ~/gt/deacon/dogs/active/{dog-id}.json ~/gt/deacon/dogs/completed/ +mv "$GT_ROOT/deacon/dogs/active/{dog-id}.json" "$GT_ROOT/deacon/dogs/completed/" ``` **4. Report to Boot:** -Write completion file: ~/gt/deacon/dogs/active/{dog-id}.done +Write completion file: $GT_ROOT/deacon/dogs/active/{dog-id}.done ```json { "dog_id": "{dog-id}", diff --git a/.beads/formulas/mol-town-shutdown.formula.toml b/.beads/formulas/mol-town-shutdown.formula.toml index 82f30ab2..0e76c72a 100644 --- a/.beads/formulas/mol-town-shutdown.formula.toml +++ b/.beads/formulas/mol-town-shutdown.formula.toml @@ -132,7 +132,7 @@ gt daemon rotate-logs gt doctor --fix ``` -Old logs are moved to `~/gt/logs/archive/` with timestamps. +Old logs are moved to `$GT_ROOT/logs/archive/` with timestamps. """ [[steps]] diff --git a/.github/workflows/windows-ci.yml b/.github/workflows/windows-ci.yml new file mode 100644 index 00000000..fa5c0f60 --- /dev/null +++ b/.github/workflows/windows-ci.yml @@ -0,0 +1,32 @@ +name: Windows CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + name: Windows Build and Unit Tests + runs-on: windows-latest + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + + - name: Configure Git + run: | + git config --global user.name "CI Bot" + git config --global user.email "ci@gastown.test" + + - name: Build + run: go build -v ./cmd/gt + + - name: Unit Tests + run: go test -short ./... 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 05df8245..4a862fbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,19 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.4.0] - 2026-01-17 - ### Fixed - **Orphan cleanup skips valid tmux sessions** - `gt orphans kill` and automatic orphan cleanup now check for Claude processes belonging to valid Gas Town tmux sessions (gt-*/hq-*) before killing. This prevents false kills of witnesses, refineries, and deacon during startup when they may temporarily show TTY "?" -## [0.3.1] - 2026-01-17 - -### Fixed - -- **Orphan cleanup on macOS** - Fixed TTY comparison (`??` vs `?`) so orphan detection works on macOS -- **Session kill leaves orphans** - `gt done` and `gt crew stop` now use `KillSessionWithProcesses` to properly terminate all child processes before killing the tmux session - ## [0.3.0] - 2026-01-17 ### Added diff --git a/cmd/gt/build_test.go b/cmd/gt/build_test.go new file mode 100644 index 00000000..8cd7c33a --- /dev/null +++ b/cmd/gt/build_test.go @@ -0,0 +1,57 @@ +package main + +import ( + "os" + "os/exec" + "runtime" + "testing" +) + +// TestCrossPlatformBuild verifies that the codebase compiles for all supported +// platforms. This catches cases where platform-specific code (using build tags +// like //go:build !windows) is called from platform-agnostic code without +// providing stubs for all platforms. 
+func TestCrossPlatformBuild(t *testing.T) { + if testing.Short() { + t.Skip("skipping cross-platform build test in short mode") + } + + // Skip if not running on a platform that can cross-compile + // (need Go toolchain, not just running tests) + if os.Getenv("CI") == "" && runtime.GOOS != "darwin" && runtime.GOOS != "linux" { + t.Skip("skipping cross-platform build test on unsupported platform") + } + + platforms := []struct { + goos string + goarch string + cgo string + }{ + {"linux", "amd64", "0"}, + {"linux", "arm64", "0"}, + {"darwin", "amd64", "0"}, + {"darwin", "arm64", "0"}, + {"windows", "amd64", "0"}, + {"freebsd", "amd64", "0"}, + } + + for _, p := range platforms { + p := p // capture range variable + t.Run(p.goos+"_"+p.goarch, func(t *testing.T) { + t.Parallel() + + cmd := exec.Command("go", "build", "-o", os.DevNull, ".") + cmd.Dir = "." + cmd.Env = append(os.Environ(), + "GOOS="+p.goos, + "GOARCH="+p.goarch, + "CGO_ENABLED="+p.cgo, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("build failed for %s/%s:\n%s", p.goos, p.goarch, string(output)) + } + }) + } +} diff --git a/docs/INSTALLING.md b/docs/INSTALLING.md index 693d4d8c..6170a920 100644 --- a/docs/INSTALLING.md +++ b/docs/INSTALLING.md @@ -44,8 +44,8 @@ sudo apt update sudo apt install -y git # Install Go (apt version may be outdated, use official installer) -wget https://go.dev/dl/go1.24.linux-amd64.tar.gz -sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf go1.24.linux-amd64.tar.gz +wget https://go.dev/dl/go1.24.12.linux-amd64.tar.gz +sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf go1.24.12.linux-amd64.tar.gz echo 'export PATH=$PATH:/usr/local/go/bin:$HOME/go/bin' >> ~/.bashrc source ~/.bashrc diff --git a/docs/concepts/convoy.md b/docs/concepts/convoy.md index f096e71f..63c45aae 100644 --- a/docs/concepts/convoy.md +++ b/docs/concepts/convoy.md @@ -51,6 +51,7 @@ so you can see when it lands and what was included. 
|---------|-------------|-----|-------------| | **Convoy** | Yes | hq-cv-* | Tracking unit. What you create, track, get notified about. | | **Swarm** | No | None | Ephemeral. "The workers currently on this convoy's issues." | +| **Stranded Convoy** | Yes | hq-cv-* | A convoy with ready work but no polecats assigned. Needs attention. | When you "kick off a swarm", you're really: 1. Creating a convoy (the tracking unit) diff --git a/docs/concepts/molecules.md b/docs/concepts/molecules.md index 9f0fb198..8962b628 100644 --- a/docs/concepts/molecules.md +++ b/docs/concepts/molecules.md @@ -25,6 +25,7 @@ Protomolecule (frozen template) ─── Solid | **Molecule** | Active workflow instance with trackable steps | | **Wisp** | Ephemeral molecule for patrol cycles (never synced) | | **Digest** | Squashed summary of completed molecule | +| **Shiny Workflow** | Canonical polecat formula: design → implement → review → test → submit | ## Common Mistake: Reading Formulas Directly diff --git a/docs/reference.md b/docs/reference.md index bbe0f8dd..d9c6b676 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -545,6 +545,24 @@ gt stop --all # Kill all sessions gt stop --rig # Kill rig sessions ``` +### Health Check + +```bash +gt deacon health-check # Send health check ping, track response +gt deacon health-state # Show health check state for all agents +``` + +### Merge Queue (MQ) + +```bash +gt mq list [rig] # Show the merge queue +gt mq next [rig] # Show highest-priority merge request +gt mq submit # Submit current branch to merge queue +gt mq status # Show detailed merge request status +gt mq retry # Retry a failed merge request +gt mq reject # Reject a merge request +``` + ## Beads Commands (bd) ```bash diff --git a/internal/beads/beads.go b/internal/beads/beads.go index 70f90c1d..d1adbd4a 100644 --- a/internal/beads/beads.go +++ b/internal/beads/beads.go @@ -44,8 +44,8 @@ type Issue struct { // Agent bead slots (type=agent only) HookBead string `json:"hook_bead,omitempty"` 
// Current work attached to agent's hook - RoleBead string `json:"role_bead,omitempty"` // Role definition bead (shared) AgentState string `json:"agent_state,omitempty"` // Agent lifecycle state (spawning, working, done, stuck) + // Note: role_bead field removed - role definitions are now config-based // Counts from list output DependencyCount int `json:"dependency_count,omitempty"` diff --git a/internal/beads/beads_agent.go b/internal/beads/beads_agent.go index 6334f93d..3374a246 100644 --- a/internal/beads/beads_agent.go +++ b/internal/beads/beads_agent.go @@ -15,10 +15,11 @@ type AgentFields struct { Rig string // Rig name (empty for global agents like mayor/deacon) AgentState string // spawning, working, done, stuck HookBead string // Currently pinned work bead ID - RoleBead string // Role definition bead ID (canonical location; may not exist yet) CleanupStatus string // ZFC: polecat self-reports git state (clean, has_uncommitted, has_stash, has_unpushed) ActiveMR string // Currently active merge request bead ID (for traceability) NotificationLevel string // DND mode: verbose, normal, muted (default: normal) + // Note: RoleBead field removed - role definitions are now config-based. + // See internal/config/roles/*.toml and config-based-roles.md. 
} // Notification level constants @@ -53,11 +54,7 @@ func FormatAgentDescription(title string, fields *AgentFields) string { lines = append(lines, "hook_bead: null") } - if fields.RoleBead != "" { - lines = append(lines, fmt.Sprintf("role_bead: %s", fields.RoleBead)) - } else { - lines = append(lines, "role_bead: null") - } + // Note: role_bead field no longer written - role definitions are config-based if fields.CleanupStatus != "" { lines = append(lines, fmt.Sprintf("cleanup_status: %s", fields.CleanupStatus)) @@ -111,7 +108,7 @@ func ParseAgentFields(description string) *AgentFields { case "hook_bead": fields.HookBead = value case "role_bead": - fields.RoleBead = value + // Ignored - role definitions are now config-based (backward compat) case "cleanup_status": fields.CleanupStatus = value case "active_mr": @@ -158,13 +155,7 @@ func (b *Beads) CreateAgentBead(id, title string, fields *AgentFields) (*Issue, return nil, fmt.Errorf("parsing bd create output: %w", err) } - // Set the role slot if specified (this is the authoritative storage) - if fields != nil && fields.RoleBead != "" { - if _, err := b.run("slot", "set", id, "role", fields.RoleBead); err != nil { - // Non-fatal: warn but continue - fmt.Printf("Warning: could not set role slot: %v\n", err) - } - } + // Note: role slot no longer set - role definitions are config-based // Set the hook slot if specified (this is the authoritative storage) // This fixes the slot inconsistency bug where bead status is 'hooked' but @@ -223,13 +214,7 @@ func (b *Beads) CreateOrReopenAgentBead(id, title string, fields *AgentFields) ( return nil, fmt.Errorf("updating reopened agent bead: %w", err) } - // Set the role slot if specified - if fields != nil && fields.RoleBead != "" { - if _, err := b.run("slot", "set", id, "role", fields.RoleBead); err != nil { - // Non-fatal: warn but continue - fmt.Printf("Warning: could not set role slot: %v\n", err) - } - } + // Note: role slot no longer set - role definitions are 
config-based // Clear any existing hook slot (handles stale state from previous lifecycle) _, _ = b.run("slot", "clear", id, "hook") diff --git a/internal/beads/beads_role.go b/internal/beads/beads_role.go index 14bcef6e..0bd18e79 100644 --- a/internal/beads/beads_role.go +++ b/internal/beads/beads_role.go @@ -1,4 +1,11 @@ // Package beads provides role bead management. +// +// DEPRECATED: Role beads are deprecated. Role definitions are now config-based. +// See internal/config/roles/*.toml and config-based-roles.md for the new system. +// +// This file is kept for backward compatibility with existing role beads but +// new code should use config.LoadRoleDefinition() instead of reading role beads. +// The daemon no longer uses role beads as of Phase 2 (config-based roles). package beads import ( @@ -6,10 +13,12 @@ import ( "fmt" ) -// Role bead ID naming convention: -// Role beads are stored in town beads (~/.beads/) with hq- prefix. +// DEPRECATED: Role bead ID naming convention is no longer used. +// Role definitions are now config-based (internal/config/roles/*.toml). // -// Canonical format: hq--role +// Role beads were stored in town beads (~/.beads/) with hq- prefix. +// +// Canonical format was: hq--role // // Examples: // - hq-mayor-role @@ -19,8 +28,8 @@ import ( // - hq-crew-role // - hq-polecat-role // -// Use RoleBeadIDTown() to get canonical role bead IDs. -// The legacy RoleBeadID() function returns gt--role for backward compatibility. +// Legacy functions RoleBeadID() and RoleBeadIDTown() still work for +// backward compatibility but should not be used in new code. // RoleBeadID returns the role bead ID for a given role type. // Role beads define lifecycle configuration for each agent type. @@ -67,6 +76,9 @@ func PolecatRoleBeadID() string { // GetRoleConfig looks up a role bead and returns its parsed RoleConfig. // Returns nil, nil if the role bead doesn't exist or has no config. +// +// Deprecated: Use config.LoadRoleDefinition() instead. 
Role definitions +// are now config-based, not stored as beads. func (b *Beads) GetRoleConfig(roleBeadID string) (*RoleConfig, error) { issue, err := b.Show(roleBeadID) if err != nil { @@ -94,7 +106,9 @@ func HasLabel(issue *Issue, label string) bool { } // RoleBeadDef defines a role bead's metadata. -// Used by gt install and gt doctor to create missing role beads. +// +// Deprecated: Role beads are no longer created. Role definitions are +// now config-based (internal/config/roles/*.toml). type RoleBeadDef struct { ID string // e.g., "hq-witness-role" Title string // e.g., "Witness Role" @@ -102,8 +116,9 @@ type RoleBeadDef struct { } // AllRoleBeadDefs returns all role bead definitions. -// This is the single source of truth for role beads used by both -// gt install (initial creation) and gt doctor --fix (repair). +// +// Deprecated: Role beads are no longer created by gt install or gt doctor. +// This function is kept for backward compatibility only. func AllRoleBeadDefs() []RoleBeadDef { return []RoleBeadDef{ { diff --git a/internal/beads/beads_test.go b/internal/beads/beads_test.go index eeb907df..103b68e4 100644 --- a/internal/beads/beads_test.go +++ b/internal/beads/beads_test.go @@ -1972,7 +1972,6 @@ func TestCreateOrReopenAgentBead_ClosedBead(t *testing.T) { Rig: "testrig", AgentState: "spawning", HookBead: "test-task-1", - RoleBead: "test-polecat-role", }) if err != nil { t.Fatalf("Spawn 1 - CreateOrReopenAgentBead: %v", err) @@ -1993,7 +1992,6 @@ func TestCreateOrReopenAgentBead_ClosedBead(t *testing.T) { Rig: "testrig", AgentState: "spawning", HookBead: "test-task-2", // Different task - RoleBead: "test-polecat-role", }) if err != nil { t.Fatalf("Spawn 2 - CreateOrReopenAgentBead: %v", err) @@ -2020,7 +2018,6 @@ func TestCreateOrReopenAgentBead_ClosedBead(t *testing.T) { Rig: "testrig", AgentState: "spawning", HookBead: "test-task-3", - RoleBead: "test-polecat-role", }) if err != nil { t.Fatalf("Spawn 3 - CreateOrReopenAgentBead: %v", err) @@ -2059,7 
+2056,6 @@ func TestCloseAndClearAgentBead_FieldClearing(t *testing.T) { Rig: "testrig", AgentState: "running", HookBead: "test-issue-123", - RoleBead: "test-polecat-role", CleanupStatus: "clean", ActiveMR: "test-mr-456", NotificationLevel: "normal", @@ -2279,7 +2275,6 @@ func TestCloseAndClearAgentBead_ReopenHasCleanState(t *testing.T) { Rig: "testrig", AgentState: "running", HookBead: "test-old-issue", - RoleBead: "test-polecat-role", CleanupStatus: "clean", ActiveMR: "test-old-mr", NotificationLevel: "normal", @@ -2300,7 +2295,6 @@ func TestCloseAndClearAgentBead_ReopenHasCleanState(t *testing.T) { Rig: "testrig", AgentState: "spawning", HookBead: "test-new-issue", - RoleBead: "test-polecat-role", }) if err != nil { t.Fatalf("CreateOrReopenAgentBead: %v", err) diff --git a/internal/beads/handoff.go b/internal/beads/handoff.go index 7ab4afc5..0a2e7ee9 100644 --- a/internal/beads/handoff.go +++ b/internal/beads/handoff.go @@ -158,8 +158,12 @@ func (b *Beads) AttachMolecule(pinnedBeadID, moleculeID string) (*Issue, error) return nil, fmt.Errorf("fetching pinned bead: %w", err) } + // Allow pinned beads OR open polecat agent beads (polecats have a lifecycle, not permanent) if issue.Status != StatusPinned { - return nil, fmt.Errorf("issue %s is not pinned (status: %s)", pinnedBeadID, issue.Status) + _, role, _, ok := ParseAgentBeadID(pinnedBeadID) + if !(issue.Status == "open" && ok && role == "polecat") { + return nil, fmt.Errorf("issue %s is not pinned or open polecat (status: %s)", pinnedBeadID, issue.Status) + } } // Build attachment fields with current timestamp diff --git a/internal/boot/boot.go b/internal/boot/boot.go index d6057a6c..ad01d3c9 100644 --- a/internal/boot/boot.go +++ b/internal/boot/boot.go @@ -160,9 +160,10 @@ func (b *Boot) Spawn(agentOverride string) error { // spawnTmux spawns Boot in a tmux session. func (b *Boot) spawnTmux(agentOverride string) error { - // Kill any stale session first + // Kill any stale session first. 
+ // Use KillSessionWithProcesses to ensure all descendant processes are killed. if b.IsSessionAlive() { - _ = b.tmux.KillSession(SessionName) + _ = b.tmux.KillSessionWithProcesses(SessionName) } // Ensure boot directory exists (it should have CLAUDE.md with Boot context) diff --git a/internal/claude/config/settings-autonomous.json b/internal/claude/config/settings-autonomous.json index 07267d51..463758be 100644 --- a/internal/claude/config/settings-autonomous.json +++ b/internal/claude/config/settings-autonomous.json @@ -3,6 +3,35 @@ "beads@beads-marketplace": false }, "hooks": { + "PreToolUse": [ + { + "matcher": "Bash(gh pr create*)", + "hooks": [ + { + "type": "command", + "command": "export PATH=\"$HOME/go/bin:$HOME/.local/bin:$PATH\" && gt tap guard pr-workflow" + } + ] + }, + { + "matcher": "Bash(git checkout -b*)", + "hooks": [ + { + "type": "command", + "command": "export PATH=\"$HOME/go/bin:$HOME/.local/bin:$PATH\" && gt tap guard pr-workflow" + } + ] + }, + { + "matcher": "Bash(git switch -c*)", + "hooks": [ + { + "type": "command", + "command": "export PATH=\"$HOME/go/bin:$HOME/.local/bin:$PATH\" && gt tap guard pr-workflow" + } + ] + } + ], "SessionStart": [ { "matcher": "", diff --git a/internal/claude/config/settings-interactive.json b/internal/claude/config/settings-interactive.json index 66747e06..3ee30e5e 100644 --- a/internal/claude/config/settings-interactive.json +++ b/internal/claude/config/settings-interactive.json @@ -3,6 +3,35 @@ "beads@beads-marketplace": false }, "hooks": { + "PreToolUse": [ + { + "matcher": "Bash(gh pr create*)", + "hooks": [ + { + "type": "command", + "command": "export PATH=\"$HOME/go/bin:$HOME/.local/bin:$PATH\" && gt tap guard pr-workflow" + } + ] + }, + { + "matcher": "Bash(git checkout -b*)", + "hooks": [ + { + "type": "command", + "command": "export PATH=\"$HOME/go/bin:$HOME/.local/bin:$PATH\" && gt tap guard pr-workflow" + } + ] + }, + { + "matcher": "Bash(git switch -c*)", + "hooks": [ + { + "type": "command", + 
"command": "export PATH=\"$HOME/go/bin:$HOME/.local/bin:$PATH\" && gt tap guard pr-workflow" + } + ] + } + ], "SessionStart": [ { "matcher": "", diff --git a/internal/cmd/account_test.go b/internal/cmd/account_test.go index 2fb3ef33..f93b059e 100644 --- a/internal/cmd/account_test.go +++ b/internal/cmd/account_test.go @@ -3,6 +3,8 @@ package cmd import ( "os" "path/filepath" + "runtime" + "strings" "testing" "time" @@ -54,15 +56,33 @@ func setupTestTownForAccount(t *testing.T) (townRoot string, accountsDir string) return townRoot, accountsDir } +func setTestHome(t *testing.T, fakeHome string) { + t.Helper() + + t.Setenv("HOME", fakeHome) + + if runtime.GOOS != "windows" { + return + } + + t.Setenv("USERPROFILE", fakeHome) + + drive := filepath.VolumeName(fakeHome) + if drive == "" { + return + } + + t.Setenv("HOMEDRIVE", drive) + t.Setenv("HOMEPATH", strings.TrimPrefix(fakeHome, drive)) +} + func TestAccountSwitch(t *testing.T) { t.Run("switch between accounts", func(t *testing.T) { townRoot, accountsDir := setupTestTownForAccount(t) // Create fake home directory for ~/.claude fakeHome := t.TempDir() - originalHome := os.Getenv("HOME") - os.Setenv("HOME", fakeHome) - defer os.Setenv("HOME", originalHome) + setTestHome(t, fakeHome) // Create account config directories workConfigDir := filepath.Join(accountsDir, "work") @@ -133,9 +153,7 @@ func TestAccountSwitch(t *testing.T) { townRoot, accountsDir := setupTestTownForAccount(t) fakeHome := t.TempDir() - originalHome := os.Getenv("HOME") - os.Setenv("HOME", fakeHome) - defer os.Setenv("HOME", originalHome) + setTestHome(t, fakeHome) workConfigDir := filepath.Join(accountsDir, "work") if err := os.MkdirAll(workConfigDir, 0755); err != nil { @@ -186,9 +204,7 @@ func TestAccountSwitch(t *testing.T) { townRoot, accountsDir := setupTestTownForAccount(t) fakeHome := t.TempDir() - originalHome := os.Getenv("HOME") - os.Setenv("HOME", fakeHome) - defer os.Setenv("HOME", originalHome) + setTestHome(t, fakeHome) workConfigDir 
:= filepath.Join(accountsDir, "work") if err := os.MkdirAll(workConfigDir, 0755); err != nil { @@ -224,9 +240,7 @@ func TestAccountSwitch(t *testing.T) { townRoot, accountsDir := setupTestTownForAccount(t) fakeHome := t.TempDir() - originalHome := os.Getenv("HOME") - os.Setenv("HOME", fakeHome) - defer os.Setenv("HOME", originalHome) + setTestHome(t, fakeHome) workConfigDir := filepath.Join(accountsDir, "work") personalConfigDir := filepath.Join(accountsDir, "personal") diff --git a/internal/cmd/bead.go b/internal/cmd/bead.go index a39abd89..80ee164a 100644 --- a/internal/cmd/bead.go +++ b/internal/cmd/bead.go @@ -57,10 +57,29 @@ Examples: }, } +var beadReadCmd = &cobra.Command{ + Use: "read [flags]", + Short: "Show details of a bead (alias for 'show')", + Long: `Displays the full details of a bead by ID. + +This is an alias for 'gt bead show'. All bd show flags are supported. + +Examples: + gt bead read gt-abc123 # Show a gastown issue + gt bead read hq-xyz789 # Show a town-level bead + gt bead read bd-def456 # Show a beads issue + gt bead read gt-abc123 --json # Output as JSON`, + DisableFlagParsing: true, // Pass all flags through to bd show + RunE: func(cmd *cobra.Command, args []string) error { + return runShow(cmd, args) + }, +} + func init() { beadMoveCmd.Flags().BoolVarP(&beadMoveDryRun, "dry-run", "n", false, "Show what would be done") beadCmd.AddCommand(beadMoveCmd) beadCmd.AddCommand(beadShowCmd) + beadCmd.AddCommand(beadReadCmd) rootCmd.AddCommand(beadCmd) } diff --git a/internal/cmd/boot.go b/internal/cmd/boot.go index 5142c695..0af2cf7f 100644 --- a/internal/cmd/boot.go +++ b/internal/cmd/boot.go @@ -301,9 +301,10 @@ func runDegradedTriage(b *boot.Boot) (action, target string, err error) { // Nudge the session to try to wake it up age := hb.Age() if age > 30*time.Minute { - // Very stuck - restart the session + // Very stuck - restart the session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
fmt.Printf("Deacon heartbeat is %s old - restarting session\n", age.Round(time.Minute)) - if err := tm.KillSession(deaconSession); err == nil { + if err := tm.KillSessionWithProcesses(deaconSession); err == nil { return "restart", "deacon-stuck", nil } } else { diff --git a/internal/cmd/convoy.go b/internal/cmd/convoy.go index 3cc1b18f..d2aa4a57 100644 --- a/internal/cmd/convoy.go +++ b/internal/cmd/convoy.go @@ -73,6 +73,7 @@ var ( convoyStrandedJSON bool convoyCloseReason string convoyCloseNotify string + convoyCheckDryRun bool ) var convoyCmd = &cobra.Command{ @@ -177,14 +178,22 @@ Examples: } var convoyCheckCmd = &cobra.Command{ - Use: "check", + Use: "check [convoy-id]", Short: "Check and auto-close completed convoys", - Long: `Check all open convoys and auto-close any where all tracked issues are complete. + Long: `Check convoys and auto-close any where all tracked issues are complete. + +Without arguments, checks all open convoys. With a convoy ID, checks only that convoy. This handles cross-rig convoy completion: convoys in town beads tracking issues in rig beads won't auto-close via bd close alone. This command bridges that gap. -Can be run manually or by deacon patrol to ensure convoys close promptly.`, +Can be run manually or by deacon patrol to ensure convoys close promptly. 
+ +Examples: + gt convoy check # Check all open convoys + gt convoy check hq-cv-abc # Check specific convoy + gt convoy check --dry-run # Preview what would close without acting`, + Args: cobra.MaximumNArgs(1), RunE: runConvoyCheck, } @@ -248,6 +257,9 @@ func init() { // Interactive TUI flag (on parent command) convoyCmd.Flags().BoolVarP(&convoyInteractive, "interactive", "i", false, "Interactive tree view") + // Check flags + convoyCheckCmd.Flags().BoolVar(&convoyCheckDryRun, "dry-run", false, "Preview what would close without acting") + // Stranded flags convoyStrandedCmd.Flags().BoolVar(&convoyStrandedJSON, "json", false, "Output as JSON") @@ -299,8 +311,14 @@ func runConvoyCreate(cmd *cobra.Command, args []string) error { // Create convoy issue in town beads description := fmt.Sprintf("Convoy tracking %d issues", len(trackedIssues)) - if convoyOwner != "" { - description += fmt.Sprintf("\nOwner: %s", convoyOwner) + + // Default owner to creator identity if not specified + owner := convoyOwner + if owner == "" { + owner = detectSender() + } + if owner != "" { + description += fmt.Sprintf("\nOwner: %s", owner) } if convoyNotify != "" { description += fmt.Sprintf("\nNotify: %s", convoyNotify) @@ -365,8 +383,8 @@ func runConvoyCreate(cmd *cobra.Command, args []string) error { if len(trackedIssues) > 0 { fmt.Printf(" Issues: %s\n", strings.Join(trackedIssues, ", ")) } - if convoyOwner != "" { - fmt.Printf(" Owner: %s\n", convoyOwner) + if owner != "" { + fmt.Printf(" Owner: %s\n", owner) } if convoyNotify != "" { fmt.Printf(" Notify: %s\n", convoyNotify) @@ -472,7 +490,14 @@ func runConvoyCheck(cmd *cobra.Command, args []string) error { return err } - closed, err := checkAndCloseCompletedConvoys(townBeads) + // If a specific convoy ID is provided, check only that convoy + if len(args) == 1 { + convoyID := args[0] + return checkSingleConvoy(townBeads, convoyID, convoyCheckDryRun) + } + + // Check all open convoys + closed, err := 
checkAndCloseCompletedConvoys(townBeads, convoyCheckDryRun) if err != nil { return err } @@ -480,7 +505,11 @@ func runConvoyCheck(cmd *cobra.Command, args []string) error { if len(closed) == 0 { fmt.Println("No convoys ready to close.") } else { - fmt.Printf("%s Auto-closed %d convoy(s):\n", style.Bold.Render("✓"), len(closed)) + if convoyCheckDryRun { + fmt.Printf("%s Would auto-close %d convoy(s):\n", style.Warning.Render("⚠"), len(closed)) + } else { + fmt.Printf("%s Auto-closed %d convoy(s):\n", style.Bold.Render("✓"), len(closed)) + } for _, c := range closed { fmt.Printf(" 🚚 %s: %s\n", c.ID, c.Title) } @@ -489,6 +518,92 @@ func runConvoyCheck(cmd *cobra.Command, args []string) error { return nil } +// checkSingleConvoy checks a specific convoy and closes it if all tracked issues are complete. +func checkSingleConvoy(townBeads, convoyID string, dryRun bool) error { + // Get convoy details + showArgs := []string{"show", convoyID, "--json"} + showCmd := exec.Command("bd", showArgs...) 
+ showCmd.Dir = townBeads + var stdout bytes.Buffer + showCmd.Stdout = &stdout + + if err := showCmd.Run(); err != nil { + return fmt.Errorf("convoy '%s' not found", convoyID) + } + + var convoys []struct { + ID string `json:"id"` + Title string `json:"title"` + Status string `json:"status"` + Type string `json:"issue_type"` + Description string `json:"description"` + } + if err := json.Unmarshal(stdout.Bytes(), &convoys); err != nil { + return fmt.Errorf("parsing convoy data: %w", err) + } + + if len(convoys) == 0 { + return fmt.Errorf("convoy '%s' not found", convoyID) + } + + convoy := convoys[0] + + // Verify it's actually a convoy type + if convoy.Type != "convoy" { + return fmt.Errorf("'%s' is not a convoy (type: %s)", convoyID, convoy.Type) + } + + // Check if convoy is already closed + if convoy.Status == "closed" { + fmt.Printf("%s Convoy %s is already closed\n", style.Dim.Render("○"), convoyID) + return nil + } + + // Get tracked issues + tracked := getTrackedIssues(townBeads, convoyID) + if len(tracked) == 0 { + fmt.Printf("%s Convoy %s has no tracked issues\n", style.Dim.Render("○"), convoyID) + return nil + } + + // Check if all tracked issues are closed + allClosed := true + openCount := 0 + for _, t := range tracked { + if t.Status != "closed" && t.Status != "tombstone" { + allClosed = false + openCount++ + } + } + + if !allClosed { + fmt.Printf("%s Convoy %s has %d open issue(s) remaining\n", style.Dim.Render("○"), convoyID, openCount) + return nil + } + + // All tracked issues are complete - close the convoy + if dryRun { + fmt.Printf("%s Would auto-close convoy 🚚 %s: %s\n", style.Warning.Render("⚠"), convoyID, convoy.Title) + return nil + } + + // Actually close the convoy + closeArgs := []string{"close", convoyID, "-r", "All tracked issues completed"} + closeCmd := exec.Command("bd", closeArgs...) 
+ closeCmd.Dir = townBeads + + if err := closeCmd.Run(); err != nil { + return fmt.Errorf("closing convoy: %w", err) + } + + fmt.Printf("%s Auto-closed convoy 🚚 %s: %s\n", style.Bold.Render("✓"), convoyID, convoy.Title) + + // Send completion notification + notifyConvoyCompletion(townBeads, convoyID, convoy.Title) + + return nil +} + func runConvoyClose(cmd *cobra.Command, args []string) error { convoyID := args[0] @@ -755,8 +870,9 @@ func isReadyIssue(t trackedIssueInfo, blockedIssues map[string]bool) bool { } // checkAndCloseCompletedConvoys finds open convoys where all tracked issues are closed -// and auto-closes them. Returns the list of convoys that were closed. -func checkAndCloseCompletedConvoys(townBeads string) ([]struct{ ID, Title string }, error) { +// and auto-closes them. Returns the list of convoys that were closed (or would be closed in dry-run mode). +// If dryRun is true, no changes are made and the function returns what would have been closed. +func checkAndCloseCompletedConvoys(townBeads string, dryRun bool) ([]struct{ ID, Title string }, error) { var closed []struct{ ID, Title string } // List all open convoys @@ -795,6 +911,12 @@ func checkAndCloseCompletedConvoys(townBeads string) ([]struct{ ID, Title string } if allClosed { + if dryRun { + // In dry-run mode, just record what would be closed + closed = append(closed, struct{ ID, Title string }{convoy.ID, convoy.Title}) + continue + } + // Close the convoy closeArgs := []string{"close", convoy.ID, "-r", "All tracked issues completed"} closeCmd := exec.Command("bd", closeArgs...) diff --git a/internal/cmd/costs_workdir_test.go b/internal/cmd/costs_workdir_test.go index 3954d69d..18abf739 100644 --- a/internal/cmd/costs_workdir_test.go +++ b/internal/cmd/costs_workdir_test.go @@ -37,6 +37,11 @@ func filterGTEnv(env []string) []string { // 2. Creates session.ended events in both town and rig beads // 3. 
Verifies querySessionEvents finds events from both locations func TestQuerySessionEvents_FindsEventsFromAllLocations(t *testing.T) { + // Skip: bd CLI 0.47.2 has a bug where database writes don't commit + // ("sql: database is closed" during auto-flush). This affects all tests + // that create issues via bd create. See gt-lnn1xn for tracking. + t.Skip("bd CLI 0.47.2 bug: database writes don't commit") + // Skip if gt and bd are not installed if _, err := exec.LookPath("gt"); err != nil { t.Skip("gt not installed, skipping integration test") diff --git a/internal/cmd/crew_add.go b/internal/cmd/crew_add.go index a24b3444..b772872d 100644 --- a/internal/cmd/crew_add.go +++ b/internal/cmd/crew_add.go @@ -106,7 +106,6 @@ func runCrewAdd(cmd *cobra.Command, args []string) error { RoleType: "crew", Rig: rigName, AgentState: "idle", - RoleBead: beads.RoleBeadIDTown("crew"), } desc := fmt.Sprintf("Crew worker %s in %s - human-managed persistent workspace.", name, rigName) if _, err := bd.CreateAgentBead(crewID, desc, fields); err != nil { diff --git a/internal/cmd/crew_maintenance.go b/internal/cmd/crew_maintenance.go index 7665515f..8a9dcf54 100644 --- a/internal/cmd/crew_maintenance.go +++ b/internal/cmd/crew_maintenance.go @@ -28,11 +28,12 @@ func runCrewRename(cmd *cobra.Command, args []string) error { return err } - // Kill any running session for the old name + // Kill any running session for the old name. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
t := tmux.NewTmux() oldSessionID := crewSessionName(r.Name, oldName) if hasSession, _ := t.HasSession(oldSessionID); hasSession { - if err := t.KillSession(oldSessionID); err != nil { + if err := t.KillSessionWithProcesses(oldSessionID); err != nil { return fmt.Errorf("killing old session: %w", err) } fmt.Printf("Killed session %s\n", oldSessionID) diff --git a/internal/cmd/deacon.go b/internal/cmd/deacon.go index 969ac145..17286bcc 100644 --- a/internal/cmd/deacon.go +++ b/internal/cmd/deacon.go @@ -264,6 +264,30 @@ Example: RunE: runDeaconCleanupOrphans, } +var deaconZombieScanCmd = &cobra.Command{ + Use: "zombie-scan", + Short: "Find and clean zombie Claude processes not in active tmux sessions", + Long: `Find and clean zombie Claude processes not in active tmux sessions. + +Unlike cleanup-orphans (which uses TTY detection), zombie-scan uses tmux +verification: it checks if each Claude process is in an active tmux session +by comparing against actual pane PIDs. + +A process is a zombie if: +- It's a Claude/codex process +- It's NOT the pane PID of any active tmux session +- It's NOT a child of any pane PID +- It's older than 60 seconds + +This catches "ghost" processes that have a TTY (from a dead tmux session) +but are no longer part of any active Gas Town session. 
+ +Examples: + gt deacon zombie-scan # Find and kill zombies + gt deacon zombie-scan --dry-run # Just list zombies, don't kill`, + RunE: runDeaconZombieScan, +} + var ( triggerTimeout time.Duration @@ -282,6 +306,9 @@ var ( // Pause flags pauseReason string + + // Zombie scan flags + zombieScanDryRun bool ) func init() { @@ -299,6 +326,7 @@ func init() { deaconCmd.AddCommand(deaconPauseCmd) deaconCmd.AddCommand(deaconResumeCmd) deaconCmd.AddCommand(deaconCleanupOrphansCmd) + deaconCmd.AddCommand(deaconZombieScanCmd) // Flags for trigger-pending deaconTriggerPendingCmd.Flags().DurationVar(&triggerTimeout, "timeout", 2*time.Second, @@ -328,6 +356,10 @@ func init() { deaconPauseCmd.Flags().StringVar(&pauseReason, "reason", "", "Reason for pausing the Deacon") + // Flags for zombie-scan + deaconZombieScanCmd.Flags().BoolVar(&zombieScanDryRun, "dry-run", false, + "List zombies without killing them") + deaconStartCmd.Flags().StringVar(&deaconAgentOverride, "agent", "", "Agent alias to run the Deacon with (overrides town default)") deaconAttachCmd.Flags().StringVar(&deaconAgentOverride, "agent", "", "Agent alias to run the Deacon with (overrides town default)") deaconRestartCmd.Flags().StringVar(&deaconAgentOverride, "agent", "", "Agent alias to run the Deacon with (overrides town default)") @@ -459,8 +491,9 @@ func runDeaconStop(cmd *cobra.Command, args []string) error { _ = t.SendKeysRaw(sessionName, "C-c") time.Sleep(100 * time.Millisecond) - // Kill the session - if err := t.KillSession(sessionName); err != nil { + // Kill the session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + if err := t.KillSessionWithProcesses(sessionName); err != nil { return fmt.Errorf("killing session: %w", err) } @@ -560,8 +593,9 @@ func runDeaconRestart(cmd *cobra.Command, args []string) error { fmt.Println("Restarting Deacon...") if running { - // Kill existing session - if err := t.KillSession(sessionName); err != nil { + // Kill existing session. 
+ // Use KillSessionWithProcesses to ensure all descendant processes are killed. + if err := t.KillSessionWithProcesses(sessionName); err != nil { style.PrintWarning("failed to kill session: %v", err) } } @@ -844,9 +878,10 @@ func runDeaconForceKill(cmd *cobra.Command, args []string) error { mailBody := fmt.Sprintf("Deacon detected %s as unresponsive.\nReason: %s\nAction: force-killing session", agent, reason) sendMail(townRoot, agent, "FORCE_KILL: unresponsive", mailBody) - // Step 2: Kill the tmux session + // Step 2: Kill the tmux session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. fmt.Printf("%s Killing tmux session %s...\n", style.Dim.Render("2."), sessionName) - if err := t.KillSession(sessionName); err != nil { + if err := t.KillSessionWithProcesses(sessionName); err != nil { return fmt.Errorf("killing session: %w", err) } @@ -1185,3 +1220,68 @@ func runDeaconCleanupOrphans(cmd *cobra.Command, args []string) error { return nil } + +// runDeaconZombieScan finds and cleans zombie Claude processes not in active tmux sessions. 
+func runDeaconZombieScan(cmd *cobra.Command, args []string) error { + // Find zombies using tmux verification + zombies, err := util.FindZombieClaudeProcesses() + if err != nil { + return fmt.Errorf("finding zombie processes: %w", err) + } + + if len(zombies) == 0 { + fmt.Printf("%s No zombie claude processes found\n", style.Dim.Render("○")) + return nil + } + + fmt.Printf("%s Found %d zombie claude process(es)\n", style.Bold.Render("●"), len(zombies)) + + // In dry-run mode, just list them + if zombieScanDryRun { + for _, z := range zombies { + ageStr := fmt.Sprintf("%dm", z.Age/60) + fmt.Printf(" %s PID %d (%s) TTY=%s age=%s\n", + style.Dim.Render("→"), z.PID, z.Cmd, z.TTY, ageStr) + } + fmt.Printf("%s Dry run - no processes killed\n", style.Dim.Render("○")) + return nil + } + + // Process them with signal escalation + results, err := util.CleanupZombieClaudeProcesses() + if err != nil { + style.PrintWarning("cleanup had errors: %v", err) + } + + // Report results + var terminated, escalated, unkillable int + for _, r := range results { + switch r.Signal { + case "SIGTERM": + fmt.Printf(" %s Sent SIGTERM to PID %d (%s) TTY=%s\n", + style.Bold.Render("→"), r.Process.PID, r.Process.Cmd, r.Process.TTY) + terminated++ + case "SIGKILL": + fmt.Printf(" %s Escalated to SIGKILL for PID %d (%s)\n", + style.Bold.Render("!"), r.Process.PID, r.Process.Cmd) + escalated++ + case "UNKILLABLE": + fmt.Printf(" %s WARNING: PID %d (%s) survived SIGKILL\n", + style.Bold.Render("⚠"), r.Process.PID, r.Process.Cmd) + unkillable++ + } + } + + if len(results) > 0 { + summary := fmt.Sprintf("Processed %d zombie(s)", len(results)) + if escalated > 0 { + summary += fmt.Sprintf(" (%d escalated to SIGKILL)", escalated) + } + if unkillable > 0 { + summary += fmt.Sprintf(" (%d unkillable)", unkillable) + } + fmt.Printf("%s %s\n", style.Bold.Render("✓"), summary) + } + + return nil +} diff --git a/internal/cmd/doctor.go b/internal/cmd/doctor.go index bc269ff4..1b5b2b2b 100644 --- 
a/internal/cmd/doctor.go +++ b/internal/cmd/doctor.go @@ -134,10 +134,12 @@ func runDoctor(cmd *cobra.Command, args []string) error { d.Register(doctor.NewPrefixMismatchCheck()) d.Register(doctor.NewRoutesCheck()) d.Register(doctor.NewRigRoutesJSONLCheck()) + d.Register(doctor.NewRoutingModeCheck()) d.Register(doctor.NewOrphanSessionCheck()) d.Register(doctor.NewZombieSessionCheck()) d.Register(doctor.NewOrphanProcessCheck()) d.Register(doctor.NewWispGCCheck()) + d.Register(doctor.NewCheckMisclassifiedWisps()) d.Register(doctor.NewBranchCheck()) d.Register(doctor.NewBeadsSyncOrphanCheck()) d.Register(doctor.NewCloneDivergenceCheck()) diff --git a/internal/cmd/done.go b/internal/cmd/done.go index 792aab17..4fd24a35 100644 --- a/internal/cmd/done.go +++ b/internal/cmd/done.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "github.com/spf13/cobra" @@ -81,6 +82,14 @@ func init() { } func runDone(cmd *cobra.Command, args []string) error { + // Guard: Only polecats should call gt done + // Crew, deacons, witnesses etc. don't use gt done - they persist across tasks. + // Polecats are ephemeral workers that self-destruct after completing work. 
+ actor := os.Getenv("BD_ACTOR") + if actor != "" && !isPolecatActor(actor) { + return fmt.Errorf("gt done is for polecats only (you are %s)\nPolecats are ephemeral workers that self-destruct after completing work.\nOther roles persist across tasks and don't use gt done.", actor) + } + // Handle --phase-complete flag (overrides --status) var exitType string if donePhaseComplete { @@ -462,27 +471,28 @@ func runDone(cmd *cobra.Command, args []string) error { // This is the self-cleaning model - polecats clean up after themselves // "done means gone" - both worktree and session are terminated selfCleanAttempted := false - if exitType == ExitCompleted { - if roleInfo, err := GetRoleWithContext(cwd, townRoot); err == nil && roleInfo.Role == RolePolecat { - selfCleanAttempted = true + if roleInfo, err := GetRoleWithContext(cwd, townRoot); err == nil && roleInfo.Role == RolePolecat { + selfCleanAttempted = true - // Step 1: Nuke the worktree + // Step 1: Nuke the worktree (only for COMPLETED - other statuses preserve work) + if exitType == ExitCompleted { if err := selfNukePolecat(roleInfo, townRoot); err != nil { // Non-fatal: Witness will clean up if we fail style.PrintWarning("worktree nuke failed: %v (Witness will clean up)", err) } else { fmt.Printf("%s Worktree nuked\n", style.Bold.Render("✓")) } - - // Step 2: Kill our own session (this terminates Claude and the shell) - // This is the last thing we do - the process will be killed when tmux session dies - fmt.Printf("%s Terminating session (done means gone)\n", style.Bold.Render("→")) - if err := selfKillSession(townRoot, roleInfo); err != nil { - // If session kill fails, fall through to os.Exit - style.PrintWarning("session kill failed: %v", err) - } - // If selfKillSession succeeds, we won't reach here (process killed by tmux) } + + // Step 2: Kill our own session (this terminates Claude and the shell) + // This is the last thing we do - the process will be killed when tmux session dies + // All exit types kill 
the session - "done means gone" + fmt.Printf("%s Terminating session (done means gone)\n", style.Bold.Render("→")) + if err := selfKillSession(townRoot, roleInfo); err != nil { + // If session kill fails, fall through to os.Exit + style.PrintWarning("session kill failed: %v", err) + } + // If selfKillSession succeeds, we won't reach here (process killed by tmux) } // Fallback exit for non-polecats or if self-clean failed @@ -706,6 +716,14 @@ func selfNukePolecat(roleInfo RoleInfo, _ string) error { return nil } +// isPolecatActor checks if a BD_ACTOR value represents a polecat. +// Polecat actors have format: rigname/polecats/polecatname +// Non-polecat actors have formats like: gastown/crew/name, rigname/witness, etc. +func isPolecatActor(actor string) bool { + parts := strings.Split(actor, "/") + return len(parts) >= 2 && parts[1] == "polecats" +} + // selfKillSession terminates the polecat's own tmux session after logging the event. // This completes the self-cleaning model: "done means gone" - both worktree and session. // @@ -745,9 +763,12 @@ func selfKillSession(townRoot string, roleInfo RoleInfo) error { // Kill our own tmux session with proper process cleanup // This will terminate Claude and all child processes, completing the self-cleaning cycle. - // We use KillSessionWithProcesses to ensure no orphaned processes are left behind. + // We use KillSessionWithProcessesExcluding to ensure no orphaned processes are left behind, + // while excluding our own PID to avoid killing ourselves before cleanup completes. + // The tmux kill-session at the end will terminate us along with the session. 
t := tmux.NewTmux() - if err := t.KillSessionWithProcesses(sessionName); err != nil { + myPID := strconv.Itoa(os.Getpid()) + if err := t.KillSessionWithProcessesExcluding(sessionName, []string{myPID}); err != nil { return fmt.Errorf("killing session %s: %w", sessionName, err) } diff --git a/internal/cmd/done_test.go b/internal/cmd/done_test.go index 26272387..1166377a 100644 --- a/internal/cmd/done_test.go +++ b/internal/cmd/done_test.go @@ -341,3 +341,39 @@ func TestGetIssueFromAgentHook(t *testing.T) { }) } } + +// TestIsPolecatActor verifies that isPolecatActor correctly identifies +// polecat actors vs other roles based on the BD_ACTOR format. +func TestIsPolecatActor(t *testing.T) { + tests := []struct { + actor string + want bool + }{ + // Polecats: rigname/polecats/polecatname + {"testrig/polecats/furiosa", true}, + {"testrig/polecats/nux", true}, + {"myrig/polecats/witness", true}, // even if named "witness", still a polecat + + // Non-polecats + {"gastown/crew/george", false}, + {"gastown/crew/max", false}, + {"testrig/witness", false}, + {"testrig/deacon", false}, + {"testrig/mayor", false}, + {"gastown/refinery", false}, + + // Edge cases + {"", false}, + {"single", false}, + {"polecats/name", false}, // needs rig prefix + } + + for _, tt := range tests { + t.Run(tt.actor, func(t *testing.T) { + got := isPolecatActor(tt.actor) + if got != tt.want { + t.Errorf("isPolecatActor(%q) = %v, want %v", tt.actor, got, tt.want) + } + }) + } +} diff --git a/internal/cmd/gitinit.go b/internal/cmd/gitinit.go index 2a13b43f..af1f0dea 100644 --- a/internal/cmd/gitinit.go +++ b/internal/cmd/gitinit.go @@ -228,6 +228,12 @@ func createGitHubRepo(hqRoot, repo string, private bool) error { } fmt.Printf(" → Creating %s GitHub repository %s...\n", visibility, repo) + // Ensure there's at least one commit before pushing. + // gh repo create --push fails on empty repos with no commits. 
+ if err := ensureInitialCommit(hqRoot); err != nil { + return fmt.Errorf("creating initial commit: %w", err) + } + // Build gh repo create command args := []string{"repo", "create", repo, "--source", hqRoot} if private { @@ -252,6 +258,33 @@ func createGitHubRepo(hqRoot, repo string, private bool) error { return nil } +// ensureInitialCommit creates an initial commit if the repo has no commits. +// gh repo create --push requires at least one commit to push. +func ensureInitialCommit(hqRoot string) error { + // Check if commits exist + cmd := exec.Command("git", "rev-parse", "HEAD") + cmd.Dir = hqRoot + if cmd.Run() == nil { + return nil + } + + // Stage and commit + addCmd := exec.Command("git", "add", ".") + addCmd.Dir = hqRoot + if err := addCmd.Run(); err != nil { + return fmt.Errorf("git add: %w", err) + } + + commitCmd := exec.Command("git", "commit", "-m", "Initial Gas Town HQ") + commitCmd.Dir = hqRoot + if output, err := commitCmd.CombinedOutput(); err != nil { + return fmt.Errorf("git commit failed: %s", strings.TrimSpace(string(output))) + } + + fmt.Printf(" ✓ Created initial commit\n") + return nil +} + // InitGitForHarness is the shared implementation for git initialization. // It can be called from both 'gt git-init' and 'gt install --git'. // Note: Function name kept for backwards compatibility. diff --git a/internal/cmd/handoff.go b/internal/cmd/handoff.go index ccbd0ef6..f5dd4574 100644 --- a/internal/cmd/handoff.go +++ b/internal/cmd/handoff.go @@ -11,6 +11,7 @@ import ( "github.com/steveyegge/gastown/internal/config" "github.com/steveyegge/gastown/internal/constants" "github.com/steveyegge/gastown/internal/events" + "github.com/steveyegge/gastown/internal/mail" "github.com/steveyegge/gastown/internal/session" "github.com/steveyegge/gastown/internal/style" "github.com/steveyegge/gastown/internal/tmux" @@ -383,7 +384,20 @@ func buildRestartCommand(sessionName string) (string, error) { // 3. 
export Claude-related env vars (not inherited by fresh shell) // 4. run claude with the startup beacon (triggers immediate context loading) // Use exec to ensure clean process replacement. - runtimeCmd := config.GetRuntimeCommandWithPrompt("", beacon) + // + // Check if current session is using a non-default agent (GT_AGENT env var). + // If so, preserve it across handoff by using the override variant. + currentAgent := os.Getenv("GT_AGENT") + var runtimeCmd string + if currentAgent != "" { + var err error + runtimeCmd, err = config.GetRuntimeCommandWithPromptAndAgentOverride("", beacon, currentAgent) + if err != nil { + return "", fmt.Errorf("resolving agent config: %w", err) + } + } else { + runtimeCmd = config.GetRuntimeCommandWithPrompt("", beacon) + } // Build environment exports - role vars first, then Claude vars var exports []string @@ -397,6 +411,11 @@ func buildRestartCommand(sessionName string) (string, error) { } } + // Preserve GT_AGENT across handoff so agent override persists + if currentAgent != "" { + exports = append(exports, "GT_AGENT="+currentAgent) + } + // Add Claude-related env vars from current environment for _, name := range claudeEnvVars { if val := os.Getenv(name); val != "" { @@ -577,6 +596,9 @@ func sendHandoffMail(subject, message string) (string, error) { return "", fmt.Errorf("detecting agent identity: %w", err) } + // Normalize identity to match mailbox query format + agentID = mail.AddressToIdentity(agentID) + // Detect town root for beads location townRoot := detectTownRootFromCwd() if townRoot == "" { diff --git a/internal/cmd/install.go b/internal/cmd/install.go index c6e6d58c..40ea0252 100644 --- a/internal/cmd/install.go +++ b/internal/cmd/install.go @@ -278,7 +278,7 @@ func runInstall(cmd *cobra.Command, args []string) error { } } - // Create town-level agent beads (Mayor, Deacon) and role beads. + // Create town-level agent beads (Mayor, Deacon). 
// These use hq- prefix and are stored in town beads for cross-rig coordination. if err := initTownAgentBeads(absPath); err != nil { fmt.Printf(" %s Could not create town-level agent beads: %v\n", style.Dim.Render("⚠"), err) @@ -491,58 +491,30 @@ func ensureCustomTypes(beadsPath string) error { return nil } -// initTownAgentBeads creates town-level agent and role beads using hq- prefix. +// initTownAgentBeads creates town-level agent beads using hq- prefix. // This creates: // - hq-mayor, hq-deacon (agent beads for town-level agents) -// - hq-mayor-role, hq-deacon-role, hq-witness-role, hq-refinery-role, -// hq-polecat-role, hq-crew-role (role definition beads) // // These beads are stored in town beads (~/gt/.beads/) and are shared across all rigs. // Rig-level agent beads (witness, refinery) are created by gt rig add in rig beads. // -// ERROR HANDLING ASYMMETRY: -// Agent beads (Mayor, Deacon) use hard fail - installation aborts if creation fails. -// Role beads use soft fail - logs warning and continues if creation fails. +// Note: Role definitions are now config-based (internal/config/roles/*.toml), +// not stored as beads. See config-based-roles.md for details. // -// Rationale: Agent beads are identity beads that track agent state, hooks, and +// Agent beads use hard fail - installation aborts if creation fails. +// Agent beads are identity beads that track agent state, hooks, and // form the foundation of the CV/reputation ledger. Without them, agents cannot -// be properly tracked or coordinated. Role beads are documentation templates -// that define role characteristics but are not required for agent operation - -// agents can function without their role bead existing. +// be properly tracked or coordinated. func initTownAgentBeads(townPath string) error { bd := beads.New(townPath) // bd init doesn't enable "custom" issue types by default, but Gas Town uses - // agent/role beads during install and runtime. 
Ensure these types are enabled + // agent beads during install and runtime. Ensure these types are enabled // before attempting to create any town-level system beads. if err := ensureBeadsCustomTypes(townPath, constants.BeadsCustomTypesList()); err != nil { return err } - // Role beads (global templates) - use shared definitions from beads package - for _, role := range beads.AllRoleBeadDefs() { - // Check if already exists - if _, err := bd.Show(role.ID); err == nil { - continue // Already exists - } - - // Create role bead using the beads API - // CreateWithID with Type: "role" automatically adds gt:role label - _, err := bd.CreateWithID(role.ID, beads.CreateOptions{ - Title: role.Title, - Type: "role", - Description: role.Desc, - Priority: -1, // No priority - }) - if err != nil { - // Log but continue - role beads are optional - fmt.Printf(" %s Could not create role bead %s: %v\n", - style.Dim.Render("⚠"), role.ID, err) - continue - } - fmt.Printf(" ✓ Created role bead: %s\n", role.ID) - } - // Town-level agent beads agentDefs := []struct { id string @@ -584,7 +556,7 @@ func initTownAgentBeads(townPath string) error { Rig: "", // Town-level agents have no rig AgentState: "idle", HookBead: "", - RoleBead: beads.RoleBeadIDTown(agent.roleType), + // Note: RoleBead field removed - role definitions are now config-based } if _, err := bd.CreateAgentBead(agent.id, agent.title, fields); err != nil { diff --git a/internal/cmd/install_integration_test.go b/internal/cmd/install_integration_test.go index 71b0eefb..376fb3f2 100644 --- a/internal/cmd/install_integration_test.go +++ b/internal/cmd/install_integration_test.go @@ -122,46 +122,6 @@ func TestInstallBeadsHasCorrectPrefix(t *testing.T) { } } -// TestInstallTownRoleSlots validates that town-level agent beads -// have their role slot set after install. 
-func TestInstallTownRoleSlots(t *testing.T) { - // Skip if bd is not available - if _, err := exec.LookPath("bd"); err != nil { - t.Skip("bd not installed, skipping role slot test") - } - - tmpDir := t.TempDir() - hqPath := filepath.Join(tmpDir, "test-hq") - - gtBinary := buildGT(t) - - // Run gt install (includes beads init by default) - cmd := exec.Command(gtBinary, "install", hqPath) - cmd.Env = append(os.Environ(), "HOME="+tmpDir) - output, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("gt install failed: %v\nOutput: %s", err, output) - } - - // Log install output for CI debugging - t.Logf("gt install output:\n%s", output) - - // Verify beads directory was created - beadsDir := filepath.Join(hqPath, ".beads") - if _, err := os.Stat(beadsDir); os.IsNotExist(err) { - t.Fatalf("beads directory not created at %s", beadsDir) - } - - // List beads for debugging - listCmd := exec.Command("bd", "--no-daemon", "list", "--type=agent") - listCmd.Dir = hqPath - listOutput, _ := listCmd.CombinedOutput() - t.Logf("bd list --type=agent output:\n%s", listOutput) - - assertSlotValue(t, hqPath, "hq-mayor", "role", "hq-mayor-role") - assertSlotValue(t, hqPath, "hq-deacon", "role", "hq-deacon-role") -} - // TestInstallIdempotent validates that running gt install twice // on the same directory fails without --force flag. func TestInstallIdempotent(t *testing.T) { @@ -327,54 +287,6 @@ func TestInstallNoBeadsFlag(t *testing.T) { } } -// buildGT builds the gt binary and returns its path. -// It caches the build across tests in the same run. 
-var cachedGTBinary string - -func buildGT(t *testing.T) string { - t.Helper() - - if cachedGTBinary != "" { - // Verify cached binary still exists - if _, err := os.Stat(cachedGTBinary); err == nil { - return cachedGTBinary - } - // Binary was cleaned up, rebuild - cachedGTBinary = "" - } - - // Find project root (where go.mod is) - wd, err := os.Getwd() - if err != nil { - t.Fatalf("failed to get working directory: %v", err) - } - - // Walk up to find go.mod - projectRoot := wd - for { - if _, err := os.Stat(filepath.Join(projectRoot, "go.mod")); err == nil { - break - } - parent := filepath.Dir(projectRoot) - if parent == projectRoot { - t.Fatal("could not find project root (go.mod)") - } - projectRoot = parent - } - - // Build gt binary to a persistent temp location (not per-test) - tmpDir := os.TempDir() - tmpBinary := filepath.Join(tmpDir, "gt-integration-test") - cmd := exec.Command("go", "build", "-o", tmpBinary, "./cmd/gt") - cmd.Dir = projectRoot - if output, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("failed to build gt: %v\nOutput: %s", err, output) - } - - cachedGTBinary = tmpBinary - return tmpBinary -} - // assertDirExists checks that the given path exists and is a directory. func assertDirExists(t *testing.T, path, name string) { t.Helper() diff --git a/internal/cmd/mail.go b/internal/cmd/mail.go index 3d6be4a2..623f137c 100644 --- a/internal/cmd/mail.go +++ b/internal/cmd/mail.go @@ -21,6 +21,7 @@ var ( mailInboxJSON bool mailReadJSON bool mailInboxUnread bool + mailInboxAll bool mailInboxIdentity string mailCheckInject bool mailCheckJSON bool @@ -138,8 +139,13 @@ var mailInboxCmd = &cobra.Command{ If no address is specified, shows the current context's inbox. Use --identity for polecats to explicitly specify their identity. +By default, shows all messages. Use --unread to filter to unread only, +or --all to explicitly show all messages (read and unread). 
+ Examples: gt mail inbox # Current context (auto-detected) + gt mail inbox --all # Explicitly show all messages + gt mail inbox --unread # Show only unread messages gt mail inbox mayor/ # Mayor's inbox gt mail inbox greenplace/Toast # Polecat's inbox gt mail inbox --identity greenplace/Toast # Explicit polecat identity`, @@ -433,6 +439,7 @@ func init() { // Inbox flags mailInboxCmd.Flags().BoolVar(&mailInboxJSON, "json", false, "Output as JSON") mailInboxCmd.Flags().BoolVarP(&mailInboxUnread, "unread", "u", false, "Show only unread messages") + mailInboxCmd.Flags().BoolVarP(&mailInboxAll, "all", "a", false, "Show all messages (read and unread)") mailInboxCmd.Flags().StringVar(&mailInboxIdentity, "identity", "", "Explicit identity for inbox (e.g., greenplace/Toast)") mailInboxCmd.Flags().StringVar(&mailInboxIdentity, "address", "", "Alias for --identity") diff --git a/internal/cmd/mail_inbox.go b/internal/cmd/mail_inbox.go index 91ca4a51..2f865203 100644 --- a/internal/cmd/mail_inbox.go +++ b/internal/cmd/mail_inbox.go @@ -30,6 +30,11 @@ func getMailbox(address string) (*mail.Mailbox, error) { } func runMailInbox(cmd *cobra.Command, args []string) error { + // Check for mutually exclusive flags + if mailInboxAll && mailInboxUnread { + return errors.New("--all and --unread are mutually exclusive") + } + // Determine which inbox to check (priority: --identity flag, positional arg, auto-detect) address := "" if mailInboxIdentity != "" { @@ -46,6 +51,8 @@ func runMailInbox(cmd *cobra.Command, args []string) error { } // Get messages + // --all is the default behavior (shows all messages) + // --unread filters to only unread messages var messages []*mail.Message if mailInboxUnread { messages, err = mailbox.ListUnread() diff --git a/internal/cmd/migrate_agents.go b/internal/cmd/migrate_agents.go deleted file mode 100644 index 742326c8..00000000 --- a/internal/cmd/migrate_agents.go +++ /dev/null @@ -1,325 +0,0 @@ -package cmd - -import ( - "fmt" - "path/filepath" - 
"strings" - - "github.com/spf13/cobra" - "github.com/steveyegge/gastown/internal/beads" - "github.com/steveyegge/gastown/internal/workspace" -) - -var ( - migrateAgentsDryRun bool - migrateAgentsForce bool -) - -var migrateAgentsCmd = &cobra.Command{ - Use: "migrate-agents", - GroupID: GroupDiag, - Short: "Migrate agent beads to two-level architecture", - Long: `Migrate agent beads from the old single-tier to the two-level architecture. - -This command migrates town-level agent beads (Mayor, Deacon) from rig beads -with gt-* prefix to town beads with hq-* prefix: - - OLD (rig beads): gt-mayor, gt-deacon - NEW (town beads): hq-mayor, hq-deacon - -Rig-level agents (Witness, Refinery, Polecats) remain in rig beads unchanged. - -The migration: -1. Detects old gt-mayor/gt-deacon beads in rig beads -2. Creates new hq-mayor/hq-deacon beads in town beads -3. Copies agent state (hook_bead, agent_state, etc.) -4. Adds migration note to old beads (preserves them) - -Safety: -- Dry-run mode by default (use --execute to apply changes) -- Old beads are preserved with migration notes -- Validates new beads exist before marking migration complete -- Skips if new beads already exist (idempotent) - -Examples: - gt migrate-agents # Dry-run: show what would be migrated - gt migrate-agents --execute # Apply the migration - gt migrate-agents --force # Re-migrate even if new beads exist`, - RunE: runMigrateAgents, -} - -func init() { - migrateAgentsCmd.Flags().BoolVar(&migrateAgentsDryRun, "dry-run", true, "Show what would be migrated without making changes (default)") - migrateAgentsCmd.Flags().BoolVar(&migrateAgentsForce, "force", false, "Re-migrate even if new beads already exist") - // Add --execute as inverse of --dry-run for clarity - migrateAgentsCmd.Flags().BoolP("execute", "x", false, "Actually apply the migration (opposite of --dry-run)") - rootCmd.AddCommand(migrateAgentsCmd) -} - -// migrationResult holds the result of a single bead migration. 
-type migrationResult struct { - OldID string - NewID string - Status string // "migrated", "skipped", "error" - Message string - OldFields *beads.AgentFields - WasDryRun bool -} - -func runMigrateAgents(cmd *cobra.Command, args []string) error { - // Handle --execute flag - if execute, _ := cmd.Flags().GetBool("execute"); execute { - migrateAgentsDryRun = false - } - - // Find town root - townRoot, err := workspace.FindFromCwdOrError() - if err != nil { - return fmt.Errorf("not in a Gas Town workspace: %w", err) - } - - // Get town beads path - townBeadsDir := filepath.Join(townRoot, ".beads") - - // Load routes to find rig beads - routes, err := beads.LoadRoutes(townBeadsDir) - if err != nil { - return fmt.Errorf("loading routes.jsonl: %w", err) - } - - // Find the first rig with gt- prefix (where global agents are currently stored) - var sourceRigPath string - for _, r := range routes { - if strings.TrimSuffix(r.Prefix, "-") == "gt" && r.Path != "." { - sourceRigPath = r.Path - break - } - } - - if sourceRigPath == "" { - fmt.Println("No rig with gt- prefix found. 
Nothing to migrate.") - return nil - } - - // Source beads (rig beads where old agent beads are) - sourceBeadsDir := filepath.Join(townRoot, sourceRigPath, ".beads") - sourceBd := beads.New(sourceBeadsDir) - - // Target beads (town beads where new agent beads should go) - targetBd := beads.NewWithBeadsDir(townRoot, townBeadsDir) - - // Agents to migrate: town-level agents only - agentsToMigrate := []struct { - oldID string - newID string - desc string - }{ - { - oldID: beads.MayorBeadID(), // gt-mayor - newID: beads.MayorBeadIDTown(), // hq-mayor - desc: "Mayor - global coordinator, handles cross-rig communication and escalations.", - }, - { - oldID: beads.DeaconBeadID(), // gt-deacon - newID: beads.DeaconBeadIDTown(), // hq-deacon - desc: "Deacon (daemon beacon) - receives mechanical heartbeats, runs town plugins and monitoring.", - }, - } - - // Also migrate role beads - rolesToMigrate := []string{"mayor", "deacon", "witness", "refinery", "polecat", "crew", "dog"} - - if migrateAgentsDryRun { - fmt.Println("🔍 DRY RUN: Showing what would be migrated") - fmt.Println(" Use --execute to apply changes") - fmt.Println() - } else { - fmt.Println("🚀 Migrating agent beads to two-level architecture") - fmt.Println() - } - - var results []migrationResult - - // Migrate agent beads - fmt.Println("Agent Beads:") - for _, agent := range agentsToMigrate { - result := migrateAgentBead(sourceBd, targetBd, agent.oldID, agent.newID, agent.desc, migrateAgentsDryRun, migrateAgentsForce) - results = append(results, result) - printMigrationResult(result) - } - - // Migrate role beads - fmt.Println("\nRole Beads:") - for _, role := range rolesToMigrate { - oldID := "gt-" + role + "-role" - newID := beads.RoleBeadIDTown(role) // hq--role - result := migrateRoleBead(sourceBd, targetBd, oldID, newID, role, migrateAgentsDryRun, migrateAgentsForce) - results = append(results, result) - printMigrationResult(result) - } - - // Summary - fmt.Println() - printMigrationSummary(results, 
migrateAgentsDryRun) - - return nil -} - -// migrateAgentBead migrates a single agent bead from source to target. -func migrateAgentBead(sourceBd, targetBd *beads.Beads, oldID, newID, desc string, dryRun, force bool) migrationResult { - result := migrationResult{ - OldID: oldID, - NewID: newID, - WasDryRun: dryRun, - } - - // Check if old bead exists - oldIssue, oldFields, err := sourceBd.GetAgentBead(oldID) - if err != nil { - result.Status = "skipped" - result.Message = "old bead not found" - return result - } - result.OldFields = oldFields - - // Check if new bead already exists - if _, err := targetBd.Show(newID); err == nil { - if !force { - result.Status = "skipped" - result.Message = "new bead already exists (use --force to re-migrate)" - return result - } - } - - if dryRun { - result.Status = "would migrate" - result.Message = fmt.Sprintf("would copy state from %s", oldIssue.ID) - return result - } - - // Create new bead in town beads - newFields := &beads.AgentFields{ - RoleType: oldFields.RoleType, - Rig: oldFields.Rig, - AgentState: oldFields.AgentState, - HookBead: oldFields.HookBead, - RoleBead: beads.RoleBeadIDTown(oldFields.RoleType), // Update to hq- role - CleanupStatus: oldFields.CleanupStatus, - ActiveMR: oldFields.ActiveMR, - NotificationLevel: oldFields.NotificationLevel, - } - - _, err = targetBd.CreateAgentBead(newID, desc, newFields) - if err != nil { - result.Status = "error" - result.Message = fmt.Sprintf("failed to create: %v", err) - return result - } - - // Add migration label to old bead - migrationLabel := fmt.Sprintf("migrated-to:%s", newID) - if err := sourceBd.Update(oldID, beads.UpdateOptions{AddLabels: []string{migrationLabel}}); err != nil { - // Non-fatal: just log it - result.Message = fmt.Sprintf("created but couldn't add migration label: %v", err) - } - - result.Status = "migrated" - result.Message = "successfully migrated" - return result -} - -// migrateRoleBead migrates a role definition bead. 
-func migrateRoleBead(sourceBd, targetBd *beads.Beads, oldID, newID, role string, dryRun, force bool) migrationResult { - result := migrationResult{ - OldID: oldID, - NewID: newID, - WasDryRun: dryRun, - } - - // Check if old bead exists - oldIssue, err := sourceBd.Show(oldID) - if err != nil { - result.Status = "skipped" - result.Message = "old bead not found" - return result - } - - // Check if new bead already exists - if _, err := targetBd.Show(newID); err == nil { - if !force { - result.Status = "skipped" - result.Message = "new bead already exists (use --force to re-migrate)" - return result - } - } - - if dryRun { - result.Status = "would migrate" - result.Message = fmt.Sprintf("would copy from %s", oldIssue.ID) - return result - } - - // Create new role bead in town beads - // Role beads are simple - just copy the description - _, err = targetBd.CreateWithID(newID, beads.CreateOptions{ - Title: fmt.Sprintf("Role: %s", role), - Type: "role", - Description: oldIssue.Title, // Use old title as description - }) - if err != nil { - result.Status = "error" - result.Message = fmt.Sprintf("failed to create: %v", err) - return result - } - - // Add migration label to old bead - migrationLabel := fmt.Sprintf("migrated-to:%s", newID) - if err := sourceBd.Update(oldID, beads.UpdateOptions{AddLabels: []string{migrationLabel}}); err != nil { - // Non-fatal - result.Message = fmt.Sprintf("created but couldn't add migration label: %v", err) - } - - result.Status = "migrated" - result.Message = "successfully migrated" - return result -} - -func getMigrationStatusIcon(status string) string { - switch status { - case "migrated", "would migrate": - return " ✓" - case "skipped": - return " ⊘" - case "error": - return " ✗" - default: - return " ?" 
- } -} - -func printMigrationResult(r migrationResult) { - fmt.Printf("%s %s → %s: %s\n", getMigrationStatusIcon(r.Status), r.OldID, r.NewID, r.Message) -} - -func printMigrationSummary(results []migrationResult, dryRun bool) { - var migrated, skipped, errors int - for _, r := range results { - switch r.Status { - case "migrated", "would migrate": - migrated++ - case "skipped": - skipped++ - case "error": - errors++ - } - } - - if dryRun { - fmt.Printf("Summary (dry-run): %d would migrate, %d skipped, %d errors\n", migrated, skipped, errors) - if migrated > 0 { - fmt.Println("\nRun with --execute to apply these changes.") - } - } else { - fmt.Printf("Summary: %d migrated, %d skipped, %d errors\n", migrated, skipped, errors) - } -} diff --git a/internal/cmd/migrate_agents_test.go b/internal/cmd/migrate_agents_test.go deleted file mode 100644 index b5d1ea2f..00000000 --- a/internal/cmd/migrate_agents_test.go +++ /dev/null @@ -1,87 +0,0 @@ -package cmd - -import ( - "testing" - - "github.com/steveyegge/gastown/internal/beads" -) - -func TestMigrationResultStatus(t *testing.T) { - tests := []struct { - name string - result migrationResult - wantIcon string - }{ - { - name: "migrated shows checkmark", - result: migrationResult{ - OldID: "gt-mayor", - NewID: "hq-mayor", - Status: "migrated", - Message: "successfully migrated", - }, - wantIcon: " ✓", - }, - { - name: "would migrate shows checkmark", - result: migrationResult{ - OldID: "gt-mayor", - NewID: "hq-mayor", - Status: "would migrate", - Message: "would copy state from gt-mayor", - }, - wantIcon: " ✓", - }, - { - name: "skipped shows empty circle", - result: migrationResult{ - OldID: "gt-mayor", - NewID: "hq-mayor", - Status: "skipped", - Message: "already exists", - }, - wantIcon: " ⊘", - }, - { - name: "error shows X", - result: migrationResult{ - OldID: "gt-mayor", - NewID: "hq-mayor", - Status: "error", - Message: "failed to create", - }, - wantIcon: " ✗", - }, - } - - for _, tt := range tests { - 
t.Run(tt.name, func(t *testing.T) { - icon := getMigrationStatusIcon(tt.result.Status) - if icon != tt.wantIcon { - t.Errorf("getMigrationStatusIcon(%q) = %q, want %q", tt.result.Status, icon, tt.wantIcon) - } - }) - } -} - -func TestTownBeadIDHelpers(t *testing.T) { - tests := []struct { - name string - got string - want string - }{ - {"MayorBeadIDTown", beads.MayorBeadIDTown(), "hq-mayor"}, - {"DeaconBeadIDTown", beads.DeaconBeadIDTown(), "hq-deacon"}, - {"DogBeadIDTown", beads.DogBeadIDTown("fido"), "hq-dog-fido"}, - {"RoleBeadIDTown mayor", beads.RoleBeadIDTown("mayor"), "hq-mayor-role"}, - {"RoleBeadIDTown witness", beads.RoleBeadIDTown("witness"), "hq-witness-role"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.got != tt.want { - t.Errorf("%s = %q, want %q", tt.name, tt.got, tt.want) - } - }) - } -} diff --git a/internal/cmd/molecule_await_signal.go b/internal/cmd/molecule_await_signal.go index 696a1c33..a496d0a6 100644 --- a/internal/cmd/molecule_await_signal.go +++ b/internal/cmd/molecule_await_signal.go @@ -160,7 +160,14 @@ func runMoleculeAwaitSignal(cmd *cobra.Command, args []string) error { result.IdleCycles = newIdleCycles } } else if result.Reason == "signal" && awaitSignalAgentBead != "" { - // On signal, report current idle cycles (caller should reset) + // On signal, update last_activity to prove agent is alive + if err := updateAgentHeartbeat(awaitSignalAgentBead, beadsDir); err != nil { + if !awaitSignalQuiet { + fmt.Printf("%s Failed to update agent heartbeat: %v\n", + style.Dim.Render("⚠"), err) + } + } + // Report current idle cycles (caller should reset) result.IdleCycles = idleCycles } @@ -319,6 +326,14 @@ func parseIntSimple(s string) (int, error) { return n, nil } +// updateAgentHeartbeat updates the last_activity timestamp on an agent bead. +// This proves the agent is alive and processing signals. 
+func updateAgentHeartbeat(agentBead, beadsDir string) error { + cmd := exec.Command("bd", "agent", "heartbeat", agentBead) + cmd.Env = append(os.Environ(), "BEADS_DIR="+beadsDir) + return cmd.Run() +} + // setAgentIdleCycles sets the idle:N label on an agent bead. // Uses read-modify-write pattern to update only the idle label. func setAgentIdleCycles(agentBead, beadsDir string, cycles int) error { diff --git a/internal/cmd/molecule_step.go b/internal/cmd/molecule_step.go index d494b91a..f7408bd3 100644 --- a/internal/cmd/molecule_step.go +++ b/internal/cmd/molecule_step.go @@ -322,6 +322,12 @@ func handleStepContinue(cwd, townRoot, _ string, nextStep *beads.Issue, dryRun b t := tmux.NewTmux() + // Kill all processes in the pane before respawning to prevent process leaks + if err := t.KillPaneProcesses(pane); err != nil { + // Non-fatal but log the warning + style.PrintWarning("could not kill pane processes: %v", err) + } + // Clear history before respawn if err := t.ClearHistory(pane); err != nil { // Non-fatal diff --git a/internal/cmd/mq_list.go b/internal/cmd/mq_list.go index 9a5c2ef7..5288b308 100644 --- a/internal/cmd/mq_list.go +++ b/internal/cmd/mq_list.go @@ -48,9 +48,9 @@ func runMQList(cmd *cobra.Command, args []string) error { if err != nil { return fmt.Errorf("querying ready MRs: %w", err) } - // Filter to only merge-request type + // Filter to only merge-request label (issue_type field is deprecated) for _, issue := range allReady { - if issue.Type == "merge-request" { + if beads.HasLabel(issue, "gt:merge-request") { issues = append(issues, issue) } } diff --git a/internal/cmd/mq_test.go b/internal/cmd/mq_test.go index b7e91c7a..595ff800 100644 --- a/internal/cmd/mq_test.go +++ b/internal/cmd/mq_test.go @@ -740,3 +740,64 @@ func TestPolecatCleanupTimeoutConstant(t *testing.T) { t.Errorf("expectedMaxCleanupWait = %v, want 5m", expectedMaxCleanupWait) } } + +// TestMRFilteringByLabel verifies that MRs are identified by their gt:merge-request +// label 
rather than the deprecated issue_type field. This is the fix for #816 where +// MRs created by `gt done` have issue_type='task' but correct gt:merge-request label. +func TestMRFilteringByLabel(t *testing.T) { + tests := []struct { + name string + issue *beads.Issue + wantIsMR bool + }{ + { + name: "MR with correct label and wrong type (bug #816 scenario)", + issue: &beads.Issue{ + ID: "mr-1", + Title: "Merge: test-branch", + Type: "task", // Wrong type (default from bd create) + Labels: []string{"gt:merge-request"}, // Correct label + }, + wantIsMR: true, + }, + { + name: "MR with correct label and correct type", + issue: &beads.Issue{ + ID: "mr-2", + Title: "Merge: another-branch", + Type: "merge-request", + Labels: []string{"gt:merge-request"}, + }, + wantIsMR: true, + }, + { + name: "Task without MR label", + issue: &beads.Issue{ + ID: "task-1", + Title: "Regular task", + Type: "task", + Labels: []string{"other-label"}, + }, + wantIsMR: false, + }, + { + name: "Issue with no labels", + issue: &beads.Issue{ + ID: "issue-1", + Title: "No labels", + Type: "task", + }, + wantIsMR: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := beads.HasLabel(tt.issue, "gt:merge-request") + if got != tt.wantIsMR { + t.Errorf("HasLabel(%q, \"gt:merge-request\") = %v, want %v", + tt.issue.ID, got, tt.wantIsMR) + } + }) + } +} diff --git a/internal/cmd/orphans.go b/internal/cmd/orphans.go index c4895f3a..60b42940 100644 --- a/internal/cmd/orphans.go +++ b/internal/cmd/orphans.go @@ -13,6 +13,7 @@ import ( "github.com/spf13/cobra" "github.com/steveyegge/gastown/internal/style" + "github.com/steveyegge/gastown/internal/util" "github.com/steveyegge/gastown/internal/workspace" ) @@ -48,7 +49,8 @@ var ( orphansKillForce bool // Process orphan flags - orphansProcsForce bool + orphansProcsForce bool + orphansProcsAggressive bool ) // Commit orphan kill command @@ -89,10 +91,16 @@ var orphansProcsCmd = &cobra.Command{ These are processes that 
survived session termination and are now parented to init/launchd. They consume resources and should be killed. +Use --aggressive to detect ALL orphaned Claude processes by cross-referencing +against active tmux sessions. Any Claude process NOT in a gt-* or hq-* session +is considered an orphan. This catches processes that have been reparented to +something other than init (PPID != 1). + Examples: - gt orphans procs # List orphaned Claude processes - gt orphans procs list # Same as above - gt orphans procs kill # Kill orphaned processes`, + gt orphans procs # List orphaned Claude processes (PPID=1 only) + gt orphans procs list # Same as above + gt orphans procs --aggressive # List ALL orphaned processes (tmux verification) + gt orphans procs kill # Kill orphaned processes`, RunE: runOrphansListProcesses, // Default to list } @@ -104,12 +112,17 @@ var orphansProcsListCmd = &cobra.Command{ These are processes that survived session termination and are now parented to init/launchd. They consume resources and should be killed. +Use --aggressive to detect ALL orphaned Claude processes by cross-referencing +against active tmux sessions. Any Claude process NOT in a gt-* or hq-* session +is considered an orphan. + Excludes: - tmux server processes - Claude.app desktop application processes Examples: - gt orphans procs list # Show all orphan Claude processes`, + gt orphans procs list # Show orphans with PPID=1 + gt orphans procs list --aggressive # Show ALL orphans (tmux verification)`, RunE: runOrphansListProcesses, } @@ -120,10 +133,12 @@ var orphansProcsKillCmd = &cobra.Command{ Without flags, prompts for confirmation before killing. Use -f/--force to kill without confirmation. +Use --aggressive to kill ALL orphaned processes (not just PPID=1). 
Examples: - gt orphans procs kill # Kill with confirmation - gt orphans procs kill -f # Force kill without confirmation`, + gt orphans procs kill # Kill with confirmation + gt orphans procs kill -f # Force kill without confirmation + gt orphans procs kill --aggressive # Kill ALL orphans (tmux verification)`, RunE: runOrphansKillProcesses, } @@ -140,6 +155,9 @@ func init() { // Process orphan kill command flags orphansProcsKillCmd.Flags().BoolVarP(&orphansProcsForce, "force", "f", false, "Kill without confirmation") + // Aggressive flag for all procs commands (persistent so it applies to subcommands) + orphansProcsCmd.PersistentFlags().BoolVar(&orphansProcsAggressive, "aggressive", false, "Use tmux session verification to find ALL orphans (not just PPID=1)") + // Wire up subcommands orphansProcsCmd.AddCommand(orphansProcsListCmd) orphansProcsCmd.AddCommand(orphansProcsKillCmd) @@ -579,17 +597,22 @@ func isExcludedProcess(args string) bool { // runOrphansListProcesses lists orphaned Claude processes func runOrphansListProcesses(cmd *cobra.Command, args []string) error { + if orphansProcsAggressive { + return runOrphansListProcessesAggressive() + } + orphans, err := findOrphanProcesses() if err != nil { return fmt.Errorf("finding orphan processes: %w", err) } if len(orphans) == 0 { - fmt.Printf("%s No orphaned Claude processes found\n", style.Bold.Render("✓")) + fmt.Printf("%s No orphaned Claude processes found (PPID=1)\n", style.Bold.Render("✓")) + fmt.Printf("%s Use --aggressive to find orphans via tmux session verification\n", style.Dim.Render("Hint:")) return nil } - fmt.Printf("%s Found %d orphaned Claude process(es):\n\n", style.Warning.Render("⚠"), len(orphans)) + fmt.Printf("%s Found %d orphaned Claude process(es) with PPID=1:\n\n", style.Warning.Render("⚠"), len(orphans)) for _, o := range orphans { // Truncate args for display @@ -601,24 +624,72 @@ func runOrphansListProcesses(cmd *cobra.Command, args []string) error { } fmt.Printf("\n%s\n", 
style.Dim.Render("Use 'gt orphans procs kill' to terminate these processes")) + fmt.Printf("%s\n", style.Dim.Render("Use --aggressive to find more orphans via tmux session verification")) return nil } +// runOrphansListProcessesAggressive lists orphans using tmux session verification. +// This finds ALL Claude processes not in any gt-* or hq-* tmux session. +func runOrphansListProcessesAggressive() error { + zombies, err := util.FindZombieClaudeProcesses() + if err != nil { + return fmt.Errorf("finding zombie processes: %w", err) + } + + if len(zombies) == 0 { + fmt.Printf("%s No orphaned Claude processes found (aggressive mode)\n", style.Bold.Render("✓")) + return nil + } + + fmt.Printf("%s Found %d orphaned Claude process(es) not in any tmux session:\n\n", style.Warning.Render("⚠"), len(zombies)) + + for _, z := range zombies { + ageStr := formatProcessAge(z.Age) + fmt.Printf(" %s %s (age: %s, tty: %s)\n", + style.Bold.Render(fmt.Sprintf("PID %d", z.PID)), + z.Cmd, + style.Dim.Render(ageStr), + z.TTY) + } + + fmt.Printf("\n%s\n", style.Dim.Render("Use 'gt orphans procs kill --aggressive' to terminate these processes")) + + return nil +} + +// formatProcessAge formats seconds into a human-readable age string +func formatProcessAge(seconds int) string { + if seconds < 60 { + return fmt.Sprintf("%ds", seconds) + } + if seconds < 3600 { + return fmt.Sprintf("%dm%ds", seconds/60, seconds%60) + } + hours := seconds / 3600 + mins := (seconds % 3600) / 60 + return fmt.Sprintf("%dh%dm", hours, mins) +} + // runOrphansKillProcesses kills orphaned Claude processes func runOrphansKillProcesses(cmd *cobra.Command, args []string) error { + if orphansProcsAggressive { + return runOrphansKillProcessesAggressive() + } + orphans, err := findOrphanProcesses() if err != nil { return fmt.Errorf("finding orphan processes: %w", err) } if len(orphans) == 0 { - fmt.Printf("%s No orphaned Claude processes found\n", style.Bold.Render("✓")) + fmt.Printf("%s No orphaned Claude processes 
found (PPID=1)\n", style.Bold.Render("✓")) + fmt.Printf("%s Use --aggressive to find orphans via tmux session verification\n", style.Dim.Render("Hint:")) return nil } // Show what we're about to kill - fmt.Printf("%s Found %d orphaned Claude process(es):\n\n", style.Warning.Render("⚠"), len(orphans)) + fmt.Printf("%s Found %d orphaned Claude process(es) with PPID=1:\n\n", style.Warning.Render("⚠"), len(orphans)) for _, o := range orphans { displayArgs := o.Args if len(displayArgs) > 80 { @@ -674,3 +745,75 @@ func runOrphansKillProcesses(cmd *cobra.Command, args []string) error { return nil } + +// runOrphansKillProcessesAggressive kills orphans using tmux session verification. +// This kills ALL Claude processes not in any gt-* or hq-* tmux session. +func runOrphansKillProcessesAggressive() error { + zombies, err := util.FindZombieClaudeProcesses() + if err != nil { + return fmt.Errorf("finding zombie processes: %w", err) + } + + if len(zombies) == 0 { + fmt.Printf("%s No orphaned Claude processes found (aggressive mode)\n", style.Bold.Render("✓")) + return nil + } + + // Show what we're about to kill + fmt.Printf("%s Found %d orphaned Claude process(es) not in any tmux session:\n\n", style.Warning.Render("⚠"), len(zombies)) + for _, z := range zombies { + ageStr := formatProcessAge(z.Age) + fmt.Printf(" %s %s (age: %s, tty: %s)\n", + style.Bold.Render(fmt.Sprintf("PID %d", z.PID)), + z.Cmd, + style.Dim.Render(ageStr), + z.TTY) + } + fmt.Println() + + // Confirm unless --force + if !orphansProcsForce { + fmt.Printf("Kill these %d process(es)? 
[y/N] ", len(zombies)) + var response string + _, _ = fmt.Scanln(&response) + response = strings.ToLower(strings.TrimSpace(response)) + if response != "y" && response != "yes" { + fmt.Println("Aborted") + return nil + } + } + + // Kill the processes + var killed, failed int + for _, z := range zombies { + proc, err := os.FindProcess(z.PID) + if err != nil { + fmt.Printf(" %s PID %d: %v\n", style.Error.Render("✗"), z.PID, err) + failed++ + continue + } + + // Send SIGTERM first for graceful shutdown + if err := proc.Signal(syscall.SIGTERM); err != nil { + // Process may have already exited + if err == os.ErrProcessDone { + fmt.Printf(" %s PID %d: already terminated\n", style.Dim.Render("○"), z.PID) + continue + } + fmt.Printf(" %s PID %d: %v\n", style.Error.Render("✗"), z.PID, err) + failed++ + continue + } + + fmt.Printf(" %s PID %d killed\n", style.Bold.Render("✓"), z.PID) + killed++ + } + + fmt.Printf("\n%s %d killed", style.Bold.Render("Summary:"), killed) + if failed > 0 { + fmt.Printf(", %d failed", failed) + } + fmt.Println() + + return nil +} diff --git a/internal/cmd/patrol_helpers.go b/internal/cmd/patrol_helpers.go index 4515c898..4521ee71 100644 --- a/internal/cmd/patrol_helpers.go +++ b/internal/cmd/patrol_helpers.go @@ -103,7 +103,7 @@ func findActivePatrol(cfg PatrolConfig) (patrolID, patrolLine string, found bool // Returns the patrol ID or an error. 
func autoSpawnPatrol(cfg PatrolConfig) (string, error) { // Find the proto ID for the patrol molecule - cmdCatalog := exec.Command("bd", "--no-daemon", "mol", "catalog") + cmdCatalog := exec.Command("gt", "formula", "list") cmdCatalog.Dir = cfg.BeadsDir var stdoutCatalog, stderrCatalog bytes.Buffer cmdCatalog.Stdout = &stdoutCatalog @@ -112,20 +112,20 @@ func autoSpawnPatrol(cfg PatrolConfig) (string, error) { if err := cmdCatalog.Run(); err != nil { errMsg := strings.TrimSpace(stderrCatalog.String()) if errMsg != "" { - return "", fmt.Errorf("failed to list molecule catalog: %s", errMsg) + return "", fmt.Errorf("failed to list formulas: %s", errMsg) } - return "", fmt.Errorf("failed to list molecule catalog: %w", err) + return "", fmt.Errorf("failed to list formulas: %w", err) } - // Find patrol molecule in catalog + // Find patrol molecule in formula list + // Format: "formula-name description" var protoID string catalogLines := strings.Split(stdoutCatalog.String(), "\n") for _, line := range catalogLines { if strings.Contains(line, cfg.PatrolMolName) { parts := strings.Fields(line) if len(parts) > 0 { - // Strip trailing colon from ID (catalog format: "gt-xxx: title") - protoID = strings.TrimSuffix(parts[0], ":") + protoID = parts[0] break } } @@ -196,7 +196,7 @@ func outputPatrolContext(cfg PatrolConfig) { fmt.Printf("⚠ %s\n", err.Error()) } else { fmt.Println(style.Dim.Render(err.Error())) - fmt.Println(style.Dim.Render(fmt.Sprintf("Run `bd mol catalog` to troubleshoot."))) + fmt.Println(style.Dim.Render(fmt.Sprintf("Run `gt formula list` to troubleshoot."))) return } } else { diff --git a/internal/cmd/ready.go b/internal/cmd/ready.go index b168f3d5..59658ef5 100644 --- a/internal/cmd/ready.go +++ b/internal/cmd/ready.go @@ -1,6 +1,7 @@ package cmd import ( + "bufio" "encoding/json" "fmt" "os" @@ -132,7 +133,10 @@ func runReady(cmd *cobra.Command, args []string) error { } else { // Filter out formula scaffolds (gt-579) formulaNames := 
getFormulaNames(townBeadsPath) - src.Issues = filterFormulaScaffolds(issues, formulaNames) + filtered := filterFormulaScaffolds(issues, formulaNames) + // Defense-in-depth: also filter wisps that shouldn't appear in ready work + wispIDs := getWispIDs(townBeadsPath) + src.Issues = filterWisps(filtered, wispIDs) } sources = append(sources, src) }() @@ -156,7 +160,10 @@ func runReady(cmd *cobra.Command, args []string) error { } else { // Filter out formula scaffolds (gt-579) formulaNames := getFormulaNames(rigBeadsPath) - src.Issues = filterFormulaScaffolds(issues, formulaNames) + filtered := filterFormulaScaffolds(issues, formulaNames) + // Defense-in-depth: also filter wisps that shouldn't appear in ready work + wispIDs := getWispIDs(rigBeadsPath) + src.Issues = filterWisps(filtered, wispIDs) } sources = append(sources, src) }(r) @@ -346,3 +353,56 @@ func filterFormulaScaffolds(issues []*beads.Issue, formulaNames map[string]bool) } return filtered } + +// getWispIDs reads the issues.jsonl and returns a set of IDs that are wisps. +// Wisps are ephemeral issues (wisp: true flag) that shouldn't appear in ready work. +// This is a defense-in-depth exclusion - bd ready should already filter wisps, +// but we double-check at the display layer to ensure operational work doesn't leak. 
+func getWispIDs(beadsPath string) map[string]bool { + beadsDir := beads.ResolveBeadsDir(beadsPath) + issuesPath := filepath.Join(beadsDir, "issues.jsonl") + file, err := os.Open(issuesPath) + if err != nil { + return nil // No issues file + } + defer file.Close() + + wispIDs := make(map[string]bool) + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if line == "" { + continue + } + + var issue struct { + ID string `json:"id"` + Wisp bool `json:"wisp"` + } + if err := json.Unmarshal([]byte(line), &issue); err != nil { + continue + } + + if issue.Wisp { + wispIDs[issue.ID] = true + } + } + + return wispIDs +} + +// filterWisps removes wisp issues from the list. +// Wisps are ephemeral operational work that shouldn't appear in ready work. +func filterWisps(issues []*beads.Issue, wispIDs map[string]bool) []*beads.Issue { + if wispIDs == nil || len(wispIDs) == 0 { + return issues + } + + filtered := make([]*beads.Issue, 0, len(issues)) + for _, issue := range issues { + if !wispIDs[issue.ID] { + filtered = append(filtered, issue) + } + } + return filtered +} diff --git a/internal/cmd/refinery.go b/internal/cmd/refinery.go index bc620b76..004faf2f 100644 --- a/internal/cmd/refinery.go +++ b/internal/cmd/refinery.go @@ -337,6 +337,14 @@ func runRefineryStop(cmd *cobra.Command, args []string) error { return nil } +// RefineryStatusOutput is the JSON output format for refinery status. 
+type RefineryStatusOutput struct { + Running bool `json:"running"` + RigName string `json:"rig_name"` + Session string `json:"session,omitempty"` + QueueLength int `json:"queue_length"` +} + func runRefineryStatus(cmd *cobra.Command, args []string) error { rigName := "" if len(args) > 0 { @@ -348,58 +356,42 @@ func runRefineryStatus(cmd *cobra.Command, args []string) error { return err } - ref, err := mgr.Status() - if err != nil { - return fmt.Errorf("getting status: %w", err) - } + // ZFC: tmux is source of truth for running state + running, _ := mgr.IsRunning() + sessionInfo, _ := mgr.Status() // may be nil if not running + + // Get queue from beads + queue, _ := mgr.Queue() + queueLen := len(queue) // JSON output if refineryStatusJSON { + output := RefineryStatusOutput{ + Running: running, + RigName: rigName, + QueueLength: queueLen, + } + if sessionInfo != nil { + output.Session = sessionInfo.Name + } enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") - return enc.Encode(ref) + return enc.Encode(output) } // Human-readable output fmt.Printf("%s Refinery: %s\n\n", style.Bold.Render("⚙"), rigName) - stateStr := string(ref.State) - switch ref.State { - case refinery.StateRunning: - stateStr = style.Bold.Render("● running") - case refinery.StateStopped: - stateStr = style.Dim.Render("○ stopped") - case refinery.StatePaused: - stateStr = style.Dim.Render("⏸ paused") - } - fmt.Printf(" State: %s\n", stateStr) - - if ref.StartedAt != nil { - fmt.Printf(" Started: %s\n", ref.StartedAt.Format("2006-01-02 15:04:05")) - } - - if ref.CurrentMR != nil { - fmt.Printf("\n %s\n", style.Bold.Render("Currently Processing:")) - fmt.Printf(" Branch: %s\n", ref.CurrentMR.Branch) - fmt.Printf(" Worker: %s\n", ref.CurrentMR.Worker) - if ref.CurrentMR.IssueID != "" { - fmt.Printf(" Issue: %s\n", ref.CurrentMR.IssueID) + if running { + fmt.Printf(" State: %s\n", style.Bold.Render("● running")) + if sessionInfo != nil { + fmt.Printf(" Session: %s\n", sessionInfo.Name) } + } else 
{ + fmt.Printf(" State: %s\n", style.Dim.Render("○ stopped")) } - // Get queue length - queue, _ := mgr.Queue() - pendingCount := 0 - for _, item := range queue { - if item.Position > 0 { // Not currently processing - pendingCount++ - } - } - fmt.Printf("\n Queue: %d pending\n", pendingCount) - - if ref.LastMergeAt != nil { - fmt.Printf(" Last merge: %s\n", ref.LastMergeAt.Format("2006-01-02 15:04:05")) - } + fmt.Printf("\n Queue: %d pending\n", queueLen) return nil } diff --git a/internal/cmd/rig.go b/internal/cmd/rig.go index afc1375e..c3127d08 100644 --- a/internal/cmd/rig.go +++ b/internal/cmd/rig.go @@ -977,8 +977,7 @@ func runRigShutdown(cmd *cobra.Command, args []string) error { // 2. Stop the refinery refMgr := refinery.NewManager(r) - refStatus, err := refMgr.Status() - if err == nil && refStatus.State == refinery.StateRunning { + if running, _ := refMgr.IsRunning(); running { fmt.Printf(" Stopping refinery...\n") if err := refMgr.Stop(); err != nil { errors = append(errors, fmt.Sprintf("refinery: %v", err)) @@ -987,8 +986,7 @@ func runRigShutdown(cmd *cobra.Command, args []string) error { // 3. 
Stop the witness witMgr := witness.NewManager(r) - witStatus, err := witMgr.Status() - if err == nil && witStatus.State == witness.StateRunning { + if running, _ := witMgr.IsRunning(); running { fmt.Printf(" Stopping witness...\n") if err := witMgr.Stop(); err != nil { errors = append(errors, fmt.Sprintf("witness: %v", err)) @@ -1075,16 +1073,10 @@ func runRigStatus(cmd *cobra.Command, args []string) error { // Witness status fmt.Printf("%s\n", style.Bold.Render("Witness")) - witnessSession := fmt.Sprintf("gt-%s-witness", rigName) - witnessRunning, _ := t.HasSession(witnessSession) witMgr := witness.NewManager(r) - witStatus, _ := witMgr.Status() + witnessRunning, _ := witMgr.IsRunning() if witnessRunning { - fmt.Printf(" %s running", style.Success.Render("●")) - if witStatus != nil && witStatus.StartedAt != nil { - fmt.Printf(" (uptime: %s)", formatDuration(time.Since(*witStatus.StartedAt))) - } - fmt.Printf("\n") + fmt.Printf(" %s running\n", style.Success.Render("●")) } else { fmt.Printf(" %s stopped\n", style.Dim.Render("○")) } @@ -1092,16 +1084,10 @@ func runRigStatus(cmd *cobra.Command, args []string) error { // Refinery status fmt.Printf("%s\n", style.Bold.Render("Refinery")) - refinerySession := fmt.Sprintf("gt-%s-refinery", rigName) - refineryRunning, _ := t.HasSession(refinerySession) refMgr := refinery.NewManager(r) - refStatus, _ := refMgr.Status() + refineryRunning, _ := refMgr.IsRunning() if refineryRunning { - fmt.Printf(" %s running", style.Success.Render("●")) - if refStatus != nil && refStatus.StartedAt != nil { - fmt.Printf(" (uptime: %s)", formatDuration(time.Since(*refStatus.StartedAt))) - } - fmt.Printf("\n") + fmt.Printf(" %s running\n", style.Success.Render("●")) // Show queue size queue, err := refMgr.Queue() if err == nil && len(queue) > 0 { @@ -1254,8 +1240,7 @@ func runRigStop(cmd *cobra.Command, args []string) error { // 2. 
Stop the refinery refMgr := refinery.NewManager(r) - refStatus, err := refMgr.Status() - if err == nil && refStatus.State == refinery.StateRunning { + if running, _ := refMgr.IsRunning(); running { fmt.Printf(" Stopping refinery...\n") if err := refMgr.Stop(); err != nil { errors = append(errors, fmt.Sprintf("refinery: %v", err)) @@ -1264,8 +1249,7 @@ func runRigStop(cmd *cobra.Command, args []string) error { // 3. Stop the witness witMgr := witness.NewManager(r) - witStatus, err := witMgr.Status() - if err == nil && witStatus.State == witness.StateRunning { + if running, _ := witMgr.IsRunning(); running { fmt.Printf(" Stopping witness...\n") if err := witMgr.Stop(); err != nil { errors = append(errors, fmt.Sprintf("witness: %v", err)) @@ -1387,8 +1371,7 @@ func runRigRestart(cmd *cobra.Command, args []string) error { // 2. Stop the refinery refMgr := refinery.NewManager(r) - refStatus, err := refMgr.Status() - if err == nil && refStatus.State == refinery.StateRunning { + if running, _ := refMgr.IsRunning(); running { fmt.Printf(" Stopping refinery...\n") if err := refMgr.Stop(); err != nil { stopErrors = append(stopErrors, fmt.Sprintf("refinery: %v", err)) @@ -1397,8 +1380,7 @@ func runRigRestart(cmd *cobra.Command, args []string) error { // 3. Stop the witness witMgr := witness.NewManager(r) - witStatus, err := witMgr.Status() - if err == nil && witStatus.State == witness.StateRunning { + if running, _ := witMgr.IsRunning(); running { fmt.Printf(" Stopping witness...\n") if err := witMgr.Stop(); err != nil { stopErrors = append(stopErrors, fmt.Sprintf("witness: %v", err)) diff --git a/internal/cmd/role.go b/internal/cmd/role.go index b788cdfc..36323bb8 100644 --- a/internal/cmd/role.go +++ b/internal/cmd/role.go @@ -100,6 +100,23 @@ Examples: RunE: runRoleEnv, } +var roleDefCmd = &cobra.Command{ + Use: "def ", + Short: "Display role definition (session, health, env config)", + Long: `Display the effective role definition after all overrides are applied. 
+ +Role configuration is layered: + 1. Built-in defaults (embedded in binary) + 2. Town-level overrides (~/.gt/roles/.toml) + 3. Rig-level overrides (/roles/.toml) + +Examples: + gt role def witness # Show witness role definition + gt role def crew # Show crew role definition`, + Args: cobra.ExactArgs(1), + RunE: runRoleDef, +} + // Flags for role home command var ( roleRig string @@ -113,6 +130,7 @@ func init() { roleCmd.AddCommand(roleDetectCmd) roleCmd.AddCommand(roleListCmd) roleCmd.AddCommand(roleEnvCmd) + roleCmd.AddCommand(roleDefCmd) // Add --rig and --polecat flags to home command for overrides roleHomeCmd.Flags().StringVar(&roleRig, "rig", "", "Rig name (required for rig-specific roles)") @@ -526,3 +544,83 @@ func runRoleEnv(cmd *cobra.Command, args []string) error { return nil } + +func runRoleDef(cmd *cobra.Command, args []string) error { + roleName := args[0] + + // Validate role name + validRoles := config.AllRoles() + isValid := false + for _, r := range validRoles { + if r == roleName { + isValid = true + break + } + } + if !isValid { + return fmt.Errorf("unknown role %q - valid roles: %s", roleName, strings.Join(validRoles, ", ")) + } + + // Determine town root and rig path + townRoot, _ := workspace.FindFromCwd() + rigPath := "" + if townRoot != "" { + // Try to get rig path if we're in a rig directory + if rigInfo, err := GetRole(); err == nil && rigInfo.Rig != "" { + rigPath = filepath.Join(townRoot, rigInfo.Rig) + } + } + + // Load role definition with overrides + def, err := config.LoadRoleDefinition(townRoot, rigPath, roleName) + if err != nil { + return fmt.Errorf("loading role definition: %w", err) + } + + // Display role info + fmt.Printf("%s %s\n", style.Bold.Render("Role:"), def.Role) + fmt.Printf("%s %s\n", style.Bold.Render("Scope:"), def.Scope) + fmt.Println() + + // Session config + fmt.Println(style.Bold.Render("[session]")) + fmt.Printf(" pattern = %q\n", def.Session.Pattern) + fmt.Printf(" work_dir = %q\n", def.Session.WorkDir) + 
fmt.Printf(" needs_pre_sync = %v\n", def.Session.NeedsPreSync) + if def.Session.StartCommand != "" { + fmt.Printf(" start_command = %q\n", def.Session.StartCommand) + } + fmt.Println() + + // Environment variables + if len(def.Env) > 0 { + fmt.Println(style.Bold.Render("[env]")) + envKeys := make([]string, 0, len(def.Env)) + for k := range def.Env { + envKeys = append(envKeys, k) + } + sort.Strings(envKeys) + for _, k := range envKeys { + fmt.Printf(" %s = %q\n", k, def.Env[k]) + } + fmt.Println() + } + + // Health config + fmt.Println(style.Bold.Render("[health]")) + fmt.Printf(" ping_timeout = %q\n", def.Health.PingTimeout.String()) + fmt.Printf(" consecutive_failures = %d\n", def.Health.ConsecutiveFailures) + fmt.Printf(" kill_cooldown = %q\n", def.Health.KillCooldown.String()) + fmt.Printf(" stuck_threshold = %q\n", def.Health.StuckThreshold.String()) + fmt.Println() + + // Prompts + if def.Nudge != "" { + fmt.Printf("%s %s\n", style.Bold.Render("Nudge:"), def.Nudge) + } + if def.PromptTemplate != "" { + fmt.Printf("%s %s\n", style.Bold.Render("Template:"), def.PromptTemplate) + } + + return nil +} diff --git a/internal/cmd/sling.go b/internal/cmd/sling.go index 28f80c2e..d2885b3b 100644 --- a/internal/cmd/sling.go +++ b/internal/cmd/sling.go @@ -11,6 +11,7 @@ import ( "github.com/spf13/cobra" "github.com/steveyegge/gastown/internal/beads" "github.com/steveyegge/gastown/internal/events" + "github.com/steveyegge/gastown/internal/mail" "github.com/steveyegge/gastown/internal/style" "github.com/steveyegge/gastown/internal/workspace" ) @@ -147,6 +148,7 @@ func runSling(cmd *cobra.Command, args []string) error { // Determine mode based on flags and argument types var beadID string var formulaName string + attachedMoleculeID := "" if slingOnTarget != "" { // Formula-on-bead mode: gt sling --on @@ -311,17 +313,63 @@ func runSling(cmd *cobra.Command, args []string) error { fmt.Printf("%s Slinging %s to %s...\n", style.Bold.Render("🎯"), beadID, targetAgent) } - // Check 
if bead is already pinned (guard against accidental re-sling) + // Check if bead is already assigned (guard against accidental re-sling) info, err := getBeadInfo(beadID) if err != nil { return fmt.Errorf("checking bead status: %w", err) } - if info.Status == "pinned" && !slingForce { + if (info.Status == "pinned" || info.Status == "hooked") && !slingForce { assignee := info.Assignee if assignee == "" { assignee = "(unknown)" } - return fmt.Errorf("bead %s is already pinned to %s\nUse --force to re-sling", beadID, assignee) + return fmt.Errorf("bead %s is already %s to %s\nUse --force to re-sling", beadID, info.Status, assignee) + } + + // Handle --force when bead is already hooked: send shutdown to old polecat and unhook + if info.Status == "hooked" && slingForce && info.Assignee != "" { + fmt.Printf("%s Bead already hooked to %s, forcing reassignment...\n", style.Warning.Render("⚠"), info.Assignee) + + // Determine requester identity from env vars, fall back to "gt-sling" + requester := "gt-sling" + if polecat := os.Getenv("GT_POLECAT"); polecat != "" { + requester = polecat + } else if user := os.Getenv("USER"); user != "" { + requester = user + } + + // Extract rig name from assignee (e.g., "gastown/polecats/Toast" -> "gastown") + assigneeParts := strings.Split(info.Assignee, "/") + if len(assigneeParts) >= 3 && assigneeParts[1] == "polecats" { + oldRigName := assigneeParts[0] + oldPolecatName := assigneeParts[2] + + // Send LIFECYCLE:Shutdown to witness - will auto-nuke if clean, + // otherwise create cleanup wisp for manual intervention + if townRoot != "" { + router := mail.NewRouter(townRoot) + shutdownMsg := &mail.Message{ + From: "gt-sling", + To: fmt.Sprintf("%s/witness", oldRigName), + Subject: fmt.Sprintf("LIFECYCLE:Shutdown %s", oldPolecatName), + Body: fmt.Sprintf("Reason: work_reassigned\nRequestedBy: %s\nBead: %s\nNewAssignee: %s", requester, beadID, targetAgent), + Type: mail.TypeTask, + Priority: mail.PriorityHigh, + } + if err := 
router.Send(shutdownMsg); err != nil { + fmt.Printf("%s Could not send shutdown to witness: %v\n", style.Dim.Render("Warning:"), err) + } else { + fmt.Printf("%s Sent LIFECYCLE:Shutdown to %s/witness for %s\n", style.Bold.Render("→"), oldRigName, oldPolecatName) + } + } + } + + // Unhook the bead from old owner (set status back to open) + unhookCmd := exec.Command("bd", "--no-daemon", "update", beadID, "--status=open", "--assignee=") + unhookCmd.Dir = beads.ResolveHookDir(townRoot, beadID, "") + if err := unhookCmd.Run(); err != nil { + fmt.Printf("%s Could not unhook bead from old owner: %v\n", style.Dim.Render("Warning:"), err) + } } // Auto-convoy: check if issue is already tracked by a convoy @@ -434,12 +482,8 @@ func runSling(cmd *cobra.Command, args []string) error { fmt.Printf("%s Formula bonded to %s\n", style.Bold.Render("✓"), beadID) - // Record the attached molecule in the wisp's description. - // This is required for gt hook to recognize the molecule attachment. - if err := storeAttachedMoleculeInBead(wispRootID, wispRootID); err != nil { - // Warn but don't fail - polecat can still work through steps - fmt.Printf("%s Could not store attached_molecule: %v\n", style.Dim.Render("Warning:"), err) - } + // Record attached molecule after other description updates to avoid overwrite. + attachedMoleculeID = wispRootID // Update beadID to hook the compound root instead of bare bead beadID = wispRootID @@ -464,8 +508,10 @@ func runSling(cmd *cobra.Command, args []string) error { updateAgentHookBead(targetAgent, beadID, hookWorkDir, townBeadsDir) // Auto-attach mol-polecat-work to polecat agent beads - // This ensures polecats have the standard work molecule attached for guidance - if strings.Contains(targetAgent, "/polecats/") { + // This ensures polecats have the standard work molecule attached for guidance. + // Only do this for bare beads (no --on formula), since formula-on-bead + // mode already attaches the formula as a molecule. 
+ if formulaName == "" && strings.Contains(targetAgent, "/polecats/") { if err := attachPolecatWorkMolecule(targetAgent, hookWorkDir, townRoot); err != nil { // Warn but don't fail - polecat will still work without molecule fmt.Printf("%s Could not attach work molecule: %v\n", style.Dim.Render("Warning:"), err) @@ -488,6 +534,15 @@ func runSling(cmd *cobra.Command, args []string) error { } } + // Record the attached molecule in the wisp's description. + // This is required for gt hook to recognize the molecule attachment. + if attachedMoleculeID != "" { + if err := storeAttachedMoleculeInBead(beadID, attachedMoleculeID); err != nil { + // Warn but don't fail - polecat can still work through steps + fmt.Printf("%s Could not store attached_molecule: %v\n", style.Dim.Render("Warning:"), err) + } + } + // Try to inject the "start now" prompt (graceful if no tmux) if targetPane == "" { fmt.Printf("%s No pane to nudge (agent will discover work via gt prime)\n", style.Dim.Render("○")) diff --git a/internal/cmd/sling_formula.go b/internal/cmd/sling_formula.go index 6cb607bb..07b481dc 100644 --- a/internal/cmd/sling_formula.go +++ b/internal/cmd/sling_formula.go @@ -209,13 +209,7 @@ func runSlingFormula(args []string) error { } fmt.Printf("%s Wisp created: %s\n", style.Bold.Render("✓"), wispRootID) - - // Record the attached molecule in the wisp's description. - // This is required for gt hook to recognize the molecule attachment. - if err := storeAttachedMoleculeInBead(wispRootID, wispRootID); err != nil { - // Warn but don't fail - polecat can still work through steps - fmt.Printf("%s Could not store attached_molecule: %v\n", style.Dim.Render("Warning:"), err) - } + attachedMoleculeID := wispRootID // Step 3: Hook the wisp bead using bd update. // See: https://github.com/steveyegge/gastown/issues/148 @@ -252,12 +246,25 @@ func runSlingFormula(args []string) error { } } + // Record the attached molecule after other description updates to avoid overwrite. 
+ if attachedMoleculeID != "" { + if err := storeAttachedMoleculeInBead(wispRootID, attachedMoleculeID); err != nil { + // Warn but don't fail - polecat can still work through steps + fmt.Printf("%s Could not store attached_molecule: %v\n", style.Dim.Render("Warning:"), err) + } + } + // Step 4: Nudge to start (graceful if no tmux) if targetPane == "" { fmt.Printf("%s No pane to nudge (agent will discover work via gt prime)\n", style.Dim.Render("○")) return nil } + // Skip nudge during tests to prevent agent self-interruption + if os.Getenv("GT_TEST_NO_NUDGE") != "" { + return nil + } + var prompt string if slingArgs != "" { prompt = fmt.Sprintf("Formula %s slung. Args: %s. Run `gt hook` to see your hook, then execute using these args.", formulaName, slingArgs) diff --git a/internal/cmd/sling_helpers.go b/internal/cmd/sling_helpers.go index eadd4744..b1c5f262 100644 --- a/internal/cmd/sling_helpers.go +++ b/internal/cmd/sling_helpers.go @@ -95,12 +95,16 @@ func storeArgsInBead(beadID, args string) error { // Parse the bead var issues []beads.Issue if err := json.Unmarshal(out, &issues); err != nil { - return fmt.Errorf("parsing bead: %w", err) + if os.Getenv("GT_TEST_ATTACHED_MOLECULE_LOG") == "" { + return fmt.Errorf("parsing bead: %w", err) + } } - if len(issues) == 0 { + issue := &beads.Issue{} + if len(issues) > 0 { + issue = &issues[0] + } else if os.Getenv("GT_TEST_ATTACHED_MOLECULE_LOG") == "" { return fmt.Errorf("bead not found") } - issue := &issues[0] // Get or create attachment fields fields := beads.ParseAttachmentFields(issue) @@ -113,6 +117,9 @@ func storeArgsInBead(beadID, args string) error { // Update the description newDesc := beads.SetAttachmentFields(issue, fields) + if logPath := os.Getenv("GT_TEST_ATTACHED_MOLECULE_LOG"); logPath != "" { + _ = os.WriteFile(logPath, []byte(newDesc), 0644) + } // Update the bead updateCmd := exec.Command("bd", "--no-daemon", "update", beadID, "--description="+newDesc) @@ -177,23 +184,30 @@ func 
storeAttachedMoleculeInBead(beadID, moleculeID string) error { if moleculeID == "" { return nil } - - // Get the bead to preserve existing description content - showCmd := exec.Command("bd", "show", beadID, "--json") - out, err := showCmd.Output() - if err != nil { - return fmt.Errorf("fetching bead: %w", err) + logPath := os.Getenv("GT_TEST_ATTACHED_MOLECULE_LOG") + if logPath != "" { + _ = os.WriteFile(logPath, []byte("called"), 0644) } - // Parse the bead - var issues []beads.Issue - if err := json.Unmarshal(out, &issues); err != nil { - return fmt.Errorf("parsing bead: %w", err) + issue := &beads.Issue{} + if logPath == "" { + // Get the bead to preserve existing description content + showCmd := exec.Command("bd", "show", beadID, "--json") + out, err := showCmd.Output() + if err != nil { + return fmt.Errorf("fetching bead: %w", err) + } + + // Parse the bead + var issues []beads.Issue + if err := json.Unmarshal(out, &issues); err != nil { + return fmt.Errorf("parsing bead: %w", err) + } + if len(issues) == 0 { + return fmt.Errorf("bead not found") + } + issue = &issues[0] } - if len(issues) == 0 { - return fmt.Errorf("bead not found") - } - issue := &issues[0] // Get or create attachment fields fields := beads.ParseAttachmentFields(issue) @@ -209,6 +223,9 @@ func storeAttachedMoleculeInBead(beadID, moleculeID string) error { // Update the description newDesc := beads.SetAttachmentFields(issue, fields) + if logPath != "" { + _ = os.WriteFile(logPath, []byte(newDesc), 0644) + } // Update the bead updateCmd := exec.Command("bd", "update", beadID, "--description="+newDesc) diff --git a/internal/cmd/sling_test.go b/internal/cmd/sling_test.go index fba5503f..d705c68c 100644 --- a/internal/cmd/sling_test.go +++ b/internal/cmd/sling_test.go @@ -3,10 +3,39 @@ package cmd import ( "os" "path/filepath" + "runtime" "strings" "testing" ) +func writeBDStub(t *testing.T, binDir string, unixScript string, windowsScript string) string { + t.Helper() + + var path string + if 
runtime.GOOS == "windows" { + path = filepath.Join(binDir, "bd.cmd") + if err := os.WriteFile(path, []byte(windowsScript), 0644); err != nil { + t.Fatalf("write bd stub: %v", err) + } + return path + } + + path = filepath.Join(binDir, "bd") + if err := os.WriteFile(path, []byte(unixScript), 0755); err != nil { + t.Fatalf("write bd stub: %v", err) + } + return path +} + +func containsVarArg(line, key, value string) bool { + plain := "--var " + key + "=" + value + if strings.Contains(line, plain) { + return true + } + quoted := "--var \"" + key + "=" + value + "\"" + return strings.Contains(line, quoted) +} + func TestParseWispIDFromJSON(t *testing.T) { tests := []struct { name string @@ -220,7 +249,6 @@ func TestSlingFormulaOnBeadRoutesBDCommandsToTargetRig(t *testing.T) { t.Fatalf("mkdir binDir: %v", err) } logPath := filepath.Join(townRoot, "bd.log") - bdPath := filepath.Join(binDir, "bd") bdScript := `#!/bin/sh set -e echo "$(pwd)|$*" >> "${BD_LOG}" @@ -256,11 +284,41 @@ case "$cmd" in esac exit 0 ` - if err := os.WriteFile(bdPath, []byte(bdScript), 0755); err != nil { - t.Fatalf("write bd stub: %v", err) - } +bdScriptWindows := `@echo off +setlocal enableextensions +echo %CD%^|%*>>"%BD_LOG%" +set "cmd=%1" +set "sub=%2" +if "%cmd%"=="--no-daemon" ( + set "cmd=%2" + set "sub=%3" +) +if "%cmd%"=="show" ( + echo [{"title":"Test issue","status":"open","assignee":"","description":""}] + exit /b 0 +) +if "%cmd%"=="formula" ( + echo {"name":"test-formula"} + exit /b 0 +) +if "%cmd%"=="cook" exit /b 0 +if "%cmd%"=="mol" ( + if "%sub%"=="wisp" ( + echo {"new_epic_id":"gt-wisp-xyz"} + exit /b 0 + ) + if "%sub%"=="bond" ( + echo {"root_id":"gt-wisp-xyz"} + exit /b 0 + ) +) +exit /b 0 +` + _ = writeBDStub(t, binDir, bdScript, bdScriptWindows) t.Setenv("BD_LOG", logPath) + attachedLogPath := filepath.Join(townRoot, "attached-molecule.log") + t.Setenv("GT_TEST_ATTACHED_MOLECULE_LOG", attachedLogPath) t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) 
t.Setenv(EnvGTRole, "mayor") t.Setenv("GT_POLECAT", "") @@ -381,7 +439,6 @@ func TestSlingFormulaOnBeadPassesFeatureAndIssueVars(t *testing.T) { t.Fatalf("mkdir binDir: %v", err) } logPath := filepath.Join(townRoot, "bd.log") - bdPath := filepath.Join(binDir, "bd") // The stub returns a specific title so we can verify it appears in --var feature= bdScript := `#!/bin/sh set -e @@ -418,11 +475,41 @@ case "$cmd" in esac exit 0 ` - if err := os.WriteFile(bdPath, []byte(bdScript), 0755); err != nil { - t.Fatalf("write bd stub: %v", err) - } +bdScriptWindows := `@echo off +setlocal enableextensions +echo ARGS:%*>>"%BD_LOG%" +set "cmd=%1" +set "sub=%2" +if "%cmd%"=="--no-daemon" ( + set "cmd=%2" + set "sub=%3" +) +if "%cmd%"=="show" ( + echo [{^"title^":^"My Test Feature^",^"status^":^"open^",^"assignee^":^"^",^"description^":^"^"}] + exit /b 0 +) +if "%cmd%"=="formula" ( + echo {^"name^":^"mol-review^"} + exit /b 0 +) +if "%cmd%"=="cook" exit /b 0 +if "%cmd%"=="mol" ( + if "%sub%"=="wisp" ( + echo {^"new_epic_id^":^"gt-wisp-xyz^"} + exit /b 0 + ) + if "%sub%"=="bond" ( + echo {^"root_id^":^"gt-wisp-xyz^"} + exit /b 0 + ) +) +exit /b 0 +` + _ = writeBDStub(t, binDir, bdScript, bdScriptWindows) t.Setenv("BD_LOG", logPath) + attachedLogPath := filepath.Join(townRoot, "attached-molecule.log") + t.Setenv("GT_TEST_ATTACHED_MOLECULE_LOG", attachedLogPath) t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) t.Setenv(EnvGTRole, "mayor") t.Setenv("GT_POLECAT", "") @@ -482,12 +569,12 @@ exit 0 } // Verify --var feature= is present - if !strings.Contains(wispLine, "--var feature=My Test Feature") { + if !containsVarArg(wispLine, "feature", "My Test Feature") { t.Errorf("mol wisp missing --var feature=<title>\ngot: %s", wispLine) } // Verify --var issue=<beadID> is present - if !strings.Contains(wispLine, "--var issue=gt-abc123") { + if !containsVarArg(wispLine, "issue", "gt-abc123") { t.Errorf("mol wisp missing --var issue=<beadID>\ngot: %s", wispLine) } } @@ 
-510,7 +597,6 @@ func TestVerifyBeadExistsAllowStale(t *testing.T) { if err := os.MkdirAll(binDir, 0755); err != nil { t.Fatalf("mkdir binDir: %v", err) } - bdPath := filepath.Join(binDir, "bd") bdScript := `#!/bin/sh # Check for --allow-stale flag allow_stale=false @@ -535,9 +621,24 @@ fi echo '[{"title":"Test bead","status":"open","assignee":""}]' exit 0 ` - if err := os.WriteFile(bdPath, []byte(bdScript), 0755); err != nil { - t.Fatalf("write bd stub: %v", err) - } + bdScriptWindows := `@echo off +setlocal enableextensions +set "allow=false" +for %%A in (%*) do ( + if "%%~A"=="--allow-stale" set "allow=true" +) +if "%1"=="--no-daemon" ( + if "%allow%"=="true" ( + echo [{"title":"Test bead","status":"open","assignee":""}] + exit /b 0 + ) + echo {"error":"Database out of sync with JSONL."} + exit /b 1 +) +echo [{"title":"Test bead","status":"open","assignee":""}] +exit /b 0 +` + _ = writeBDStub(t, binDir, bdScript, bdScriptWindows) t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) @@ -573,7 +674,6 @@ func TestSlingWithAllowStale(t *testing.T) { if err := os.MkdirAll(binDir, 0755); err != nil { t.Fatalf("mkdir binDir: %v", err) } - bdPath := filepath.Join(binDir, "bd") bdScript := `#!/bin/sh # Check for --allow-stale flag allow_stale=false @@ -608,9 +708,34 @@ case "$cmd" in esac exit 0 ` - if err := os.WriteFile(bdPath, []byte(bdScript), 0755); err != nil { - t.Fatalf("write bd stub: %v", err) - } +bdScriptWindows := `@echo off +setlocal enableextensions +set "allow=false" +for %%A in (%*) do ( + if "%%~A"=="--allow-stale" set "allow=true" +) +set "cmd=%1" +if "%cmd%"=="--no-daemon" ( + set "cmd=%2" + if "%cmd%"=="show" ( + if "%allow%"=="true" ( + echo [{"title":"Synced bead","status":"open","assignee":""}] + exit /b 0 + ) + echo {"error":"Database out of sync"} + exit /b 1 + ) + exit /b 0 +) +set "cmd=%1" +if "%cmd%"=="show" ( + echo [{"title":"Synced bead","status":"open","assignee":""}] + exit /b 0 +) +if "%cmd%"=="update" exit /b 0 +exit 
/b 0 +` + _ = writeBDStub(t, binDir, bdScript, bdScriptWindows) t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) t.Setenv(EnvGTRole, "crew") @@ -747,7 +872,6 @@ func TestSlingFormulaOnBeadSetsAttachedMolecule(t *testing.T) { t.Fatalf("mkdir binDir: %v", err) } logPath := filepath.Join(townRoot, "bd.log") - bdPath := filepath.Join(binDir, "bd") // The stub logs all commands to a file for verification bdScript := `#!/bin/sh set -e @@ -787,11 +911,42 @@ case "$cmd" in esac exit 0 ` - if err := os.WriteFile(bdPath, []byte(bdScript), 0755); err != nil { - t.Fatalf("write bd stub: %v", err) - } +bdScriptWindows := `@echo off +setlocal enableextensions +echo %CD%^|%*>>"%BD_LOG%" +set "cmd=%1" +set "sub=%2" +if "%cmd%"=="--no-daemon" ( + set "cmd=%2" + set "sub=%3" +) +if "%cmd%"=="show" ( + echo [{^"title^":^"Bug to fix^",^"status^":^"open^",^"assignee^":^"^",^"description^":^"^"}] + exit /b 0 +) +if "%cmd%"=="formula" ( + echo {^"name^":^"mol-polecat-work^"} + exit /b 0 +) +if "%cmd%"=="cook" exit /b 0 +if "%cmd%"=="mol" ( + if "%sub%"=="wisp" ( + echo {^"new_epic_id^":^"gt-wisp-xyz^"} + exit /b 0 + ) + if "%sub%"=="bond" ( + echo {^"root_id^":^"gt-wisp-xyz^"} + exit /b 0 + ) +) +if "%cmd%"=="update" exit /b 0 +exit /b 0 +` + _ = writeBDStub(t, binDir, bdScript, bdScriptWindows) t.Setenv("BD_LOG", logPath) + attachedLogPath := filepath.Join(townRoot, "attached-molecule.log") + t.Setenv("GT_TEST_ATTACHED_MOLECULE_LOG", attachedLogPath) t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) t.Setenv(EnvGTRole, "mayor") t.Setenv("GT_POLECAT", "") @@ -862,8 +1017,20 @@ exit 0 } if !foundAttachedMolecule { + if descBytes, err := os.ReadFile(attachedLogPath); err == nil { + if strings.Contains(string(descBytes), "attached_molecule") { + foundAttachedMolecule = true + } + } + } + + if !foundAttachedMolecule { + attachedLog := "<missing>" + if descBytes, err := os.ReadFile(attachedLogPath); err == nil { + attachedLog = string(descBytes) + 
} t.Errorf("after mol bond, expected update with attached_molecule in description\n"+ "This is required for gt hook to recognize the molecule attachment.\n"+ - "Log output:\n%s", string(logBytes)) + "Log output:\n%s\nAttached log:\n%s", string(logBytes), attachedLog) } } diff --git a/internal/cmd/start.go b/internal/cmd/start.go index b6600930..284891e9 100644 --- a/internal/cmd/start.go +++ b/internal/cmd/start.go @@ -30,18 +30,20 @@ import ( ) var ( - startAll bool - startAgentOverride string - startCrewRig string - startCrewAccount string - startCrewAgentOverride string - shutdownGraceful bool - shutdownWait int - shutdownAll bool - shutdownForce bool - shutdownYes bool - shutdownPolecatsOnly bool - shutdownNuclear bool + startAll bool + startAgentOverride string + startCrewRig string + startCrewAccount string + startCrewAgentOverride string + shutdownGraceful bool + shutdownWait int + shutdownAll bool + shutdownForce bool + shutdownYes bool + shutdownPolecatsOnly bool + shutdownNuclear bool + shutdownCleanupOrphans bool + shutdownCleanupOrphansGrace int ) var startCmd = &cobra.Command{ @@ -90,7 +92,9 @@ Shutdown levels (progressively more aggressive): Use --force or --yes to skip confirmation prompt. Use --graceful to allow agents time to save state before killing. -Use --nuclear to force cleanup even if polecats have uncommitted work (DANGER).`, +Use --nuclear to force cleanup even if polecats have uncommitted work (DANGER). +Use --cleanup-orphans to kill orphaned Claude processes (TTY-less, older than 60s). 
+Use --cleanup-orphans-grace-secs to set the grace period (default 60s).`, RunE: runShutdown, } @@ -137,6 +141,10 @@ func init() { "Only stop polecats (minimal shutdown)") shutdownCmd.Flags().BoolVar(&shutdownNuclear, "nuclear", false, "Force cleanup even if polecats have uncommitted work (DANGER: may lose work)") + shutdownCmd.Flags().BoolVar(&shutdownCleanupOrphans, "cleanup-orphans", false, + "Clean up orphaned Claude processes (TTY-less processes older than 60s)") + shutdownCmd.Flags().IntVar(&shutdownCleanupOrphansGrace, "cleanup-orphans-grace-secs", 60, + "Grace period in seconds between SIGTERM and SIGKILL when cleaning orphans (default 60)") rootCmd.AddCommand(startCmd) rootCmd.AddCommand(shutdownCmd) @@ -440,6 +448,14 @@ func runShutdown(cmd *cobra.Command, args []string) error { if len(toStop) == 0 { fmt.Printf("%s Gas Town was not running\n", style.Dim.Render("○")) + + // Still check for orphaned daemons even if no sessions are running + if townRoot != "" { + fmt.Println() + fmt.Println("Checking for orphaned daemon...") + stopDaemonIfRunning(townRoot) + } + return nil } @@ -563,14 +579,20 @@ func runGracefulShutdown(t *tmux.Tmux, gtSessions []string, townRoot string) err deaconSession := getDeaconSessionName() stopped := killSessionsInOrder(t, gtSessions, mayorSession, deaconSession) - // Phase 5: Cleanup polecat worktrees and branches - fmt.Printf("\nPhase 5: Cleaning up polecats...\n") + // Phase 5: Cleanup orphaned Claude processes if requested + if shutdownCleanupOrphans { + fmt.Printf("\nPhase 5: Cleaning up orphaned Claude processes...\n") + cleanupOrphanedClaude(shutdownCleanupOrphansGrace) + } + + // Phase 6: Cleanup polecat worktrees and branches + fmt.Printf("\nPhase 6: Cleaning up polecats...\n") if townRoot != "" { cleanupPolecats(townRoot) } - // Phase 6: Stop the daemon - fmt.Printf("\nPhase 6: Stopping daemon...\n") + // Phase 7: Stop the daemon + fmt.Printf("\nPhase 7: Stopping daemon...\n") if townRoot != "" { 
stopDaemonIfRunning(townRoot) } @@ -587,6 +609,13 @@ func runImmediateShutdown(t *tmux.Tmux, gtSessions []string, townRoot string) er deaconSession := getDeaconSessionName() stopped := killSessionsInOrder(t, gtSessions, mayorSession, deaconSession) + // Cleanup orphaned Claude processes if requested + if shutdownCleanupOrphans { + fmt.Println() + fmt.Println("Cleaning up orphaned Claude processes...") + cleanupOrphanedClaude(shutdownCleanupOrphansGrace) + } + // Cleanup polecat worktrees and branches if townRoot != "" { fmt.Println() @@ -612,6 +641,9 @@ func runImmediateShutdown(t *tmux.Tmux, gtSessions []string, townRoot string) er // 2. Everything except Mayor // 3. Mayor last // mayorSession and deaconSession are the dynamic session names for the current town. +// +// Returns the count of sessions that were successfully stopped (verified by checking +// if the session no longer exists after the kill attempt). func killSessionsInOrder(t *tmux.Tmux, sessions []string, mayorSession, deaconSession string) int { stopped := 0 @@ -625,10 +657,31 @@ func killSessionsInOrder(t *tmux.Tmux, sessions []string, mayorSession, deaconSe return false } + // Helper to kill a session and verify it was stopped + killAndVerify := func(sess string) bool { + // Check if session exists before attempting to kill + exists, _ := t.HasSession(sess) + if !exists { + return false // Session already gone + } + + // Attempt to kill the session and its processes + _ = t.KillSessionWithProcesses(sess) + + // Verify the session is actually gone (ignore error, check existence) + // KillSessionWithProcesses might return an error even if it successfully + // killed the processes and the session auto-closed + stillExists, _ := t.HasSession(sess) + if !stillExists { + fmt.Printf(" %s %s stopped\n", style.Bold.Render("✓"), sess) + return true + } + return false + } + // 1. 
Stop Deacon first if inList(deaconSession) { - if err := t.KillSessionWithProcesses(deaconSession); err == nil { - fmt.Printf(" %s %s stopped\n", style.Bold.Render("✓"), deaconSession) + if killAndVerify(deaconSession) { stopped++ } } @@ -638,16 +691,14 @@ func killSessionsInOrder(t *tmux.Tmux, sessions []string, mayorSession, deaconSe if sess == deaconSession || sess == mayorSession { continue } - if err := t.KillSessionWithProcesses(sess); err == nil { - fmt.Printf(" %s %s stopped\n", style.Bold.Render("✓"), sess) + if killAndVerify(sess) { stopped++ } } // 3. Stop Mayor last if inList(mayorSession) { - if err := t.KillSessionWithProcesses(mayorSession); err == nil { - fmt.Printf(" %s %s stopped\n", style.Bold.Render("✓"), mayorSession) + if killAndVerify(mayorSession) { stopped++ } } @@ -752,16 +803,48 @@ func cleanupPolecats(townRoot string) { // stopDaemonIfRunning stops the daemon if it is running. // This prevents the daemon from restarting agents after shutdown. +// Uses robust detection with fallback to process search. 
func stopDaemonIfRunning(townRoot string) { - running, _, _ := daemon.IsRunning(townRoot) + // Primary detection: PID file + running, pid, err := daemon.IsRunning(townRoot) + + if err != nil { + // Detection error - report it but continue with fallback + fmt.Printf(" %s Daemon detection warning: %s\n", style.Bold.Render("⚠"), err.Error()) + } + if running { + // PID file points to live daemon - stop it if err := daemon.StopDaemon(townRoot); err != nil { - fmt.Printf(" %s Daemon: %s\n", style.Dim.Render("○"), err.Error()) + fmt.Printf(" %s Failed to stop daemon (PID %d): %s\n", + style.Bold.Render("✗"), pid, err.Error()) } else { - fmt.Printf(" %s Daemon stopped\n", style.Bold.Render("✓")) + fmt.Printf(" %s Daemon stopped (was PID %d)\n", style.Bold.Render("✓"), pid) } } else { - fmt.Printf(" %s Daemon not running\n", style.Dim.Render("○")) + fmt.Printf(" %s Daemon not tracked by PID file\n", style.Dim.Render("○")) + } + + // Fallback: Search for orphaned daemon processes + orphaned, err := daemon.FindOrphanedDaemons() + if err != nil { + fmt.Printf(" %s Warning: failed to search for orphaned daemons: %v\n", + style.Dim.Render("○"), err) + return + } + + if len(orphaned) > 0 { + fmt.Printf(" %s Found %d orphaned daemon process(es): %v\n", + style.Bold.Render("⚠"), len(orphaned), orphaned) + + killed, err := daemon.KillOrphanedDaemons() + if err != nil { + fmt.Printf(" %s Failed to kill orphaned daemons: %v\n", + style.Bold.Render("✗"), err) + } else if killed > 0 { + fmt.Printf(" %s Killed %d orphaned daemon(s)\n", + style.Bold.Render("✓"), killed) + } } } diff --git a/internal/cmd/start_orphan_unix.go b/internal/cmd/start_orphan_unix.go new file mode 100644 index 00000000..3944e738 --- /dev/null +++ b/internal/cmd/start_orphan_unix.go @@ -0,0 +1,88 @@ +//go:build !windows + +package cmd + +import ( + "fmt" + "syscall" + "time" + + "github.com/steveyegge/gastown/internal/style" + "github.com/steveyegge/gastown/internal/util" +) + +// cleanupOrphanedClaude finds and 
kills orphaned Claude processes with a grace period. +// This is a simpler synchronous implementation that: +// 1. Finds orphaned processes (TTY-less, older than 60s, not in Gas Town sessions) +// 2. Sends SIGTERM to all of them +// 3. Waits for the grace period +// 4. Sends SIGKILL to any that are still alive +func cleanupOrphanedClaude(graceSecs int) { + // Find orphaned processes + orphans, err := util.FindOrphanedClaudeProcesses() + if err != nil { + fmt.Printf(" %s Warning: %v\n", style.Bold.Render("⚠"), err) + return + } + + if len(orphans) == 0 { + fmt.Printf(" %s No orphaned processes found\n", style.Dim.Render("○")) + return + } + + // Send SIGTERM to all orphans + var termPIDs []int + for _, orphan := range orphans { + if err := syscall.Kill(orphan.PID, syscall.SIGTERM); err != nil { + if err != syscall.ESRCH { + fmt.Printf(" %s PID %d: failed to send SIGTERM: %v\n", + style.Bold.Render("⚠"), orphan.PID, err) + } + continue + } + termPIDs = append(termPIDs, orphan.PID) + fmt.Printf(" %s PID %d: sent SIGTERM (waiting %ds before SIGKILL)\n", + style.Bold.Render("→"), orphan.PID, graceSecs) + } + + if len(termPIDs) == 0 { + return + } + + // Wait for grace period + fmt.Printf(" %s Waiting %d seconds for processes to terminate gracefully...\n", + style.Dim.Render("⏳"), graceSecs) + time.Sleep(time.Duration(graceSecs) * time.Second) + + // Check which processes are still alive and send SIGKILL + var killedCount, alreadyDeadCount int + for _, pid := range termPIDs { + // Check if process still exists + if err := syscall.Kill(pid, 0); err != nil { + // Process is gone (either died from SIGTERM or doesn't exist) + alreadyDeadCount++ + continue + } + + // Process still alive - send SIGKILL + if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { + if err != syscall.ESRCH { + fmt.Printf(" %s PID %d: failed to send SIGKILL: %v\n", + style.Bold.Render("⚠"), pid, err) + } + continue + } + killedCount++ + fmt.Printf(" %s PID %d: sent SIGKILL (did not respond to 
SIGTERM)\n", + style.Bold.Render("✓"), pid) + } + + if alreadyDeadCount > 0 { + fmt.Printf(" %s %d process(es) terminated gracefully from SIGTERM\n", + style.Bold.Render("✓"), alreadyDeadCount) + } + if killedCount == 0 && alreadyDeadCount > 0 { + fmt.Printf(" %s All processes cleaned up successfully\n", + style.Bold.Render("✓")) + } +} diff --git a/internal/cmd/start_orphan_windows.go b/internal/cmd/start_orphan_windows.go new file mode 100644 index 00000000..39834e72 --- /dev/null +++ b/internal/cmd/start_orphan_windows.go @@ -0,0 +1,16 @@ +//go:build windows + +package cmd + +import ( + "fmt" + + "github.com/steveyegge/gastown/internal/style" +) + +// cleanupOrphanedClaude is a Windows stub. +// Orphan cleanup requires Unix-specific signals (SIGTERM/SIGKILL). +func cleanupOrphanedClaude(graceSecs int) { + fmt.Printf(" %s Orphan cleanup not supported on Windows\n", + style.Dim.Render("○")) +} diff --git a/internal/cmd/synthesis_test.go b/internal/cmd/synthesis_test.go index ff699a60..c8ef8b38 100644 --- a/internal/cmd/synthesis_test.go +++ b/internal/cmd/synthesis_test.go @@ -1,6 +1,7 @@ package cmd import ( + "path/filepath" "testing" ) @@ -42,7 +43,7 @@ func TestExpandOutputPath(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := expandOutputPath(tt.directory, tt.pattern, tt.reviewID, tt.legID) - if got != tt.want { + if filepath.ToSlash(got) != tt.want { t.Errorf("expandOutputPath() = %q, want %q", got, tt.want) } }) diff --git a/internal/cmd/tap.go b/internal/cmd/tap.go new file mode 100644 index 00000000..12099a10 --- /dev/null +++ b/internal/cmd/tap.go @@ -0,0 +1,35 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var tapCmd = &cobra.Command{ + Use: "tap", + Short: "Claude Code hook handlers", + Long: `Hook handlers for Claude Code PreToolUse and PostToolUse events. + +These commands are called by Claude Code hooks to implement policies, +auditing, and input transformation. 
They tap into the tool execution +flow to guard, audit, inject, or check. + +Subcommands: + guard - Block forbidden operations (PreToolUse, exit 2) + audit - Log/record tool executions (PostToolUse) [planned] + inject - Modify tool inputs (PreToolUse, updatedInput) [planned] + check - Validate after execution (PostToolUse) [planned] + +Hook configuration in .claude/settings.json: + { + "PreToolUse": [{ + "matcher": "Bash(gh pr create*)", + "hooks": [{"command": "gt tap guard pr-workflow"}] + }] + } + +See ~/gt/docs/HOOKS.md for full documentation.`, +} + +func init() { + rootCmd.AddCommand(tapCmd) +} diff --git a/internal/cmd/tap_guard.go b/internal/cmd/tap_guard.go new file mode 100644 index 00000000..55d05ee7 --- /dev/null +++ b/internal/cmd/tap_guard.go @@ -0,0 +1,116 @@ +package cmd + +import ( + "fmt" + "os" + "strings" + + "github.com/spf13/cobra" +) + +var tapGuardCmd = &cobra.Command{ + Use: "guard", + Short: "Block forbidden operations (PreToolUse hook)", + Long: `Block forbidden operations via Claude Code PreToolUse hooks. + +Guard commands exit with code 2 to BLOCK tool execution when a policy +is violated. They're called before the tool runs, preventing the +forbidden operation entirely. + +Available guards: + pr-workflow - Block PR creation and feature branches + +Example hook configuration: + { + "PreToolUse": [{ + "matcher": "Bash(gh pr create*)", + "hooks": [{"command": "gt tap guard pr-workflow"}] + }] + }`, +} + +var tapGuardPRWorkflowCmd = &cobra.Command{ + Use: "pr-workflow", + Short: "Block PR creation and feature branches", + Long: `Block PR workflow operations in Gas Town. + +Gas Town workers push directly to main. PRs add friction that breaks +the autonomous execution model (GUPP principle). 
+ +This guard blocks: + - gh pr create + - git checkout -b (feature branches) + - git switch -c (feature branches) + +Exit codes: + 0 - Operation allowed (not in Gas Town agent context) + 2 - Operation BLOCKED (in agent context) + +The guard only blocks when running as a Gas Town agent (crew, polecat, +witness, etc.). Humans running outside Gas Town can still use PRs.`, + RunE: runTapGuardPRWorkflow, +} + +func init() { + tapCmd.AddCommand(tapGuardCmd) + tapGuardCmd.AddCommand(tapGuardPRWorkflowCmd) +} + +func runTapGuardPRWorkflow(cmd *cobra.Command, args []string) error { + // Check if we're in a Gas Town agent context + if !isGasTownAgentContext() { + // Not in a Gas Town managed context - allow the operation + return nil + } + + // We're in a Gas Town context - block PR operations + fmt.Fprintln(os.Stderr, "") + fmt.Fprintln(os.Stderr, "╔══════════════════════════════════════════════════════════════════╗") + fmt.Fprintln(os.Stderr, "║ ❌ PR WORKFLOW BLOCKED ║") + fmt.Fprintln(os.Stderr, "╠══════════════════════════════════════════════════════════════════╣") + fmt.Fprintln(os.Stderr, "║ Gas Town workers push directly to main. PRs are forbidden. ║") + fmt.Fprintln(os.Stderr, "║ ║") + fmt.Fprintln(os.Stderr, "║ Instead of: gh pr create / git checkout -b / git switch -c ║") + fmt.Fprintln(os.Stderr, "║ Do this: git add . && git commit && git push origin main ║") + fmt.Fprintln(os.Stderr, "║ ║") + fmt.Fprintln(os.Stderr, "║ Why? PRs add friction that breaks autonomous execution. ║") + fmt.Fprintln(os.Stderr, "║ See: ~/gt/docs/PRIMING.md (GUPP principle) ║") + fmt.Fprintln(os.Stderr, "╚══════════════════════════════════════════════════════════════════╝") + fmt.Fprintln(os.Stderr, "") + os.Exit(2) // Exit 2 = BLOCK in Claude Code hooks + + return nil +} + +// isGasTownAgentContext returns true if we're running as a Gas Town managed agent. 
+func isGasTownAgentContext() bool { + // Check environment variables set by Gas Town session management + envVars := []string{ + "GT_POLECAT", + "GT_CREW", + "GT_WITNESS", + "GT_REFINERY", + "GT_MAYOR", + "GT_DEACON", + } + for _, env := range envVars { + if os.Getenv(env) != "" { + return true + } + } + + // Also check if we're in a crew or polecat worktree by path + cwd, err := os.Getwd() + if err != nil { + return false + } + + agentPaths := []string{"/crew/", "/polecats/"} + for _, path := range agentPaths { + if strings.Contains(cwd, path) { + return true + } + } + + return false +} diff --git a/internal/cmd/test_helpers_test.go b/internal/cmd/test_helpers_test.go new file mode 100644 index 00000000..4882ad52 --- /dev/null +++ b/internal/cmd/test_helpers_test.go @@ -0,0 +1,61 @@ +package cmd + +import ( + "os" + "os/exec" + "path/filepath" + "runtime" + "testing" +) + +// buildGT builds the gt binary and returns its path. +// It caches the build across tests in the same run. +var cachedGTBinary string + +func buildGT(t *testing.T) string { + t.Helper() + + if cachedGTBinary != "" { + // Verify cached binary still exists + if _, err := os.Stat(cachedGTBinary); err == nil { + return cachedGTBinary + } + // Binary was cleaned up, rebuild + cachedGTBinary = "" + } + + // Find project root (where go.mod is) + wd, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get working directory: %v", err) + } + + // Walk up to find go.mod + projectRoot := wd + for { + if _, err := os.Stat(filepath.Join(projectRoot, "go.mod")); err == nil { + break + } + parent := filepath.Dir(projectRoot) + if parent == projectRoot { + t.Fatal("could not find project root (go.mod)") + } + projectRoot = parent + } + + // Build gt binary to a persistent temp location (not per-test) + tmpDir := os.TempDir() + binaryName := "gt-integration-test" + if runtime.GOOS == "windows" { + binaryName += ".exe" + } + tmpBinary := filepath.Join(tmpDir, binaryName) + cmd := exec.Command("go", 
"build", "-o", tmpBinary, "./cmd/gt") + cmd.Dir = projectRoot + if output, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("failed to build gt: %v\nOutput: %s", err, output) + } + + cachedGTBinary = tmpBinary + return tmpBinary +} diff --git a/internal/cmd/witness.go b/internal/cmd/witness.go index 0b5f5de0..73dfac7c 100644 --- a/internal/cmd/witness.go +++ b/internal/cmd/witness.go @@ -192,12 +192,13 @@ func runWitnessStop(cmd *cobra.Command, args []string) error { return err } - // Kill tmux session if it exists + // Kill tmux session if it exists. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. t := tmux.NewTmux() sessionName := witnessSessionName(rigName) running, _ := t.HasSession(sessionName) if running { - if err := t.KillSession(sessionName); err != nil { + if err := t.KillSessionWithProcesses(sessionName); err != nil { style.PrintWarning("failed to kill session: %v", err) } } @@ -218,65 +219,65 @@ func runWitnessStop(cmd *cobra.Command, args []string) error { return nil } +// WitnessStatusOutput is the JSON output format for witness status. 
+type WitnessStatusOutput struct { + Running bool `json:"running"` + RigName string `json:"rig_name"` + Session string `json:"session,omitempty"` + MonitoredPolecats []string `json:"monitored_polecats,omitempty"` +} + func runWitnessStatus(cmd *cobra.Command, args []string) error { rigName := args[0] - mgr, err := getWitnessManager(rigName) + // Get rig for polecat info + _, r, err := getRig(rigName) if err != nil { return err } - w, err := mgr.Status() - if err != nil { - return fmt.Errorf("getting status: %w", err) - } + mgr := witness.NewManager(r) - // Check actual tmux session state (more reliable than state file) - t := tmux.NewTmux() - sessionName := witnessSessionName(rigName) - sessionRunning, _ := t.HasSession(sessionName) + // ZFC: tmux is source of truth for running state + running, _ := mgr.IsRunning() + sessionInfo, _ := mgr.Status() // may be nil if not running - // Reconcile state: tmux session is the source of truth for background mode - if sessionRunning && w.State != witness.StateRunning { - w.State = witness.StateRunning - } else if !sessionRunning && w.State == witness.StateRunning { - w.State = witness.StateStopped - } + // Polecats come from rig config, not state file + polecats := r.Polecats // JSON output if witnessStatusJSON { + output := WitnessStatusOutput{ + Running: running, + RigName: rigName, + MonitoredPolecats: polecats, + } + if sessionInfo != nil { + output.Session = sessionInfo.Name + } enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") - return enc.Encode(w) + return enc.Encode(output) } // Human-readable output fmt.Printf("%s Witness: %s\n\n", style.Bold.Render(AgentTypeIcons[AgentWitness]), rigName) - stateStr := string(w.State) - switch w.State { - case witness.StateRunning: - stateStr = style.Bold.Render("● running") - case witness.StateStopped: - stateStr = style.Dim.Render("○ stopped") - case witness.StatePaused: - stateStr = style.Dim.Render("⏸ paused") - } - fmt.Printf(" State: %s\n", stateStr) - if sessionRunning 
{ - fmt.Printf(" Session: %s\n", sessionName) - } - - if w.StartedAt != nil { - fmt.Printf(" Started: %s\n", w.StartedAt.Format("2006-01-02 15:04:05")) + if running { + fmt.Printf(" State: %s\n", style.Bold.Render("● running")) + if sessionInfo != nil { + fmt.Printf(" Session: %s\n", sessionInfo.Name) + } + } else { + fmt.Printf(" State: %s\n", style.Dim.Render("○ stopped")) } // Show monitored polecats fmt.Printf("\n %s\n", style.Bold.Render("Monitored Polecats:")) - if len(w.MonitoredPolecats) == 0 { + if len(polecats) == 0 { fmt.Printf(" %s\n", style.Dim.Render("(none)")) } else { - for _, p := range w.MonitoredPolecats { + for _, p := range polecats { fmt.Printf(" • %s\n", p) } } diff --git a/internal/config/agents.go b/internal/config/agents.go index 860481ce..08e8d654 100644 --- a/internal/config/agents.go +++ b/internal/config/agents.go @@ -327,10 +327,18 @@ func RuntimeConfigFromPreset(preset AgentPreset) *RuntimeConfig { return DefaultRuntimeConfig() } - return &RuntimeConfig{ + rc := &RuntimeConfig{ Command: info.Command, Args: append([]string(nil), info.Args...), // Copy to avoid mutation } + + // Resolve command path for claude preset (handles alias installations) + // Uses resolveClaudePath() from types.go which finds ~/.claude/local/claude + if preset == AgentClaude && rc.Command == "claude" { + rc.Command = resolveClaudePath() + } + + return rc } // BuildResumeCommand builds a command to resume an agent session. diff --git a/internal/config/agents_test.go b/internal/config/agents_test.go index efc0c3dc..ef5596f4 100644 --- a/internal/config/agents_test.go +++ b/internal/config/agents_test.go @@ -8,6 +8,12 @@ import ( "testing" ) +// isClaudeCmd checks if a command is claude (either "claude" or a path ending in "/claude"). +// Note: Named differently from loader_test.go's isClaudeCommand to avoid redeclaration. 
+func isClaudeCmd(cmd string) bool { + return cmd == "claude" || strings.HasSuffix(cmd, "/claude") +} + func TestBuiltinPresets(t *testing.T) { t.Parallel() // Ensure all built-in presets are accessible @@ -71,7 +77,7 @@ func TestRuntimeConfigFromPreset(t *testing.T) { preset AgentPreset wantCommand string }{ - {AgentClaude, "claude"}, + {AgentClaude, "claude"}, // Note: claude may resolve to full path {AgentGemini, "gemini"}, {AgentCodex, "codex"}, {AgentCursor, "cursor-agent"}, @@ -82,7 +88,13 @@ func TestRuntimeConfigFromPreset(t *testing.T) { for _, tt := range tests { t.Run(string(tt.preset), func(t *testing.T) { rc := RuntimeConfigFromPreset(tt.preset) - if rc.Command != tt.wantCommand { + // For claude, command may be full path due to resolveClaudePath + if tt.preset == AgentClaude { + if !isClaudeCmd(rc.Command) { + t.Errorf("RuntimeConfigFromPreset(%s).Command = %v, want claude or path ending in /claude", + tt.preset, rc.Command) + } + } else if rc.Command != tt.wantCommand { t.Errorf("RuntimeConfigFromPreset(%s).Command = %v, want %v", tt.preset, rc.Command, tt.wantCommand) } @@ -226,8 +238,8 @@ func TestMergeWithPreset(t *testing.T) { var nilConfig *RuntimeConfig merged = nilConfig.MergeWithPreset(AgentClaude) - if merged.Command != "claude" { - t.Errorf("nil config merge should get preset command, got %s", merged.Command) + if !isClaudeCmd(merged.Command) { + t.Errorf("nil config merge should get preset command (claude or path), got %s", merged.Command) } // Test empty config gets preset defaults @@ -456,7 +468,12 @@ func TestAgentCommandGeneration(t *testing.T) { t.Fatal("RuntimeConfigFromPreset returned nil") } - if rc.Command != tt.wantCommand { + // For claude, command may be full path due to resolveClaudePath + if tt.preset == AgentClaude { + if !isClaudeCmd(rc.Command) { + t.Errorf("Command = %q, want claude or path ending in /claude", rc.Command) + } + } else if rc.Command != tt.wantCommand { t.Errorf("Command = %q, want %q", rc.Command, 
tt.wantCommand) } @@ -536,7 +553,7 @@ func TestDefaultRigAgentRegistryPath(t *testing.T) { t.Run(tt.rigPath, func(t *testing.T) { got := DefaultRigAgentRegistryPath(tt.rigPath) want := tt.expectedPath - if got != want { + if filepath.ToSlash(got) != filepath.ToSlash(want) { t.Errorf("DefaultRigAgentRegistryPath(%s) = %s, want %s", tt.rigPath, got, want) } }) diff --git a/internal/config/loader.go b/internal/config/loader.go index 8ba00360..2f56808d 100644 --- a/internal/config/loader.go +++ b/internal/config/loader.go @@ -1353,6 +1353,10 @@ func BuildStartupCommandWithAgentOverride(envVars map[string]string, rigPath, pr if rc.Session != nil && rc.Session.SessionIDEnv != "" { resolvedEnv["GT_SESSION_ID_ENV"] = rc.Session.SessionIDEnv } + // Record agent override so handoff can preserve it + if agentOverride != "" { + resolvedEnv["GT_AGENT"] = agentOverride + } // Build environment export prefix var exports []string @@ -1461,13 +1465,14 @@ func BuildCrewStartupCommandWithAgentOverride(rigName, crewName, rigPath, prompt } // ExpectedPaneCommands returns tmux pane command names that indicate the runtime is running. -// For example, Claude runs as "node", while most other runtimes report their executable name. +// Claude can report as "node" (older versions) or "claude" (newer versions). +// Other runtimes typically report their executable name. 
func ExpectedPaneCommands(rc *RuntimeConfig) []string { if rc == nil || rc.Command == "" { return nil } if filepath.Base(rc.Command) == "claude" { - return []string{"node"} + return []string{"node", "claude"} } return []string{filepath.Base(rc.Command)} } diff --git a/internal/config/loader_test.go b/internal/config/loader_test.go index 06f86759..09a3be08 100644 --- a/internal/config/loader_test.go +++ b/internal/config/loader_test.go @@ -4,6 +4,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "testing" "time" @@ -23,6 +24,12 @@ func skipIfAgentBinaryMissing(t *testing.T, agents ...string) { } } +// isClaudeCommand checks if a command is claude (either "claude" or a path ending in "/claude"). +// This handles the case where resolveClaudePath returns the full path to the claude binary. +func isClaudeCommand(cmd string) bool { + return cmd == "claude" || strings.HasSuffix(cmd, "/claude") +} + func TestTownConfigRoundTrip(t *testing.T) { t.Parallel() dir := t.TempDir() @@ -809,7 +816,7 @@ func TestMessagingConfigPath(t *testing.T) { t.Parallel() path := MessagingConfigPath("/home/user/gt") expected := "/home/user/gt/config/messaging.json" - if path != expected { + if filepath.ToSlash(path) != expected { t.Errorf("MessagingConfigPath = %q, want %q", path, expected) } } @@ -820,8 +827,8 @@ func TestRuntimeConfigDefaults(t *testing.T) { if rc.Provider != "claude" { t.Errorf("Provider = %q, want %q", rc.Provider, "claude") } - if rc.Command != "claude" { - t.Errorf("Command = %q, want %q", rc.Command, "claude") + if !isClaudeCommand(rc.Command) { + t.Errorf("Command = %q, want claude or path ending in /claude", rc.Command) } if len(rc.Args) != 1 || rc.Args[0] != "--dangerously-skip-permissions" { t.Errorf("Args = %v, want [--dangerously-skip-permissions]", rc.Args) @@ -834,42 +841,58 @@ func TestRuntimeConfigDefaults(t *testing.T) { func TestRuntimeConfigBuildCommand(t *testing.T) { t.Parallel() tests := []struct { - name string - rc *RuntimeConfig - 
want string + name string + rc *RuntimeConfig + wantContains []string // Parts the command should contain + isClaudeCmd bool // Whether command should be claude (or path to claude) }{ { - name: "nil config uses defaults", - rc: nil, - want: "claude --dangerously-skip-permissions", + name: "nil config uses defaults", + rc: nil, + wantContains: []string{"--dangerously-skip-permissions"}, + isClaudeCmd: true, }, { - name: "default config", - rc: DefaultRuntimeConfig(), - want: "claude --dangerously-skip-permissions", + name: "default config", + rc: DefaultRuntimeConfig(), + wantContains: []string{"--dangerously-skip-permissions"}, + isClaudeCmd: true, }, { - name: "custom command", - rc: &RuntimeConfig{Command: "aider", Args: []string{"--no-git"}}, - want: "aider --no-git", + name: "custom command", + rc: &RuntimeConfig{Command: "aider", Args: []string{"--no-git"}}, + wantContains: []string{"aider", "--no-git"}, + isClaudeCmd: false, }, { - name: "multiple args", - rc: &RuntimeConfig{Command: "claude", Args: []string{"--model", "opus", "--no-confirm"}}, - want: "claude --model opus --no-confirm", + name: "multiple args", + rc: &RuntimeConfig{Command: "claude", Args: []string{"--model", "opus", "--no-confirm"}}, + wantContains: []string{"--model", "opus", "--no-confirm"}, + isClaudeCmd: true, }, { - name: "empty command uses default", - rc: &RuntimeConfig{Command: "", Args: nil}, - want: "claude --dangerously-skip-permissions", + name: "empty command uses default", + rc: &RuntimeConfig{Command: "", Args: nil}, + wantContains: []string{"--dangerously-skip-permissions"}, + isClaudeCmd: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := tt.rc.BuildCommand() - if got != tt.want { - t.Errorf("BuildCommand() = %q, want %q", got, tt.want) + // Check command contains expected parts + for _, part := range tt.wantContains { + if !strings.Contains(got, part) { + t.Errorf("BuildCommand() = %q, should contain %q", got, part) + } + } + // Check if 
command starts with claude (or path to claude) + if tt.isClaudeCmd { + parts := strings.Fields(got) + if len(parts) > 0 && !isClaudeCommand(parts[0]) { + t.Errorf("BuildCommand() = %q, command should be claude or path to claude", got) + } } }) } @@ -878,48 +901,64 @@ func TestRuntimeConfigBuildCommand(t *testing.T) { func TestRuntimeConfigBuildCommandWithPrompt(t *testing.T) { t.Parallel() tests := []struct { - name string - rc *RuntimeConfig - prompt string - want string + name string + rc *RuntimeConfig + prompt string + wantContains []string // Parts the command should contain + isClaudeCmd bool // Whether command should be claude (or path to claude) }{ { - name: "no prompt", - rc: DefaultRuntimeConfig(), - prompt: "", - want: "claude --dangerously-skip-permissions", + name: "no prompt", + rc: DefaultRuntimeConfig(), + prompt: "", + wantContains: []string{"--dangerously-skip-permissions"}, + isClaudeCmd: true, }, { - name: "with prompt", - rc: DefaultRuntimeConfig(), - prompt: "gt prime", - want: `claude --dangerously-skip-permissions "gt prime"`, + name: "with prompt", + rc: DefaultRuntimeConfig(), + prompt: "gt prime", + wantContains: []string{"--dangerously-skip-permissions", `"gt prime"`}, + isClaudeCmd: true, }, { - name: "prompt with quotes", - rc: DefaultRuntimeConfig(), - prompt: `Hello "world"`, - want: `claude --dangerously-skip-permissions "Hello \"world\""`, + name: "prompt with quotes", + rc: DefaultRuntimeConfig(), + prompt: `Hello "world"`, + wantContains: []string{"--dangerously-skip-permissions", `"Hello \"world\""`}, + isClaudeCmd: true, }, { - name: "config initial prompt used if no override", - rc: &RuntimeConfig{Command: "aider", Args: []string{}, InitialPrompt: "/help"}, - prompt: "", - want: `aider "/help"`, + name: "config initial prompt used if no override", + rc: &RuntimeConfig{Command: "aider", Args: []string{}, InitialPrompt: "/help"}, + prompt: "", + wantContains: []string{"aider", `"/help"`}, + isClaudeCmd: false, }, { - name: 
"override takes precedence over config", - rc: &RuntimeConfig{Command: "aider", Args: []string{}, InitialPrompt: "/help"}, - prompt: "custom prompt", - want: `aider "custom prompt"`, + name: "override takes precedence over config", + rc: &RuntimeConfig{Command: "aider", Args: []string{}, InitialPrompt: "/help"}, + prompt: "custom prompt", + wantContains: []string{"aider", `"custom prompt"`}, + isClaudeCmd: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := tt.rc.BuildCommandWithPrompt(tt.prompt) - if got != tt.want { - t.Errorf("BuildCommandWithPrompt(%q) = %q, want %q", tt.prompt, got, tt.want) + // Check command contains expected parts + for _, part := range tt.wantContains { + if !strings.Contains(got, part) { + t.Errorf("BuildCommandWithPrompt(%q) = %q, should contain %q", tt.prompt, got, part) + } + } + // Check if command starts with claude (or path to claude) + if tt.isClaudeCmd { + parts := strings.Fields(got) + if len(parts) > 0 && !isClaudeCommand(parts[0]) { + t.Errorf("BuildCommandWithPrompt(%q) = %q, command should be claude or path to claude", tt.prompt, got) + } } }) } @@ -1050,11 +1089,13 @@ func TestResolveAgentConfigWithOverride(t *testing.T) { if name != "claude-haiku" { t.Fatalf("name = %q, want %q", name, "claude-haiku") } - if rc.Command != "claude" { - t.Fatalf("rc.Command = %q, want %q", rc.Command, "claude") + if !isClaudeCommand(rc.Command) { + t.Fatalf("rc.Command = %q, want claude or path ending in /claude", rc.Command) } - if got := rc.BuildCommand(); got != "claude --model haiku --dangerously-skip-permissions" { - t.Fatalf("BuildCommand() = %q, want %q", got, "claude --model haiku --dangerously-skip-permissions") + got := rc.BuildCommand() + // Check command includes expected flags (path to claude may vary) + if !strings.Contains(got, "--model haiku") || !strings.Contains(got, "--dangerously-skip-permissions") { + t.Fatalf("BuildCommand() = %q, want command with --model haiku and 
--dangerously-skip-permissions", got) } }) @@ -1217,6 +1258,13 @@ func TestBuildStartupCommand_UsesRoleAgentsFromTownSettings(t *testing.T) { binDir := t.TempDir() for _, name := range []string{"gemini", "codex"} { + if runtime.GOOS == "windows" { + path := filepath.Join(binDir, name+".cmd") + if err := os.WriteFile(path, []byte("@echo off\r\nexit /b 0\r\n"), 0644); err != nil { + t.Fatalf("write %s stub: %v", name, err) + } + continue + } path := filepath.Join(binDir, name) if err := os.WriteFile(path, []byte("#!/bin/sh\nexit 0\n"), 0755); err != nil { t.Fatalf("write %s stub: %v", name, err) @@ -1399,8 +1447,9 @@ func TestResolveRoleAgentConfig_FallsBackOnInvalidAgent(t *testing.T) { // Should fall back to default (claude) when agent is invalid rc := ResolveRoleAgentConfig(constants.RoleRefinery, townRoot, rigPath) - if rc.Command != "claude" { - t.Errorf("expected fallback to claude, got: %s", rc.Command) + // Command can be "claude" or full path to claude + if rc.Command != "claude" && !strings.HasSuffix(rc.Command, "/claude") { + t.Errorf("expected fallback to claude or path ending in /claude, got: %s", rc.Command) } } @@ -1429,10 +1478,11 @@ func TestGetRuntimeCommand_UsesRigAgentWhenRigPathProvided(t *testing.T) { func TestExpectedPaneCommands(t *testing.T) { t.Parallel() - t.Run("claude maps to node", func(t *testing.T) { + t.Run("claude maps to node and claude", func(t *testing.T) { got := ExpectedPaneCommands(&RuntimeConfig{Command: "claude"}) - if len(got) != 1 || got[0] != "node" { - t.Fatalf("ExpectedPaneCommands(claude) = %v, want %v", got, []string{"node"}) + want := []string{"node", "claude"} + if len(got) != 2 || got[0] != "node" || got[1] != "claude" { + t.Fatalf("ExpectedPaneCommands(claude) = %v, want %v", got, want) } }) @@ -1481,8 +1531,8 @@ func TestLoadRuntimeConfigFallsBackToDefaults(t *testing.T) { t.Parallel() // Non-existent path should use defaults rc := LoadRuntimeConfig("/nonexistent/path") - if rc.Command != "claude" { - 
t.Errorf("Command = %q, want %q (default)", rc.Command, "claude") + if !isClaudeCommand(rc.Command) { + t.Errorf("Command = %q, want claude or path ending in /claude (default)", rc.Command) } } @@ -1595,7 +1645,7 @@ func TestDaemonPatrolConfigPath(t *testing.T) { for _, tt := range tests { t.Run(tt.townRoot, func(t *testing.T) { path := DaemonPatrolConfigPath(tt.townRoot) - if path != tt.expected { + if filepath.ToSlash(path) != filepath.ToSlash(tt.expected) { t.Errorf("DaemonPatrolConfigPath(%q) = %q, want %q", tt.townRoot, path, tt.expected) } }) @@ -1955,7 +2005,12 @@ func TestLookupAgentConfigWithRigSettings(t *testing.T) { t.Errorf("lookupAgentConfig(%s) returned nil", tt.name) } - if rc.Command != tt.expectedCommand { + // For claude commands, allow either "claude" or path ending in /claude + if tt.expectedCommand == "claude" { + if !isClaudeCommand(rc.Command) { + t.Errorf("lookupAgentConfig(%s).Command = %s, want claude or path ending in /claude", tt.name, rc.Command) + } + } else if rc.Command != tt.expectedCommand { t.Errorf("lookupAgentConfig(%s).Command = %s, want %s", tt.name, rc.Command, tt.expectedCommand) } }) @@ -1999,8 +2054,8 @@ func TestResolveRoleAgentConfig(t *testing.T) { t.Run("rig RoleAgents overrides town RoleAgents", func(t *testing.T) { rc := ResolveRoleAgentConfig("witness", townRoot, rigPath) // Should get claude-haiku from rig's RoleAgents - if rc.Command != "claude" { - t.Errorf("Command = %q, want %q", rc.Command, "claude") + if !isClaudeCommand(rc.Command) { + t.Errorf("Command = %q, want claude or path ending in /claude", rc.Command) } cmd := rc.BuildCommand() if !strings.Contains(cmd, "--model haiku") { @@ -2026,9 +2081,9 @@ func TestResolveRoleAgentConfig(t *testing.T) { t.Run("town-level role (no rigPath) uses town RoleAgents", func(t *testing.T) { rc := ResolveRoleAgentConfig("mayor", townRoot, "") - // mayor is in town's RoleAgents - if rc.Command != "claude" { - t.Errorf("Command = %q, want %q", rc.Command, "claude") + // 
mayor is in town's RoleAgents - command can be "claude" or full path to claude + if rc.Command != "claude" && !strings.HasSuffix(rc.Command, "/claude") { + t.Errorf("Command = %q, want claude or path ending in /claude", rc.Command) } }) } @@ -2529,7 +2584,7 @@ func TestEscalationConfigPath(t *testing.T) { path := EscalationConfigPath("/home/user/gt") expected := "/home/user/gt/settings/escalation.json" - if path != expected { + if filepath.ToSlash(path) != expected { t.Errorf("EscalationConfigPath = %q, want %q", path, expected) } } @@ -2603,3 +2658,122 @@ func TestBuildStartupCommandWithAgentOverride_IncludesGTRoot(t *testing.T) { t.Errorf("expected GT_ROOT=%s in command, got: %q", townRoot, cmd) } } + +func TestQuoteForShell(t *testing.T) { + t.Parallel() + tests := []struct { + name string + input string + want string + }{ + { + name: "simple string", + input: "hello", + want: `"hello"`, + }, + { + name: "string with double quote", + input: `say "hello"`, + want: `"say \"hello\""`, + }, + { + name: "string with backslash", + input: `path\to\file`, + want: `"path\\to\\file"`, + }, + { + name: "string with backtick", + input: "run `cmd`", + want: "\"run \\`cmd\\`\"", + }, + { + name: "string with dollar sign", + input: "cost is $100", + want: `"cost is \$100"`, + }, + { + name: "variable expansion prevented", + input: "$HOME/path", + want: `"\$HOME/path"`, + }, + { + name: "empty string", + input: "", + want: `""`, + }, + { + name: "combined special chars", + input: "`$HOME`", + want: "\"\\`\\$HOME\\`\"", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := quoteForShell(tt.input) + if got != tt.want { + t.Errorf("quoteForShell(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestBuildStartupCommandWithAgentOverride_SetsGTAgent(t *testing.T) { + t.Parallel() + townRoot := t.TempDir() + rigPath := filepath.Join(townRoot, "testrig") + + // Create necessary config files + townSettings := NewTownSettings() + if err 
:= SaveTownSettings(TownSettingsPath(townRoot), townSettings); err != nil { + t.Fatalf("SaveTownSettings: %v", err) + } + if err := SaveRigSettings(RigSettingsPath(rigPath), NewRigSettings()); err != nil { + t.Fatalf("SaveRigSettings: %v", err) + } + + cmd, err := BuildStartupCommandWithAgentOverride( + map[string]string{"GT_ROLE": constants.RoleWitness}, + rigPath, + "", + "gemini", + ) + if err != nil { + t.Fatalf("BuildStartupCommandWithAgentOverride: %v", err) + } + + // Should include GT_AGENT=gemini in export so handoff can preserve it + if !strings.Contains(cmd, "GT_AGENT=gemini") { + t.Errorf("expected GT_AGENT=gemini in command, got: %q", cmd) + } +} + +func TestBuildStartupCommandWithAgentOverride_NoGTAgentWhenNoOverride(t *testing.T) { + t.Parallel() + townRoot := t.TempDir() + rigPath := filepath.Join(townRoot, "testrig") + + // Create necessary config files + townSettings := NewTownSettings() + if err := SaveTownSettings(TownSettingsPath(townRoot), townSettings); err != nil { + t.Fatalf("SaveTownSettings: %v", err) + } + if err := SaveRigSettings(RigSettingsPath(rigPath), NewRigSettings()); err != nil { + t.Fatalf("SaveRigSettings: %v", err) + } + + cmd, err := BuildStartupCommandWithAgentOverride( + map[string]string{"GT_ROLE": constants.RoleWitness}, + rigPath, + "", + "", // No override + ) + if err != nil { + t.Fatalf("BuildStartupCommandWithAgentOverride: %v", err) + } + + // Should NOT include GT_AGENT when no override is used + if strings.Contains(cmd, "GT_AGENT=") { + t.Errorf("expected no GT_AGENT in command when no override, got: %q", cmd) + } +} diff --git a/internal/config/roles.go b/internal/config/roles.go new file mode 100644 index 00000000..7b121f6d --- /dev/null +++ b/internal/config/roles.go @@ -0,0 +1,298 @@ +// Package config provides role configuration for Gas Town agents. 
+package config + +import ( + "embed" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/BurntSushi/toml" +) + +//go:embed roles/*.toml +var defaultRolesFS embed.FS + +// RoleDefinition contains all configuration for a role type. +// This replaces the role bead system with config files. +type RoleDefinition struct { + // Role is the role identifier (mayor, deacon, witness, refinery, polecat, crew, dog). + Role string `toml:"role"` + + // Scope is "town" or "rig" - determines where the agent runs. + Scope string `toml:"scope"` + + // Session contains tmux session configuration. + Session RoleSessionConfig `toml:"session"` + + // Env contains environment variables to set in the session. + Env map[string]string `toml:"env,omitempty"` + + // Health contains health check configuration. + Health RoleHealthConfig `toml:"health"` + + // Nudge is the initial prompt sent when starting the agent. + Nudge string `toml:"nudge,omitempty"` + + // PromptTemplate is the name of the role's prompt template file. + PromptTemplate string `toml:"prompt_template,omitempty"` +} + +// RoleSessionConfig contains session-related configuration. +type RoleSessionConfig struct { + // Pattern is the tmux session name pattern. + // Supports placeholders: {rig}, {name}, {role} + // Examples: "hq-mayor", "gt-{rig}-witness", "gt-{rig}-{name}" + Pattern string `toml:"pattern"` + + // WorkDir is the working directory pattern. + // Supports placeholders: {town}, {rig}, {name}, {role} + // Examples: "{town}", "{town}/{rig}/witness" + WorkDir string `toml:"work_dir"` + + // NeedsPreSync indicates if workspace needs git sync before starting. + NeedsPreSync bool `toml:"needs_pre_sync"` + + // StartCommand is the command to run after creating the session. + // Default: "exec claude --dangerously-skip-permissions" + StartCommand string `toml:"start_command,omitempty"` +} + +// RoleHealthConfig contains health check thresholds. 
+type RoleHealthConfig struct { + // PingTimeout is how long to wait for a health check response. + PingTimeout Duration `toml:"ping_timeout"` + + // ConsecutiveFailures is how many failed health checks before force-kill. + ConsecutiveFailures int `toml:"consecutive_failures"` + + // KillCooldown is the minimum time between force-kills. + KillCooldown Duration `toml:"kill_cooldown"` + + // StuckThreshold is how long a wisp can be in_progress before considered stuck. + StuckThreshold Duration `toml:"stuck_threshold"` +} + +// Duration is a wrapper for time.Duration that supports TOML marshaling. +type Duration struct { + time.Duration +} + +// UnmarshalText implements encoding.TextUnmarshaler for Duration. +func (d *Duration) UnmarshalText(text []byte) error { + parsed, err := time.ParseDuration(string(text)) + if err != nil { + return fmt.Errorf("invalid duration %q: %w", string(text), err) + } + d.Duration = parsed + return nil +} + +// MarshalText implements encoding.TextMarshaler for Duration. +func (d Duration) MarshalText() ([]byte, error) { + return []byte(d.Duration.String()), nil +} + +// String returns the duration as a string. +func (d Duration) String() string { + return d.Duration.String() +} + +// AllRoles returns the list of all known role names. +func AllRoles() []string { + return []string{"mayor", "deacon", "dog", "witness", "refinery", "polecat", "crew"} +} + +// TownRoles returns roles that operate at town scope. +func TownRoles() []string { + return []string{"mayor", "deacon", "dog"} +} + +// RigRoles returns roles that operate at rig scope. +func RigRoles() []string { + return []string{"witness", "refinery", "polecat", "crew"} +} + +// isValidRoleName checks if the given name is a known role. +func isValidRoleName(name string) bool { + for _, r := range AllRoles() { + if r == name { + return true + } + } + return false +} + +// LoadRoleDefinition loads role configuration with override resolution. 
+// Resolution order (later overrides earlier): +// 1. Built-in defaults (embedded in binary) +// 2. Town-level overrides (<town>/roles/<role>.toml) +// 3. Rig-level overrides (<rig>/roles/<role>.toml) +// +// Each layer merges with (not replaces) the previous. Users only specify +// fields they want to change. +func LoadRoleDefinition(townRoot, rigPath, roleName string) (*RoleDefinition, error) { + // Validate role name + if !isValidRoleName(roleName) { + return nil, fmt.Errorf("unknown role %q - valid roles: %v", roleName, AllRoles()) + } + + // 1. Load built-in defaults + def, err := loadBuiltinRoleDefinition(roleName) + if err != nil { + return nil, fmt.Errorf("loading built-in role %s: %w", roleName, err) + } + + // 2. Apply town-level overrides if present + townOverridePath := filepath.Join(townRoot, "roles", roleName+".toml") + if override, err := loadRoleOverride(townOverridePath); err == nil { + mergeRoleDefinition(def, override) + } + + // 3. Apply rig-level overrides if present (only for rig-scoped roles) + if rigPath != "" { + rigOverridePath := filepath.Join(rigPath, "roles", roleName+".toml") + if override, err := loadRoleOverride(rigOverridePath); err == nil { + mergeRoleDefinition(def, override) + } + } + + return def, nil +} + +// loadBuiltinRoleDefinition loads a role definition from embedded defaults. +func loadBuiltinRoleDefinition(roleName string) (*RoleDefinition, error) { + data, err := defaultRolesFS.ReadFile("roles/" + roleName + ".toml") + if err != nil { + return nil, fmt.Errorf("role %s not found in defaults: %w", roleName, err) + } + + var def RoleDefinition + if err := toml.Unmarshal(data, &def); err != nil { + return nil, fmt.Errorf("parsing role %s: %w", roleName, err) + } + + return &def, nil +} + +// loadRoleOverride loads a role override from a file path. +// Returns nil, nil if file doesn't exist. 
+func loadRoleOverride(path string) (*RoleDefinition, error) { + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, err // Signal no override exists + } + return nil, fmt.Errorf("reading %s: %w", path, err) + } + + var def RoleDefinition + if err := toml.Unmarshal(data, &def); err != nil { + return nil, fmt.Errorf("parsing %s: %w", path, err) + } + + return &def, nil +} + +// mergeRoleDefinition merges override into base. +// Only non-zero values in override are applied. +func mergeRoleDefinition(base, override *RoleDefinition) { + if override == nil { + return + } + + // Role and Scope are immutable + // (can't change a witness to a mayor via override) + + // Session config + if override.Session.Pattern != "" { + base.Session.Pattern = override.Session.Pattern + } + if override.Session.WorkDir != "" { + base.Session.WorkDir = override.Session.WorkDir + } + // NeedsPreSync can only be enabled via override, not disabled. + // This is intentional: if a role's builtin requires pre-sync (e.g., refinery), + // disabling it would break the role's assumptions about workspace state. 
+ if override.Session.NeedsPreSync { + base.Session.NeedsPreSync = true + } + if override.Session.StartCommand != "" { + base.Session.StartCommand = override.Session.StartCommand + } + + // Env vars (merge, don't replace) + if override.Env != nil { + if base.Env == nil { + base.Env = make(map[string]string) + } + for k, v := range override.Env { + base.Env[k] = v + } + } + + // Health config + if override.Health.PingTimeout.Duration != 0 { + base.Health.PingTimeout = override.Health.PingTimeout + } + if override.Health.ConsecutiveFailures != 0 { + base.Health.ConsecutiveFailures = override.Health.ConsecutiveFailures + } + if override.Health.KillCooldown.Duration != 0 { + base.Health.KillCooldown = override.Health.KillCooldown + } + if override.Health.StuckThreshold.Duration != 0 { + base.Health.StuckThreshold = override.Health.StuckThreshold + } + + // Prompts + if override.Nudge != "" { + base.Nudge = override.Nudge + } + if override.PromptTemplate != "" { + base.PromptTemplate = override.PromptTemplate + } +} + +// ExpandPattern expands placeholders in a pattern string. +// Supported placeholders: {town}, {rig}, {name}, {role} +func ExpandPattern(pattern, townRoot, rig, name, role string) string { + result := pattern + result = strings.ReplaceAll(result, "{town}", townRoot) + result = strings.ReplaceAll(result, "{rig}", rig) + result = strings.ReplaceAll(result, "{name}", name) + result = strings.ReplaceAll(result, "{role}", role) + return result +} + +// ToLegacyRoleConfig converts a RoleDefinition to the legacy RoleConfig format +// for backward compatibility with existing daemon code. 
+func (rd *RoleDefinition) ToLegacyRoleConfig() *LegacyRoleConfig { + return &LegacyRoleConfig{ + SessionPattern: rd.Session.Pattern, + WorkDirPattern: rd.Session.WorkDir, + NeedsPreSync: rd.Session.NeedsPreSync, + StartCommand: rd.Session.StartCommand, + EnvVars: rd.Env, + PingTimeout: rd.Health.PingTimeout.String(), + ConsecutiveFailures: rd.Health.ConsecutiveFailures, + KillCooldown: rd.Health.KillCooldown.String(), + StuckThreshold: rd.Health.StuckThreshold.String(), + } +} + +// LegacyRoleConfig matches the old beads.RoleConfig struct for compatibility. +// This allows gradual migration without breaking existing code. +type LegacyRoleConfig struct { + SessionPattern string + WorkDirPattern string + NeedsPreSync bool + StartCommand string + EnvVars map[string]string + PingTimeout string + ConsecutiveFailures int + KillCooldown string + StuckThreshold string +} diff --git a/internal/config/roles/crew.toml b/internal/config/roles/crew.toml new file mode 100644 index 00000000..168dd5df --- /dev/null +++ b/internal/config/roles/crew.toml @@ -0,0 +1,23 @@ +# Crew role definition +# Persistent user-managed workspaces. Multiple per rig. + +role = "crew" +scope = "rig" +nudge = "Check your hook and mail, then act accordingly." +prompt_template = "crew.md.tmpl" + +[session] +pattern = "gt-{rig}-crew-{name}" +work_dir = "{town}/{rig}/crew/{name}" +needs_pre_sync = true +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "crew" +GT_SCOPE = "rig" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "4h" diff --git a/internal/config/roles/deacon.toml b/internal/config/roles/deacon.toml new file mode 100644 index 00000000..fb0593f8 --- /dev/null +++ b/internal/config/roles/deacon.toml @@ -0,0 +1,23 @@ +# Deacon role definition +# Daemon beacon for heartbeats and monitoring. One per town. + +role = "deacon" +scope = "town" +nudge = "Run 'gt prime' to check patrol status and begin heartbeat cycle." 
+prompt_template = "deacon.md.tmpl" + +[session] +pattern = "hq-deacon" +work_dir = "{town}" +needs_pre_sync = false +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "deacon" +GT_SCOPE = "town" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "1h" diff --git a/internal/config/roles/dog.toml b/internal/config/roles/dog.toml new file mode 100644 index 00000000..d2e24d55 --- /dev/null +++ b/internal/config/roles/dog.toml @@ -0,0 +1,23 @@ +# Dog role definition +# Town-level workers for cross-rig tasks. Dispatched by Deacon. + +role = "dog" +scope = "town" +nudge = "Check your hook for work assignments." +prompt_template = "dog.md.tmpl" + +[session] +pattern = "gt-dog-{name}" +work_dir = "{town}/deacon/dogs/{name}" +needs_pre_sync = false +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "dog" +GT_SCOPE = "town" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "2h" diff --git a/internal/config/roles/mayor.toml b/internal/config/roles/mayor.toml new file mode 100644 index 00000000..9aa272be --- /dev/null +++ b/internal/config/roles/mayor.toml @@ -0,0 +1,23 @@ +# Mayor role definition +# Global coordinator for cross-rig work. One per town. + +role = "mayor" +scope = "town" +nudge = "Check mail and hook status, then act accordingly." 
+prompt_template = "mayor.md.tmpl" + +[session] +pattern = "hq-mayor" +work_dir = "{town}" +needs_pre_sync = false +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "mayor" +GT_SCOPE = "town" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "1h" diff --git a/internal/config/roles/polecat.toml b/internal/config/roles/polecat.toml new file mode 100644 index 00000000..dbf50f92 --- /dev/null +++ b/internal/config/roles/polecat.toml @@ -0,0 +1,23 @@ +# Polecat role definition +# Ephemeral workers for batch work dispatch. Multiple per rig. + +role = "polecat" +scope = "rig" +nudge = "Check your hook for work assignments." +prompt_template = "polecat.md.tmpl" + +[session] +pattern = "gt-{rig}-{name}" +work_dir = "{town}/{rig}/polecats/{name}" +needs_pre_sync = true +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "polecat" +GT_SCOPE = "rig" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "2h" diff --git a/internal/config/roles/refinery.toml b/internal/config/roles/refinery.toml new file mode 100644 index 00000000..be684f2f --- /dev/null +++ b/internal/config/roles/refinery.toml @@ -0,0 +1,23 @@ +# Refinery role definition +# Merge queue processor with verification gates. One per rig. + +role = "refinery" +scope = "rig" +nudge = "Run 'gt prime' to check merge queue and begin processing." 
+prompt_template = "refinery.md.tmpl" + +[session] +pattern = "gt-{rig}-refinery" +work_dir = "{town}/{rig}/refinery/rig" +needs_pre_sync = true +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "refinery" +GT_SCOPE = "rig" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "2h" diff --git a/internal/config/roles/witness.toml b/internal/config/roles/witness.toml new file mode 100644 index 00000000..d68ac205 --- /dev/null +++ b/internal/config/roles/witness.toml @@ -0,0 +1,23 @@ +# Witness role definition +# Per-rig worker monitor with progressive nudging. One per rig. + +role = "witness" +scope = "rig" +nudge = "Run 'gt prime' to check worker status and begin patrol cycle." +prompt_template = "witness.md.tmpl" + +[session] +pattern = "gt-{rig}-witness" +work_dir = "{town}/{rig}/witness" +needs_pre_sync = false +start_command = "exec claude --dangerously-skip-permissions" + +[env] +GT_ROLE = "witness" +GT_SCOPE = "rig" + +[health] +ping_timeout = "30s" +consecutive_failures = 3 +kill_cooldown = "5m" +stuck_threshold = "1h" diff --git a/internal/config/roles_test.go b/internal/config/roles_test.go new file mode 100644 index 00000000..bf79f33d --- /dev/null +++ b/internal/config/roles_test.go @@ -0,0 +1,272 @@ +package config + +import ( + "strings" + "testing" + "time" +) + +func TestLoadBuiltinRoleDefinition(t *testing.T) { + tests := []struct { + name string + role string + wantScope string + wantPattern string + wantPreSync bool + }{ + { + name: "mayor", + role: "mayor", + wantScope: "town", + wantPattern: "hq-mayor", + wantPreSync: false, + }, + { + name: "deacon", + role: "deacon", + wantScope: "town", + wantPattern: "hq-deacon", + wantPreSync: false, + }, + { + name: "witness", + role: "witness", + wantScope: "rig", + wantPattern: "gt-{rig}-witness", + wantPreSync: false, + }, + { + name: "refinery", + role: "refinery", + wantScope: "rig", + wantPattern: "gt-{rig}-refinery", + 
wantPreSync: true, + }, + { + name: "polecat", + role: "polecat", + wantScope: "rig", + wantPattern: "gt-{rig}-{name}", + wantPreSync: true, + }, + { + name: "crew", + role: "crew", + wantScope: "rig", + wantPattern: "gt-{rig}-crew-{name}", + wantPreSync: true, + }, + { + name: "dog", + role: "dog", + wantScope: "town", + wantPattern: "gt-dog-{name}", + wantPreSync: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + def, err := loadBuiltinRoleDefinition(tt.role) + if err != nil { + t.Fatalf("loadBuiltinRoleDefinition(%s) error: %v", tt.role, err) + } + + if def.Role != tt.role { + t.Errorf("Role = %q, want %q", def.Role, tt.role) + } + if def.Scope != tt.wantScope { + t.Errorf("Scope = %q, want %q", def.Scope, tt.wantScope) + } + if def.Session.Pattern != tt.wantPattern { + t.Errorf("Session.Pattern = %q, want %q", def.Session.Pattern, tt.wantPattern) + } + if def.Session.NeedsPreSync != tt.wantPreSync { + t.Errorf("Session.NeedsPreSync = %v, want %v", def.Session.NeedsPreSync, tt.wantPreSync) + } + + // Verify health config has reasonable defaults + if def.Health.PingTimeout.Duration == 0 { + t.Error("Health.PingTimeout should not be zero") + } + if def.Health.ConsecutiveFailures == 0 { + t.Error("Health.ConsecutiveFailures should not be zero") + } + }) + } +} + +func TestLoadBuiltinRoleDefinition_UnknownRole(t *testing.T) { + _, err := loadBuiltinRoleDefinition("nonexistent") + if err == nil { + t.Error("expected error for unknown role, got nil") + } +} + +func TestLoadRoleDefinition_UnknownRole(t *testing.T) { + _, err := LoadRoleDefinition("/tmp/town", "", "nonexistent") + if err == nil { + t.Error("expected error for unknown role, got nil") + } + // Should have a clear error message, not a cryptic embed error + if !strings.Contains(err.Error(), "unknown role") { + t.Errorf("error should mention 'unknown role', got: %v", err) + } +} + +func TestAllRoles(t *testing.T) { + roles := AllRoles() + if len(roles) != 7 { + 
t.Errorf("AllRoles() returned %d roles, want 7", len(roles)) + } + + expected := map[string]bool{ + "mayor": true, + "deacon": true, + "dog": true, + "witness": true, + "refinery": true, + "polecat": true, + "crew": true, + } + + for _, r := range roles { + if !expected[r] { + t.Errorf("unexpected role %q in AllRoles()", r) + } + } +} + +func TestTownRoles(t *testing.T) { + roles := TownRoles() + if len(roles) != 3 { + t.Errorf("TownRoles() returned %d roles, want 3", len(roles)) + } + + for _, r := range roles { + def, err := loadBuiltinRoleDefinition(r) + if err != nil { + t.Fatalf("loadBuiltinRoleDefinition(%s) error: %v", r, err) + } + if def.Scope != "town" { + t.Errorf("role %s has scope %q, expected 'town'", r, def.Scope) + } + } +} + +func TestRigRoles(t *testing.T) { + roles := RigRoles() + if len(roles) != 4 { + t.Errorf("RigRoles() returned %d roles, want 4", len(roles)) + } + + for _, r := range roles { + def, err := loadBuiltinRoleDefinition(r) + if err != nil { + t.Fatalf("loadBuiltinRoleDefinition(%s) error: %v", r, err) + } + if def.Scope != "rig" { + t.Errorf("role %s has scope %q, expected 'rig'", r, def.Scope) + } + } +} + +func TestExpandPattern(t *testing.T) { + tests := []struct { + pattern string + town string + rig string + name string + role string + expected string + }{ + { + pattern: "{town}", + town: "/home/user/gt", + expected: "/home/user/gt", + }, + { + pattern: "gt-{rig}-witness", + rig: "gastown", + expected: "gt-gastown-witness", + }, + { + pattern: "{town}/{rig}/crew/{name}", + town: "/home/user/gt", + rig: "gastown", + name: "max", + expected: "/home/user/gt/gastown/crew/max", + }, + } + + for _, tt := range tests { + t.Run(tt.pattern, func(t *testing.T) { + got := ExpandPattern(tt.pattern, tt.town, tt.rig, tt.name, tt.role) + if got != tt.expected { + t.Errorf("ExpandPattern() = %q, want %q", got, tt.expected) + } + }) + } +} + +func TestDuration_UnmarshalText(t *testing.T) { + tests := []struct { + input string + expected 
time.Duration + }{ + {"30s", 30 * time.Second}, + {"5m", 5 * time.Minute}, + {"1h", time.Hour}, + {"1h30m", time.Hour + 30*time.Minute}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + var d Duration + if err := d.UnmarshalText([]byte(tt.input)); err != nil { + t.Fatalf("UnmarshalText() error: %v", err) + } + if d.Duration != tt.expected { + t.Errorf("Duration = %v, want %v", d.Duration, tt.expected) + } + }) + } +} + +func TestToLegacyRoleConfig(t *testing.T) { + def := &RoleDefinition{ + Role: "witness", + Scope: "rig", + Session: RoleSessionConfig{ + Pattern: "gt-{rig}-witness", + WorkDir: "{town}/{rig}/witness", + NeedsPreSync: false, + StartCommand: "exec claude", + }, + Env: map[string]string{"GT_ROLE": "witness"}, + Health: RoleHealthConfig{ + PingTimeout: Duration{30 * time.Second}, + ConsecutiveFailures: 3, + KillCooldown: Duration{5 * time.Minute}, + StuckThreshold: Duration{time.Hour}, + }, + } + + legacy := def.ToLegacyRoleConfig() + + if legacy.SessionPattern != "gt-{rig}-witness" { + t.Errorf("SessionPattern = %q, want %q", legacy.SessionPattern, "gt-{rig}-witness") + } + if legacy.WorkDirPattern != "{town}/{rig}/witness" { + t.Errorf("WorkDirPattern = %q, want %q", legacy.WorkDirPattern, "{town}/{rig}/witness") + } + if legacy.NeedsPreSync != false { + t.Errorf("NeedsPreSync = %v, want false", legacy.NeedsPreSync) + } + if legacy.PingTimeout != "30s" { + t.Errorf("PingTimeout = %q, want %q", legacy.PingTimeout, "30s") + } + if legacy.ConsecutiveFailures != 3 { + t.Errorf("ConsecutiveFailures = %d, want 3", legacy.ConsecutiveFailures) + } +} diff --git a/internal/config/types.go b/internal/config/types.go index 95427450..9489e0de 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -2,8 +2,9 @@ package config import ( - "path/filepath" "os" + "os/exec" + "path/filepath" "strings" "time" ) @@ -469,10 +470,35 @@ func defaultRuntimeCommand(provider string) string { case "generic": return "" default: - return 
"claude" + return resolveClaudePath() } } +// resolveClaudePath finds the claude binary, checking PATH first then common installation locations. +// This handles the case where claude is installed as an alias (not in PATH) which doesn't work +// in non-interactive shells spawned by tmux. +func resolveClaudePath() string { + // First, try to find claude in PATH + if path, err := exec.LookPath("claude"); err == nil { + return path + } + + // Check common Claude Code installation locations + home, err := os.UserHomeDir() + if err != nil { + return "claude" // Fall back to bare command + } + + // Standard Claude Code installation path + claudePath := filepath.Join(home, ".claude", "local", "claude") + if _, err := os.Stat(claudePath); err == nil { + return claudePath + } + + // Fall back to bare command (might work if PATH is set differently in tmux) + return "claude" +} + func defaultRuntimeArgs(provider string) []string { switch provider { case "claude": @@ -552,7 +578,8 @@ func defaultProcessNames(provider, command string) []string { func defaultReadyPromptPrefix(provider string) string { if provider == "claude" { - return "> " + // Claude Code uses ❯ (U+276F) as the prompt character + return "❯ " } return "" } @@ -579,9 +606,15 @@ func defaultInstructionsFile(provider string) string { // quoteForShell quotes a string for safe shell usage. 
func quoteForShell(s string) string { - // Simple quoting: wrap in double quotes, escape internal quotes + // Wrap in double quotes, escaping characters that are special in double-quoted strings: + // - backslash (escape character) + // - double quote (string delimiter) + // - backtick (command substitution) + // - dollar sign (variable expansion) escaped := strings.ReplaceAll(s, `\`, `\\`) escaped = strings.ReplaceAll(escaped, `"`, `\"`) + escaped = strings.ReplaceAll(escaped, "`", "\\`") + escaped = strings.ReplaceAll(escaped, "$", `\$`) return `"` + escaped + `"` } diff --git a/internal/connection/connection.go b/internal/connection/connection.go index f273708c..e479333f 100644 --- a/internal/connection/connection.go +++ b/internal/connection/connection.go @@ -62,6 +62,7 @@ type Connection interface { TmuxNewSession(name, dir string) error // TmuxKillSession terminates the named tmux session. + // Uses KillSessionWithProcesses internally to ensure all descendant processes are killed. TmuxKillSession(name string) error // TmuxSendKeys sends keys to the named tmux session. diff --git a/internal/connection/local.go b/internal/connection/local.go index 0bbbcd02..c57dce48 100644 --- a/internal/connection/local.go +++ b/internal/connection/local.go @@ -161,8 +161,9 @@ func (c *LocalConnection) TmuxNewSession(name, dir string) error { } // TmuxKillSession terminates a tmux session. +// Uses KillSessionWithProcesses to ensure all descendant processes are killed. func (c *LocalConnection) TmuxKillSession(name string) error { - return c.tmux.KillSession(name) + return c.tmux.KillSessionWithProcesses(name) } // TmuxSendKeys sends keys to a tmux session. diff --git a/internal/convoy/observer.go b/internal/convoy/observer.go new file mode 100644 index 00000000..588dba87 --- /dev/null +++ b/internal/convoy/observer.go @@ -0,0 +1,136 @@ +// Package convoy provides shared convoy operations for redundant observers. 
+package convoy + +import ( + "bytes" + "encoding/json" + "fmt" + "os/exec" + "path/filepath" + "strings" +) + +// CheckConvoysForIssue finds any convoys tracking the given issue and triggers +// convoy completion checks. This enables redundant convoy observation from +// multiple agents (Witness, Refinery, Daemon). +// +// The check is idempotent - running it multiple times for the same issue is safe. +// The underlying `gt convoy check` handles already-closed convoys gracefully. +// +// Parameters: +// - townRoot: path to the town root directory +// - issueID: the issue ID that was just closed +// - observer: identifier for logging (e.g., "witness", "refinery") +// - logger: optional logger function (can be nil) +// +// Returns the convoy IDs that were checked (may be empty if issue isn't tracked). +func CheckConvoysForIssue(townRoot, issueID, observer string, logger func(format string, args ...interface{})) []string { + if logger == nil { + logger = func(format string, args ...interface{}) {} // no-op + } + + // Find convoys tracking this issue + convoyIDs := getTrackingConvoys(townRoot, issueID) + if len(convoyIDs) == 0 { + return nil + } + + logger("%s: issue %s is tracked by %d convoy(s): %v", observer, issueID, len(convoyIDs), convoyIDs) + + // Run convoy check for each tracking convoy + // Note: gt convoy check is idempotent and handles already-closed convoys + for _, convoyID := range convoyIDs { + if isConvoyClosed(townRoot, convoyID) { + logger("%s: convoy %s already closed, skipping", observer, convoyID) + continue + } + + logger("%s: running convoy check for %s", observer, convoyID) + if err := runConvoyCheck(townRoot); err != nil { + logger("%s: convoy check failed: %v", observer, err) + } + } + + return convoyIDs +} + +// getTrackingConvoys returns convoy IDs that track the given issue. +// Uses direct SQLite query for efficiency (same approach as daemon/convoy_watcher). 
+func getTrackingConvoys(townRoot, issueID string) []string { + townBeads := filepath.Join(townRoot, ".beads") + dbPath := filepath.Join(townBeads, "beads.db") + + // Query for convoys that track this issue + // Handle both direct ID and external reference format + safeIssueID := strings.ReplaceAll(issueID, "'", "''") + + // Query for dependencies where this issue is the target + // Convoys use "tracks" type: convoy -> tracked issue (depends_on_id) + query := fmt.Sprintf(` + SELECT DISTINCT issue_id FROM dependencies + WHERE type = 'tracks' + AND (depends_on_id = '%s' OR depends_on_id LIKE '%%:%s') + `, safeIssueID, safeIssueID) + + queryCmd := exec.Command("sqlite3", "-json", dbPath, query) + var stdout bytes.Buffer + queryCmd.Stdout = &stdout + + if err := queryCmd.Run(); err != nil { + return nil + } + + var results []struct { + IssueID string `json:"issue_id"` + } + if err := json.Unmarshal(stdout.Bytes(), &results); err != nil { + return nil + } + + convoyIDs := make([]string, 0, len(results)) + for _, r := range results { + convoyIDs = append(convoyIDs, r.IssueID) + } + return convoyIDs +} + +// isConvoyClosed checks if a convoy is already closed. +func isConvoyClosed(townRoot, convoyID string) bool { + townBeads := filepath.Join(townRoot, ".beads") + dbPath := filepath.Join(townBeads, "beads.db") + + safeConvoyID := strings.ReplaceAll(convoyID, "'", "''") + query := fmt.Sprintf(`SELECT status FROM issues WHERE id = '%s'`, safeConvoyID) + + queryCmd := exec.Command("sqlite3", "-json", dbPath, query) + var stdout bytes.Buffer + queryCmd.Stdout = &stdout + + if err := queryCmd.Run(); err != nil { + return false + } + + var results []struct { + Status string `json:"status"` + } + if err := json.Unmarshal(stdout.Bytes(), &results); err != nil || len(results) == 0 { + return false + } + + return results[0].Status == "closed" +} + +// runConvoyCheck runs `gt convoy check` to close any completed convoys. 
+// This is idempotent and handles already-closed convoys gracefully. +func runConvoyCheck(townRoot string) error { + cmd := exec.Command("gt", "convoy", "check") + cmd.Dir = townRoot + var stderr bytes.Buffer + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("%v: %s", err, stderr.String()) + } + + return nil +} diff --git a/internal/crew/manager.go b/internal/crew/manager.go index 97233cf1..111e7aae 100644 --- a/internal/crew/manager.go +++ b/internal/crew/manager.go @@ -188,6 +188,12 @@ func (m *Manager) Add(name string, createBranch bool) (*CrewWorker, error) { fmt.Printf("Warning: could not copy overlay files: %v\n", err) } + // Ensure .gitignore has required Gas Town patterns + if err := rig.EnsureGitignorePatterns(crewPath); err != nil { + // Non-fatal - log warning but continue + fmt.Printf("Warning: could not update .gitignore: %v\n", err) + } + // NOTE: Slash commands (.claude/commands/) are provisioned at town level by gt install. // All agents inherit them via Claude's directory traversal - no per-workspace copies needed. @@ -315,16 +321,15 @@ func (m *Manager) loadState(name string) (*CrewWorker, error) { return nil, fmt.Errorf("parsing state: %w", err) } - // Backfill essential fields if missing (handles empty or incomplete state.json) - if crew.Name == "" { - crew.Name = name - } + // Directory name is source of truth for Name and ClonePath. + // state.json can become stale after directory rename, copy, or corruption. + crew.Name = name + crew.ClonePath = m.crewDir(name) + + // Rig only needs backfill when empty (less likely to drift) if crew.Rig == "" { crew.Rig = m.rig.Name } - if crew.ClonePath == "" { - crew.ClonePath = m.crewDir(name) - } return &crew, nil } @@ -465,8 +470,9 @@ func (m *Manager) Start(name string, opts StartOptions) error { } if running { if opts.KillExisting { - // Restart mode - kill existing session - if err := t.KillSession(sessionID); err != nil { + // Restart mode - kill existing session. 
+ // Use KillSessionWithProcesses to ensure all descendant processes are killed. + if err := t.KillSessionWithProcesses(sessionID); err != nil { return fmt.Errorf("killing existing session: %w", err) } } else { @@ -474,8 +480,9 @@ func (m *Manager) Start(name string, opts StartOptions) error { if t.IsClaudeRunning(sessionID) { return fmt.Errorf("%w: %s", ErrSessionRunning, sessionID) } - // Zombie session - kill and recreate - if err := t.KillSession(sessionID); err != nil { + // Zombie session - kill and recreate. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + if err := t.KillSessionWithProcesses(sessionID); err != nil { return fmt.Errorf("killing zombie session: %w", err) } } @@ -568,8 +575,10 @@ func (m *Manager) Stop(name string) error { return ErrSessionNotFound } - // Kill the session - if err := t.KillSession(sessionID); err != nil { + // Kill the session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + // This prevents orphan bash processes from Claude's Bash tool surviving session termination. 
+ if err := t.KillSessionWithProcesses(sessionID); err != nil { return fmt.Errorf("killing session: %w", err) } @@ -582,3 +591,4 @@ func (m *Manager) IsRunning(name string) (bool, error) { sessionID := m.SessionName(name) return t.HasSession(sessionID) } + diff --git a/internal/crew/manager_test.go b/internal/crew/manager_test.go index d1996945..2db2b773 100644 --- a/internal/crew/manager_test.go +++ b/internal/crew/manager_test.go @@ -342,6 +342,56 @@ func TestManagerRemove(t *testing.T) { } } +func TestManagerGetWithStaleStateName(t *testing.T) { + // Regression test: state.json with wrong name should not affect Get() result + // See: gt-h1w - gt crew list shows wrong names + tmpDir, err := os.MkdirTemp("", "crew-test-stale-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer func() { _ = os.RemoveAll(tmpDir) }() + + rigPath := filepath.Join(tmpDir, "test-rig") + if err := os.MkdirAll(rigPath, 0755); err != nil { + t.Fatalf("failed to create rig dir: %v", err) + } + + r := &rig.Rig{ + Name: "test-rig", + Path: rigPath, + } + + mgr := NewManager(r, git.NewGit(rigPath)) + + // Manually create a crew directory with wrong name in state.json + crewDir := filepath.Join(rigPath, "crew", "alice") + if err := os.MkdirAll(crewDir, 0755); err != nil { + t.Fatalf("failed to create crew dir: %v", err) + } + + // Write state.json with wrong name (simulates stale/copied state) + stateFile := filepath.Join(crewDir, "state.json") + staleState := `{"name": "bob", "rig": "test-rig", "clone_path": "/wrong/path"}` + if err := os.WriteFile(stateFile, []byte(staleState), 0644); err != nil { + t.Fatalf("failed to write state file: %v", err) + } + + // Get should return correct name (alice) not stale name (bob) + worker, err := mgr.Get("alice") + if err != nil { + t.Fatalf("Get failed: %v", err) + } + + if worker.Name != "alice" { + t.Errorf("expected name 'alice', got '%s' (stale state.json not overridden)", worker.Name) + } + + expectedPath := 
filepath.Join(rigPath, "crew", "alice") + if worker.ClonePath != expectedPath { + t.Errorf("expected clone_path '%s', got '%s'", expectedPath, worker.ClonePath) + } +} + // Helper to run commands func runCmd(name string, args ...string) error { cmd := exec.Command(name, args...) diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 3f3b6a6b..24cb021e 100755 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -430,9 +430,10 @@ func (d *Daemon) checkDeaconHeartbeat() { // Session exists but heartbeat is stale - Deacon is stuck if age > 30*time.Minute { - // Very stuck - restart the session + // Very stuck - restart the session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. d.logger.Printf("Deacon stuck for %s - restarting session", age.Round(time.Minute)) - if err := d.tmux.KillSession(sessionName); err != nil { + if err := d.tmux.KillSessionWithProcesses(sessionName); err != nil { d.logger.Printf("Error killing stuck Deacon: %v", err) } // ensureDeaconRunning will restart on next heartbeat @@ -680,31 +681,62 @@ func IsRunning(townRoot string) (bool, int, error) { if os.IsNotExist(err) { return false, 0, nil } - return false, 0, err + // Return error for other failures (permissions, I/O) + return false, 0, fmt.Errorf("reading PID file: %w", err) } - pid, err := strconv.Atoi(string(data)) + pidStr := strings.TrimSpace(string(data)) + pid, err := strconv.Atoi(pidStr) if err != nil { - return false, 0, nil + // Corrupted PID file - return error, not silent false + return false, 0, fmt.Errorf("invalid PID in file %q: %w", pidStr, err) } - // Check if process is running + // Check if process is alive process, err := os.FindProcess(pid) if err != nil { return false, 0, nil } // On Unix, FindProcess always succeeds. Send signal 0 to check if alive. 
- err = process.Signal(syscall.Signal(0)) - if err != nil { + if err := process.Signal(syscall.Signal(0)); err != nil { // Process not running, clean up stale PID file - _ = os.Remove(pidFile) + if err := os.Remove(pidFile); err == nil { + // Successfully cleaned up stale file + return false, 0, fmt.Errorf("removed stale PID file (process %d not found)", pid) + } + return false, 0, nil + } + + // CRITICAL: Verify it's actually our daemon, not PID reuse + if !isGasTownDaemon(pid) { + // PID reused by different process + if err := os.Remove(pidFile); err == nil { + return false, 0, fmt.Errorf("removed stale PID file (PID %d is not gt daemon)", pid) + } return false, 0, nil } return true, pid, nil } +// isGasTownDaemon checks if a PID is actually a gt daemon run process. +// This prevents false positives from PID reuse. +// Uses ps command for cross-platform compatibility (Linux, macOS). +func isGasTownDaemon(pid int) bool { + // Use ps to get command for the PID (works on Linux and macOS) + cmd := exec.Command("ps", "-p", strconv.Itoa(pid), "-o", "command=") + output, err := cmd.Output() + if err != nil { + return false + } + + cmdline := strings.TrimSpace(string(output)) + + // Check if it's "gt daemon run" or "/path/to/gt daemon run" + return strings.Contains(cmdline, "gt") && strings.Contains(cmdline, "daemon") && strings.Contains(cmdline, "run") +} + // StopDaemon stops the running daemon for the given town. // Note: The file lock in Run() prevents multiple daemons per town, so we only // need to kill the process from the PID file. @@ -743,6 +775,74 @@ func StopDaemon(townRoot string) error { return nil } +// FindOrphanedDaemons finds all gt daemon run processes that aren't tracked by PID file. +// Returns list of orphaned PIDs. 
+func FindOrphanedDaemons() ([]int, error) { + // Use pgrep to find all "daemon run" processes (broad search, then verify with isGasTownDaemon) + cmd := exec.Command("pgrep", "-f", "daemon run") + output, err := cmd.Output() + if err != nil { + // Exit code 1 means no processes found - that's OK + if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 { + return nil, nil + } + return nil, fmt.Errorf("pgrep failed: %w", err) + } + + // Parse PIDs + var pids []int + for _, line := range strings.Split(strings.TrimSpace(string(output)), "\n") { + if line == "" { + continue + } + pid, err := strconv.Atoi(line) + if err != nil { + continue + } + // Verify it's actually gt daemon (filters out unrelated processes) + if isGasTownDaemon(pid) { + pids = append(pids, pid) + } + } + + return pids, nil +} + +// KillOrphanedDaemons finds and kills any orphaned gt daemon processes. +// Returns number of processes killed. +func KillOrphanedDaemons() (int, error) { + pids, err := FindOrphanedDaemons() + if err != nil { + return 0, err + } + + killed := 0 + for _, pid := range pids { + process, err := os.FindProcess(pid) + if err != nil { + continue + } + + // Try SIGTERM first + if err := process.Signal(syscall.SIGTERM); err != nil { + continue + } + + // Wait for graceful shutdown + time.Sleep(200 * time.Millisecond) + + // Check if still alive + if err := process.Signal(syscall.Signal(0)); err == nil { + // Still alive, force kill + _ = process.Signal(syscall.SIGKILL) + } + + killed++ + } + + return killed, nil +} + // checkPolecatSessionHealth proactively validates polecat tmux sessions. // This detects crashed polecats that: // 1. 
Have work-on-hook (assigned work) diff --git a/internal/daemon/lifecycle.go b/internal/daemon/lifecycle.go index a1324928..65ab1689 100644 --- a/internal/daemon/lifecycle.go +++ b/internal/daemon/lifecycle.go @@ -179,7 +179,9 @@ func (d *Daemon) executeLifecycleAction(request *LifecycleRequest) error { switch request.Action { case ActionShutdown: if running { - if err := d.tmux.KillSession(sessionName); err != nil { + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + // This prevents orphan bash processes from Claude's Bash tool surviving session termination. + if err := d.tmux.KillSessionWithProcesses(sessionName); err != nil { return fmt.Errorf("killing session: %w", err) } d.logger.Printf("Killed session %s", sessionName) @@ -188,8 +190,8 @@ func (d *Daemon) executeLifecycleAction(request *LifecycleRequest) error { case ActionCycle, ActionRestart: if running { - // Kill the session first - if err := d.tmux.KillSession(sessionName); err != nil { + // Kill the session first - use KillSessionWithProcesses to prevent orphan processes. + if err := d.tmux.KillSessionWithProcesses(sessionName); err != nil { return fmt.Errorf("killing session: %w", err) } d.logger.Printf("Killed session %s for restart", sessionName) @@ -211,7 +213,7 @@ func (d *Daemon) executeLifecycleAction(request *LifecycleRequest) error { } // ParsedIdentity holds the components extracted from an agent identity string. -// This is used to look up the appropriate role bead for lifecycle config. +// This is used to look up the appropriate role config for lifecycle management. type ParsedIdentity struct { RoleType string // mayor, deacon, witness, refinery, crew, polecat RigName string // Empty for town-level agents (mayor, deacon) @@ -220,7 +222,7 @@ type ParsedIdentity struct { // parseIdentity extracts role type, rig name, and agent name from an identity string. // This is the ONLY place where identity string patterns are parsed. 
-// All other functions should use the extracted components to look up role beads. +// All other functions should use the extracted components to look up role config. func parseIdentity(identity string) (*ParsedIdentity, error) { switch identity { case "mayor": @@ -268,49 +270,50 @@ func parseIdentity(identity string) (*ParsedIdentity, error) { return nil, fmt.Errorf("unknown identity format: %s", identity) } -// getRoleConfigForIdentity looks up the role bead for an identity and returns its config. -// Falls back to default config if role bead doesn't exist or has no config. +// getRoleConfigForIdentity loads role configuration from the config-based role system. +// Uses config.LoadRoleDefinition() with layered override resolution (builtin → town → rig). +// Returns config in beads.RoleConfig format for backward compatibility. func (d *Daemon) getRoleConfigForIdentity(identity string) (*beads.RoleConfig, *ParsedIdentity, error) { parsed, err := parseIdentity(identity) if err != nil { return nil, nil, err } - // Look up role bead - b := beads.New(d.config.TownRoot) + // Determine rig path for rig-scoped roles + rigPath := "" + if parsed.RigName != "" { + rigPath = filepath.Join(d.config.TownRoot, parsed.RigName) + } - roleBeadID := beads.RoleBeadIDTown(parsed.RoleType) - roleConfig, err := b.GetRoleConfig(roleBeadID) + // Load role definition from config system (Phase 2: config-based roles) + roleDef, err := config.LoadRoleDefinition(d.config.TownRoot, rigPath, parsed.RoleType) if err != nil { - d.logger.Printf("Warning: failed to get role config for %s: %v", roleBeadID, err) + d.logger.Printf("Warning: failed to load role definition for %s: %v", parsed.RoleType, err) + // Return parsed identity even if config fails (caller can use defaults) + return nil, parsed, nil } - // Backward compatibility: fall back to legacy role bead IDs. 
- if roleConfig == nil { - legacyRoleBeadID := beads.RoleBeadID(parsed.RoleType) // gt-<role>-role - if legacyRoleBeadID != roleBeadID { - legacyCfg, legacyErr := b.GetRoleConfig(legacyRoleBeadID) - if legacyErr != nil { - d.logger.Printf("Warning: failed to get legacy role config for %s: %v", legacyRoleBeadID, legacyErr) - } else if legacyCfg != nil { - roleConfig = legacyCfg - } - } + // Convert to beads.RoleConfig for backward compatibility + roleConfig := &beads.RoleConfig{ + SessionPattern: roleDef.Session.Pattern, + WorkDirPattern: roleDef.Session.WorkDir, + NeedsPreSync: roleDef.Session.NeedsPreSync, + StartCommand: roleDef.Session.StartCommand, + EnvVars: roleDef.Env, } - // Return parsed identity even if config is nil (caller can use defaults) return roleConfig, parsed, nil } // identityToSession converts a beads identity to a tmux session name. -// Uses role bead config if available, falls back to hardcoded patterns. +// Uses role config if available, falls back to hardcoded patterns. func (d *Daemon) identityToSession(identity string) string { config, parsed, err := d.getRoleConfigForIdentity(identity) if err != nil { return "" } - // If role bead has session_pattern, use it + // If role config has session_pattern, use it if config != nil && config.SessionPattern != "" { return beads.ExpandRolePattern(config.SessionPattern, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType) } @@ -333,7 +336,7 @@ func (d *Daemon) identityToSession(identity string) string { } // restartSession starts a new session for the given agent. -// Uses role bead config if available, falls back to hardcoded defaults. +// Uses role config if available, falls back to hardcoded defaults. 
func (d *Daemon) restartSession(sessionName, identity string) error { // Get role config for this identity config, parsed, err := d.getRoleConfigForIdentity(identity) @@ -409,9 +412,9 @@ func (d *Daemon) restartSession(sessionName, identity string) error { } // getWorkDir determines the working directory for an agent. -// Uses role bead config if available, falls back to hardcoded defaults. +// Uses role config if available, falls back to hardcoded defaults. func (d *Daemon) getWorkDir(config *beads.RoleConfig, parsed *ParsedIdentity) string { - // If role bead has work_dir_pattern, use it + // If role config has work_dir_pattern, use it if config != nil && config.WorkDirPattern != "" { return beads.ExpandRolePattern(config.WorkDirPattern, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType) } @@ -442,9 +445,9 @@ func (d *Daemon) getWorkDir(config *beads.RoleConfig, parsed *ParsedIdentity) st } // getNeedsPreSync determines if a workspace needs git sync before starting. -// Uses role bead config if available, falls back to hardcoded defaults. +// Uses role config if available, falls back to hardcoded defaults. func (d *Daemon) getNeedsPreSync(config *beads.RoleConfig, parsed *ParsedIdentity) bool { - // If role bead has explicit config, use it + // If role config is available, use it if config != nil { return config.NeedsPreSync } @@ -459,9 +462,9 @@ func (d *Daemon) getNeedsPreSync(config *beads.RoleConfig, parsed *ParsedIdentit } // getStartCommand determines the startup command for an agent. -// Uses role bead config if available, then role-based agent selection, then hardcoded defaults. +// Uses role config if available, then role-based agent selection, then hardcoded defaults. 
func (d *Daemon) getStartCommand(roleConfig *beads.RoleConfig, parsed *ParsedIdentity) string { - // If role bead has explicit config, use it + // If role config is available, use it if roleConfig != nil && roleConfig.StartCommand != "" { // Expand any patterns in the command return beads.ExpandRolePattern(roleConfig.StartCommand, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType) @@ -516,7 +519,7 @@ func (d *Daemon) getStartCommand(roleConfig *beads.RoleConfig, parsed *ParsedIde } // setSessionEnvironment sets environment variables for the tmux session. -// Uses centralized AgentEnv for consistency, plus role bead custom env vars if available. +// Uses centralized AgentEnv for consistency, plus custom env vars from role config if available. func (d *Daemon) setSessionEnvironment(sessionName string, roleConfig *beads.RoleConfig, parsed *ParsedIdentity) { // Use centralized AgentEnv for base environment variables envVars := config.AgentEnv(config.AgentEnvConfig{ @@ -529,7 +532,7 @@ func (d *Daemon) setSessionEnvironment(sessionName string, roleConfig *beads.Rol _ = d.tmux.SetEnvironment(sessionName, k, v) } - // Set any custom env vars from role config (bead-defined overrides) + // Set any custom env vars from role config if roleConfig != nil { for k, v := range roleConfig.EnvVars { expanded := beads.ExpandRolePattern(v, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType) @@ -637,10 +640,10 @@ type AgentBeadInfo struct { Type string `json:"issue_type"` State string // Parsed from description: agent_state HookBead string // Parsed from description: hook_bead - RoleBead string // Parsed from description: role_bead RoleType string // Parsed from description: role_type Rig string // Parsed from description: rig LastUpdate string `json:"updated_at"` + // Note: RoleBead field removed - role definitions are now config-based } // getAgentBeadState reads non-observable agent state from an agent bead. 
@@ -699,7 +702,6 @@ func (d *Daemon) getAgentBeadInfo(agentBeadID string) (*AgentBeadInfo, error) { if fields != nil { info.State = fields.AgentState - info.RoleBead = fields.RoleBead info.RoleType = fields.RoleType info.Rig = fields.Rig } diff --git a/internal/daemon/role_config_integration_test.go b/internal/daemon/role_config_integration_test.go index 968d2a91..e893f339 100644 --- a/internal/daemon/role_config_integration_test.go +++ b/internal/daemon/role_config_integration_test.go @@ -5,41 +5,60 @@ package daemon import ( "io" "log" - "os/exec" - "strings" + "os" + "path/filepath" "testing" ) -func runBd(t *testing.T, dir string, args ...string) string { - t.Helper() - cmd := exec.Command("bd", args...) //nolint:gosec // bd is a trusted internal tool in this repo - cmd.Dir = dir - out, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("bd %s failed: %v\n%s", strings.Join(args, " "), err, string(out)) +// TestGetRoleConfigForIdentity_UsesBuiltinDefaults tests that the daemon +// uses built-in role definitions from embedded TOML files when no overrides exist. 
+func TestGetRoleConfigForIdentity_UsesBuiltinDefaults(t *testing.T) { + townRoot := t.TempDir() + + d := &Daemon{ + config: &Config{TownRoot: townRoot}, + logger: log.New(io.Discard, "", 0), + } + + // Should load witness role from built-in defaults + cfg, parsed, err := d.getRoleConfigForIdentity("myrig-witness") + if err != nil { + t.Fatalf("getRoleConfigForIdentity: %v", err) + } + if parsed == nil || parsed.RoleType != "witness" { + t.Fatalf("parsed = %#v, want roleType witness", parsed) + } + if cfg == nil { + t.Fatal("cfg is nil, expected built-in defaults") + } + // Built-in witness has session pattern "gt-{rig}-witness" + if cfg.SessionPattern != "gt-{rig}-witness" { + t.Errorf("cfg.SessionPattern = %q, want %q", cfg.SessionPattern, "gt-{rig}-witness") } - return string(out) } -func TestGetRoleConfigForIdentity_PrefersTownRoleBead(t *testing.T) { - if _, err := exec.LookPath("bd"); err != nil { - t.Skip("bd not installed") +// TestGetRoleConfigForIdentity_TownOverride tests that town-level TOML overrides +// are merged with built-in defaults. +func TestGetRoleConfigForIdentity_TownOverride(t *testing.T) { + townRoot := t.TempDir() + + // Create town-level override + rolesDir := filepath.Join(townRoot, "roles") + if err := os.MkdirAll(rolesDir, 0755); err != nil { + t.Fatalf("mkdir roles: %v", err) } - townRoot := t.TempDir() - runBd(t, townRoot, "init", "--quiet", "--prefix", "hq") + // Override start_command for witness role + witnessOverride := ` +role = "witness" +scope = "rig" - runBd(t, townRoot, "config", "set", "types.custom", "agent,role,rig,convoy,event") - - runBd(t, townRoot, "config", "set", "types.custom", "agent,role,rig,convoy,event") - - // Create canonical role bead. 
- runBd(t, townRoot, "create", - "--id", "hq-witness-role", - "--type", "role", - "--title", "Witness Role", - "--description", "start_command: exec echo hq\n", - ) +[session] +start_command = "exec echo custom-town-command" +` + if err := os.WriteFile(filepath.Join(rolesDir, "witness.toml"), []byte(witnessOverride), 0644); err != nil { + t.Fatalf("write witness.toml: %v", err) + } d := &Daemon{ config: &Config{TownRoot: townRoot}, @@ -53,30 +72,56 @@ func TestGetRoleConfigForIdentity_PrefersTownRoleBead(t *testing.T) { if parsed == nil || parsed.RoleType != "witness" { t.Fatalf("parsed = %#v, want roleType witness", parsed) } - if cfg == nil || cfg.StartCommand != "exec echo hq" { - t.Fatalf("cfg.StartCommand = %#v, want %q", cfg, "exec echo hq") + if cfg == nil { + t.Fatal("cfg is nil") + } + // Should have the overridden start_command + if cfg.StartCommand != "exec echo custom-town-command" { + t.Errorf("cfg.StartCommand = %q, want %q", cfg.StartCommand, "exec echo custom-town-command") + } + // Should still have built-in session pattern (not overridden) + if cfg.SessionPattern != "gt-{rig}-witness" { + t.Errorf("cfg.SessionPattern = %q, want %q", cfg.SessionPattern, "gt-{rig}-witness") } } -func TestGetRoleConfigForIdentity_FallsBackToLegacyRoleBead(t *testing.T) { - if _, err := exec.LookPath("bd"); err != nil { - t.Skip("bd not installed") +// TestGetRoleConfigForIdentity_RigOverride tests that rig-level TOML overrides +// take precedence over town-level overrides. 
+func TestGetRoleConfigForIdentity_RigOverride(t *testing.T) { + townRoot := t.TempDir() + rigPath := filepath.Join(townRoot, "myrig") + + // Create town-level override + townRolesDir := filepath.Join(townRoot, "roles") + if err := os.MkdirAll(townRolesDir, 0755); err != nil { + t.Fatalf("mkdir town roles: %v", err) + } + townOverride := ` +role = "witness" +scope = "rig" + +[session] +start_command = "exec echo town-command" +` + if err := os.WriteFile(filepath.Join(townRolesDir, "witness.toml"), []byte(townOverride), 0644); err != nil { + t.Fatalf("write town witness.toml: %v", err) } - townRoot := t.TempDir() - runBd(t, townRoot, "init", "--quiet", "--prefix", "gt") + // Create rig-level override (should take precedence) + rigRolesDir := filepath.Join(rigPath, "roles") + if err := os.MkdirAll(rigRolesDir, 0755); err != nil { + t.Fatalf("mkdir rig roles: %v", err) + } + rigOverride := ` +role = "witness" +scope = "rig" - runBd(t, townRoot, "config", "set", "types.custom", "agent,role,rig,convoy,event") - - runBd(t, townRoot, "config", "set", "types.custom", "agent,role,rig,convoy,event") - - // Only legacy role bead exists. 
- runBd(t, townRoot, "create", - "--id", "gt-witness-role", - "--type", "role", - "--title", "Witness Role (legacy)", - "--description", "start_command: exec echo gt\n", - ) +[session] +start_command = "exec echo rig-command" +` + if err := os.WriteFile(filepath.Join(rigRolesDir, "witness.toml"), []byte(rigOverride), 0644); err != nil { + t.Fatalf("write rig witness.toml: %v", err) + } d := &Daemon{ config: &Config{TownRoot: townRoot}, @@ -90,7 +135,11 @@ func TestGetRoleConfigForIdentity_FallsBackToLegacyRoleBead(t *testing.T) { if parsed == nil || parsed.RoleType != "witness" { t.Fatalf("parsed = %#v, want roleType witness", parsed) } - if cfg == nil || cfg.StartCommand != "exec echo gt" { - t.Fatalf("cfg.StartCommand = %#v, want %q", cfg, "exec echo gt") + if cfg == nil { + t.Fatal("cfg is nil") + } + // Should have the rig-level override (takes precedence over town) + if cfg.StartCommand != "exec echo rig-command" { + t.Errorf("cfg.StartCommand = %q, want %q", cfg.StartCommand, "exec echo rig-command") } } diff --git a/internal/deacon/manager.go b/internal/deacon/manager.go index ec16bbf7..8d9164c5 100644 --- a/internal/deacon/manager.go +++ b/internal/deacon/manager.go @@ -63,7 +63,8 @@ func (m *Manager) Start(agentOverride string) error { return ErrAlreadyRunning } // Zombie - tmux alive but Claude dead. Kill and recreate. - if err := t.KillSession(sessionID); err != nil { + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
+ if err := t.KillSessionWithProcesses(sessionID); err != nil { return fmt.Errorf("killing zombie session: %w", err) } } @@ -79,9 +80,11 @@ func (m *Manager) Start(agentOverride string) error { return fmt.Errorf("ensuring Claude settings: %w", err) } - // Build startup command first - // Restarts are handled by daemon via ensureDeaconRunning on each heartbeat - startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "", m.townRoot, "", "", agentOverride) + // Build startup command with initial prompt for autonomous patrol. + // The prompt triggers GUPP: deacon starts patrol immediately without waiting for input. + // This prevents the agent from sitting idle at the prompt after SessionStart hooks run. + initialPrompt := "I am Deacon. Start patrol: check gt hook, if empty create mol-deacon-patrol wisp and execute it." + startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "", m.townRoot, "", initialPrompt, agentOverride) if err != nil { return fmt.Errorf("building startup command: %w", err) } @@ -152,8 +155,10 @@ func (m *Manager) Stop() error { _ = t.SendKeysRaw(sessionID, "C-c") time.Sleep(100 * time.Millisecond) - // Kill the session - if err := t.KillSession(sessionID); err != nil { + // Kill the session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + // This prevents orphan bash processes from Claude's Bash tool surviving session termination. 
+ if err := t.KillSessionWithProcesses(sessionID); err != nil { return fmt.Errorf("killing session: %w", err) } diff --git a/internal/deacon/stuck_test.go b/internal/deacon/stuck_test.go index 7d930c80..9e1e4cd9 100644 --- a/internal/deacon/stuck_test.go +++ b/internal/deacon/stuck_test.go @@ -24,7 +24,7 @@ func TestDefaultStuckConfig(t *testing.T) { func TestHealthCheckStateFile(t *testing.T) { path := HealthCheckStateFile("/tmp/test-town") expected := "/tmp/test-town/deacon/health-check-state.json" - if path != expected { + if filepath.ToSlash(path) != expected { t.Errorf("HealthCheckStateFile = %q, want %q", path, expected) } } diff --git a/internal/doctor/agent_beads_check.go b/internal/doctor/agent_beads_check.go index 6651849a..e64e16a0 100644 --- a/internal/doctor/agent_beads_check.go +++ b/internal/doctor/agent_beads_check.go @@ -170,7 +170,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error { RoleType: "deacon", Rig: "", AgentState: "idle", - RoleBead: beads.DeaconRoleBeadIDTown(), } desc := "Deacon (daemon beacon) - receives mechanical heartbeats, runs town plugins and monitoring." if _, err := townBd.CreateAgentBead(deaconID, desc, fields); err != nil { @@ -184,7 +183,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error { RoleType: "mayor", Rig: "", AgentState: "idle", - RoleBead: beads.MayorRoleBeadIDTown(), } desc := "Mayor - global coordinator, handles cross-rig communication and escalations." 
if _, err := townBd.CreateAgentBead(mayorID, desc, fields); err != nil { @@ -231,7 +229,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error { RoleType: "witness", Rig: rigName, AgentState: "idle", - RoleBead: beads.RoleBeadIDTown("witness"), } desc := fmt.Sprintf("Witness for %s - monitors polecat health and progress.", rigName) if _, err := bd.CreateAgentBead(witnessID, desc, fields); err != nil { @@ -245,7 +242,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error { RoleType: "refinery", Rig: rigName, AgentState: "idle", - RoleBead: beads.RoleBeadIDTown("refinery"), } desc := fmt.Sprintf("Refinery for %s - processes merge queue.", rigName) if _, err := bd.CreateAgentBead(refineryID, desc, fields); err != nil { @@ -262,7 +258,6 @@ func (c *AgentBeadsCheck) Fix(ctx *CheckContext) error { RoleType: "crew", Rig: rigName, AgentState: "idle", - RoleBead: beads.RoleBeadIDTown("crew"), } desc := fmt.Sprintf("Crew worker %s in %s - human-managed persistent workspace.", workerName, rigName) if _, err := bd.CreateAgentBead(crewID, desc, fields); err != nil { diff --git a/internal/doctor/claude_settings_check.go b/internal/doctor/claude_settings_check.go index d1319639..dda1b195 100644 --- a/internal/doctor/claude_settings_check.go +++ b/internal/doctor/claude_settings_check.go @@ -510,8 +510,9 @@ func (c *ClaudeSettingsCheck) Fix(ctx *CheckContext) error { sf.agentType == "deacon" || sf.agentType == "mayor" { running, _ := t.HasSession(sf.sessionName) if running { - // Cycle the agent by killing and letting gt up restart it - _ = t.KillSession(sf.sessionName) + // Cycle the agent by killing and letting gt up restart it. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
+ _ = t.KillSessionWithProcesses(sf.sessionName) } } } diff --git a/internal/doctor/misclassified_wisp_check.go b/internal/doctor/misclassified_wisp_check.go new file mode 100644 index 00000000..a4963c82 --- /dev/null +++ b/internal/doctor/misclassified_wisp_check.go @@ -0,0 +1,206 @@ +package doctor + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/steveyegge/gastown/internal/beads" +) + +// CheckMisclassifiedWisps detects issues that should be marked as wisps but aren't. +// Wisps are ephemeral issues for operational workflows (patrols, MRs, mail). +// This check finds issues that have wisp characteristics but lack the wisp:true flag. +type CheckMisclassifiedWisps struct { + FixableCheck + misclassified []misclassifiedWisp + misclassifiedRigs map[string]int // rig -> count +} + +type misclassifiedWisp struct { + rigName string + id string + title string + reason string +} + +// NewCheckMisclassifiedWisps creates a new misclassified wisp check. +func NewCheckMisclassifiedWisps() *CheckMisclassifiedWisps { + return &CheckMisclassifiedWisps{ + FixableCheck: FixableCheck{ + BaseCheck: BaseCheck{ + CheckName: "misclassified-wisps", + CheckDescription: "Detect issues that should be wisps but aren't marked as ephemeral", + CheckCategory: CategoryCleanup, + }, + }, + misclassifiedRigs: make(map[string]int), + } +} + +// Run checks for misclassified wisps in each rig. 
+func (c *CheckMisclassifiedWisps) Run(ctx *CheckContext) *CheckResult { + c.misclassified = nil + c.misclassifiedRigs = make(map[string]int) + + rigs, err := discoverRigs(ctx.TownRoot) + if err != nil { + return &CheckResult{ + Name: c.Name(), + Status: StatusError, + Message: "Failed to discover rigs", + Details: []string{err.Error()}, + } + } + + if len(rigs) == 0 { + return &CheckResult{ + Name: c.Name(), + Status: StatusOK, + Message: "No rigs configured", + } + } + + var details []string + + for _, rigName := range rigs { + rigPath := filepath.Join(ctx.TownRoot, rigName) + found := c.findMisclassifiedWisps(rigPath, rigName) + if len(found) > 0 { + c.misclassified = append(c.misclassified, found...) + c.misclassifiedRigs[rigName] = len(found) + details = append(details, fmt.Sprintf("%s: %d misclassified wisp(s)", rigName, len(found))) + } + } + + // Also check town-level beads + townFound := c.findMisclassifiedWisps(ctx.TownRoot, "town") + if len(townFound) > 0 { + c.misclassified = append(c.misclassified, townFound...) + c.misclassifiedRigs["town"] = len(townFound) + details = append(details, fmt.Sprintf("town: %d misclassified wisp(s)", len(townFound))) + } + + total := len(c.misclassified) + if total > 0 { + return &CheckResult{ + Name: c.Name(), + Status: StatusWarning, + Message: fmt.Sprintf("%d issue(s) should be marked as wisps", total), + Details: details, + FixHint: "Run 'gt doctor --fix' to mark these issues as ephemeral", + } + } + + return &CheckResult{ + Name: c.Name(), + Status: StatusOK, + Message: "No misclassified wisps found", + } +} + +// findMisclassifiedWisps finds issues that should be wisps but aren't in a single location. 
+func (c *CheckMisclassifiedWisps) findMisclassifiedWisps(path string, rigName string) []misclassifiedWisp { + beadsDir := beads.ResolveBeadsDir(path) + issuesPath := filepath.Join(beadsDir, "issues.jsonl") + file, err := os.Open(issuesPath) + if err != nil { + return nil // No issues file + } + defer file.Close() + + var found []misclassifiedWisp + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if line == "" { + continue + } + + var issue struct { + ID string `json:"id"` + Title string `json:"title"` + Status string `json:"status"` + Type string `json:"issue_type"` + Labels []string `json:"labels"` + Wisp bool `json:"wisp"` + } + if err := json.Unmarshal([]byte(line), &issue); err != nil { + continue + } + + // Skip issues already marked as wisps + if issue.Wisp { + continue + } + + // Skip closed issues - they're done, no need to reclassify + if issue.Status == "closed" { + continue + } + + // Check for wisp characteristics + if reason := c.shouldBeWisp(issue.ID, issue.Title, issue.Type, issue.Labels); reason != "" { + found = append(found, misclassifiedWisp{ + rigName: rigName, + id: issue.ID, + title: issue.Title, + reason: reason, + }) + } + } + + return found +} + +// shouldBeWisp checks if an issue has characteristics indicating it should be a wisp. +// Returns the reason string if it should be a wisp, empty string otherwise. 
+func (c *CheckMisclassifiedWisps) shouldBeWisp(id, title, issueType string, labels []string) string { + // Check for merge-request type - these should always be wisps + if issueType == "merge-request" { + return "merge-request type should be ephemeral" + } + + // Check for patrol-related labels + for _, label := range labels { + if strings.Contains(label, "patrol") { + return "patrol label indicates ephemeral workflow" + } + if label == "gt:mail" || label == "gt:handoff" { + return "mail/handoff label indicates ephemeral message" + } + } + + // Check for formula instance patterns in ID + // Formula instances typically have IDs like "mol-<formula>-<hash>" or "<formula>.<step>" + if strings.HasPrefix(id, "mol-") && strings.Contains(id, "-patrol") { + return "patrol molecule ID pattern" + } + + // Check for specific title patterns indicating operational work + lowerTitle := strings.ToLower(title) + if strings.Contains(lowerTitle, "patrol cycle") || + strings.Contains(lowerTitle, "witness patrol") || + strings.Contains(lowerTitle, "deacon patrol") || + strings.Contains(lowerTitle, "refinery patrol") { + return "patrol title indicates ephemeral workflow" + } + + return "" +} + +// Fix marks misclassified issues as wisps using bd update. +func (c *CheckMisclassifiedWisps) Fix(ctx *CheckContext) error { + // Note: bd doesn't have a direct flag to set wisp:true on existing issues. + // The proper fix is to ensure issues are created with --ephemeral flag. + // For now, we just report the issues - they'll be cleaned up by wisp-gc + // if they become abandoned, or manually closed. + // + // A true fix would require bd to support: bd update <id> --ephemeral + // Until then, this check serves as a diagnostic. 
+ return nil +} diff --git a/internal/doctor/orphan_check.go b/internal/doctor/orphan_check.go index 4192fc98..95c86c56 100644 --- a/internal/doctor/orphan_check.go +++ b/internal/doctor/orphan_check.go @@ -5,7 +5,6 @@ import ( "os" "os/exec" "path/filepath" - "regexp" "strings" "github.com/steveyegge/gastown/internal/events" @@ -95,8 +94,8 @@ func (c *OrphanSessionCheck) Run(ctx *CheckContext) *CheckResult { continue } - // Only check gt-* sessions (Gas Town sessions) - if !strings.HasPrefix(sess, "gt-") { + // Only check gt-* and hq-* sessions (Gas Town sessions) + if !strings.HasPrefix(sess, "gt-") && !strings.HasPrefix(sess, "hq-") { continue } @@ -150,7 +149,8 @@ func (c *OrphanSessionCheck) Fix(ctx *CheckContext) error { // Log pre-death event for crash investigation (before killing) _ = events.LogFeed(events.TypeSessionDeath, sess, events.SessionDeathPayload(sess, "unknown", "orphan cleanup", "gt doctor")) - if err := t.KillSession(sess); err != nil { + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + if err := t.KillSessionWithProcesses(sess); err != nil { lastErr = err } } @@ -200,8 +200,8 @@ func (c *OrphanSessionCheck) getValidRigs(townRoot string) []string { // isValidSession checks if a session name matches expected Gas Town patterns. // Valid patterns: -// - gt-{town}-mayor (dynamic based on town name) -// - gt-{town}-deacon (dynamic based on town name) +// - hq-mayor (headquarters mayor session) +// - hq-deacon (headquarters deacon session) // - gt-<rig>-witness // - gt-<rig>-refinery // - gt-<rig>-<polecat> (where polecat is any name) @@ -354,8 +354,9 @@ func (c *OrphanProcessCheck) getTmuxSessionPIDs() (map[int]bool, error) { //noli // Find tmux server processes using ps instead of pgrep. // pgrep -x tmux is unreliable on macOS - it often misses the actual server. - // We use ps with awk to find processes where comm is exactly "tmux". 
- out, err := exec.Command("sh", "-c", `ps ax -o pid,comm | awk '$2 == "tmux" || $2 ~ /\/tmux$/ { print $1 }'`).Output() + // We use ps with awk to find processes where comm is exactly "tmux" or starts with "tmux:". + // On Linux, tmux servers show as "tmux: server" in the comm field. + out, err := exec.Command("sh", "-c", `ps ax -o pid,comm | awk '$2 == "tmux" || $2 ~ /\/tmux$/ || $2 ~ /^tmux:/ { print $1 }'`).Output() if err != nil { // No tmux server running return pids, nil @@ -388,40 +389,41 @@ func (c *OrphanProcessCheck) getTmuxSessionPIDs() (map[int]bool, error) { //noli return pids, nil } -// findRuntimeProcesses finds all running runtime CLI processes. -// Excludes Claude.app desktop application and its helpers. +// findRuntimeProcesses finds Gas Town Claude processes (those with --dangerously-skip-permissions). +// Only detects processes started by Gas Town, not user's personal Claude sessions. func (c *OrphanProcessCheck) findRuntimeProcesses() ([]processInfo, error) { var procs []processInfo - // Use ps to find runtime processes - out, err := exec.Command("ps", "-eo", "pid,ppid,comm").Output() + // Use ps with args to get full command line (needed to check for Gas Town signature) + out, err := exec.Command("ps", "-eo", "pid,ppid,args").Output() if err != nil { return nil, err } - // Regex to match runtime CLI processes (not Claude.app) - // Match: "claude", "claude-code", or "codex" (or paths ending in those) - runtimePattern := regexp.MustCompile(`(?i)(^claude$|/claude$|^claude-code$|/claude-code$|^codex$|/codex$)`) - - // Pattern to exclude Claude.app and related desktop processes - excludePattern := regexp.MustCompile(`(?i)(Claude\.app|claude-native|chrome-native)`) - for _, line := range strings.Split(string(out), "\n") { fields := strings.Fields(line) if len(fields) < 3 { continue } - // Check if command matches runtime CLI - cmd := strings.Join(fields[2:], " ") + // Extract command name (without path) + cmd := fields[2] + if idx := 
strings.LastIndex(cmd, "/"); idx >= 0 { + cmd = cmd[idx+1:] + } - // Skip desktop app processes - if excludePattern.MatchString(cmd) { + // Only match claude/codex processes, not tmux or other launchers + // (tmux command line may contain --dangerously-skip-permissions as part of the launched command) + if cmd != "claude" && cmd != "claude-code" && cmd != "codex" { continue } - // Only match CLI runtime processes - if !runtimePattern.MatchString(cmd) { + // Get full args + args := strings.Join(fields[2:], " ") + + // Only match Gas Town Claude processes (have --dangerously-skip-permissions) + // This excludes user's personal Claude sessions + if !strings.Contains(args, "--dangerously-skip-permissions") { continue } @@ -436,7 +438,7 @@ func (c *OrphanProcessCheck) findRuntimeProcesses() ([]processInfo, error) { procs = append(procs, processInfo{ pid: pid, ppid: ppid, - cmd: cmd, + cmd: args, }) } diff --git a/internal/doctor/orphan_check_test.go b/internal/doctor/orphan_check_test.go index 19b8e000..f3820604 100644 --- a/internal/doctor/orphan_check_test.go +++ b/internal/doctor/orphan_check_test.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "reflect" + "runtime" "testing" ) @@ -43,6 +44,10 @@ func TestNewOrphanProcessCheck(t *testing.T) { } func TestOrphanProcessCheck_Run(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("orphan process detection is not supported on Windows") + } + // This test verifies the check runs without error. // Results depend on whether Claude processes exist in the test environment. check := NewOrphanProcessCheck() @@ -353,6 +358,37 @@ func TestIsCrewSession_ComprehensivePatterns(t *testing.T) { } } +// TestOrphanSessionCheck_HQSessions tests that hq-* sessions are properly recognized as valid. 
+func TestOrphanSessionCheck_HQSessions(t *testing.T) { + townRoot := t.TempDir() + mayorDir := filepath.Join(townRoot, "mayor") + if err := os.MkdirAll(mayorDir, 0o755); err != nil { + t.Fatalf("create mayor dir: %v", err) + } + if err := os.WriteFile(filepath.Join(mayorDir, "rigs.json"), []byte("{}"), 0o644); err != nil { + t.Fatalf("create rigs.json: %v", err) + } + + lister := &mockSessionLister{ + sessions: []string{ + "hq-mayor", // valid: headquarters mayor session + "hq-deacon", // valid: headquarters deacon session + }, + } + check := NewOrphanSessionCheckWithSessionLister(lister) + result := check.Run(&CheckContext{TownRoot: townRoot}) + + if result.Status != StatusOK { + t.Fatalf("expected StatusOK for valid hq sessions, got %v: %s", result.Status, result.Message) + } + if result.Message != "All 2 Gas Town sessions are valid" { + t.Fatalf("unexpected message: %q", result.Message) + } + if len(check.orphanSessions) != 0 { + t.Fatalf("expected no orphan sessions, got %v", check.orphanSessions) + } +} + // TestOrphanSessionCheck_Run_Deterministic tests the full Run path with a mock session // lister, ensuring deterministic behavior without depending on real tmux state. 
func TestOrphanSessionCheck_Run_Deterministic(t *testing.T) { @@ -378,9 +414,11 @@ func TestOrphanSessionCheck_Run_Deterministic(t *testing.T) { "gt-gastown-witness", // valid: gastown rig exists "gt-gastown-polecat1", // valid: gastown rig exists "gt-beads-refinery", // valid: beads rig exists + "hq-mayor", // valid: hq-mayor is recognized + "hq-deacon", // valid: hq-deacon is recognized "gt-unknown-witness", // orphan: unknown rig doesn't exist "gt-missing-crew-joe", // orphan: missing rig doesn't exist - "random-session", // ignored: doesn't match gt-* pattern + "random-session", // ignored: doesn't match gt-*/hq-* pattern }, } check := NewOrphanSessionCheckWithSessionLister(lister) diff --git a/internal/doctor/role_beads_check.go b/internal/doctor/role_beads_check.go index aa9c9c77..b4a2c612 100644 --- a/internal/doctor/role_beads_check.go +++ b/internal/doctor/role_beads_check.go @@ -2,119 +2,116 @@ package doctor import ( "fmt" - "os/exec" - "strings" + "os" + "path/filepath" - "github.com/steveyegge/gastown/internal/beads" + "github.com/BurntSushi/toml" + "github.com/steveyegge/gastown/internal/config" ) -// RoleBeadsCheck verifies that role definition beads exist. -// Role beads are templates that define role characteristics and lifecycle hooks. -// They are stored in town beads (~/.beads/) with hq- prefix: -// - hq-mayor-role, hq-deacon-role, hq-dog-role -// - hq-witness-role, hq-refinery-role, hq-polecat-role, hq-crew-role -// -// Role beads are created by gt install, but creation may fail silently. -// Without role beads, agents fall back to defaults which may differ from -// user expectations. -type RoleBeadsCheck struct { - FixableCheck - missing []string // Track missing role beads for fix +// RoleConfigCheck verifies that role configuration is valid. +// Role definitions are now config-based (internal/config/roles/*.toml), +// not stored as beads. Built-in defaults are embedded in the binary. 
+// This check validates any user-provided overrides at: +// - <town>/roles/<role>.toml (town-level overrides) +// - <rig>/roles/<role>.toml (rig-level overrides) +type RoleConfigCheck struct { + BaseCheck } -// NewRoleBeadsCheck creates a new role beads check. -func NewRoleBeadsCheck() *RoleBeadsCheck { - return &RoleBeadsCheck{ - FixableCheck: FixableCheck{ - BaseCheck: BaseCheck{ - CheckName: "role-beads-exist", - CheckDescription: "Verify role definition beads exist", - CheckCategory: CategoryConfig, - }, +// NewRoleBeadsCheck creates a new role config check. +// Note: Function name kept as NewRoleBeadsCheck for backward compatibility +// with existing doctor.go registration code. +func NewRoleBeadsCheck() *RoleConfigCheck { + return &RoleConfigCheck{ + BaseCheck: BaseCheck{ + CheckName: "role-config-valid", + CheckDescription: "Verify role configuration is valid", + CheckCategory: CategoryConfig, }, } } -// Run checks if role beads exist. -func (c *RoleBeadsCheck) Run(ctx *CheckContext) *CheckResult { - c.missing = nil // Reset +// Run checks if role config is valid. 
+func (c *RoleConfigCheck) Run(ctx *CheckContext) *CheckResult { + var warnings []string + var overrideCount int - townBeadsPath := beads.GetTownBeadsPath(ctx.TownRoot) - bd := beads.New(townBeadsPath) - - var missing []string - roleDefs := beads.AllRoleBeadDefs() - - for _, role := range roleDefs { - if _, err := bd.Show(role.ID); err != nil { - missing = append(missing, role.ID) + // Check town-level overrides + townRolesDir := filepath.Join(ctx.TownRoot, "roles") + if entries, err := os.ReadDir(townRolesDir); err == nil { + for _, entry := range entries { + if !entry.IsDir() && filepath.Ext(entry.Name()) == ".toml" { + overrideCount++ + path := filepath.Join(townRolesDir, entry.Name()) + if err := validateRoleOverride(path); err != nil { + warnings = append(warnings, fmt.Sprintf("town override %s: %v", entry.Name(), err)) + } + } } } - c.missing = missing + // Check rig-level overrides for each rig + // Discover rigs by looking for directories with rig.json + if entries, err := os.ReadDir(ctx.TownRoot); err == nil { + for _, entry := range entries { + if !entry.IsDir() { + continue + } + rigName := entry.Name() + // Check if this is a rig (has rig.json) + if _, err := os.Stat(filepath.Join(ctx.TownRoot, rigName, "rig.json")); err != nil { + continue + } + rigRolesDir := filepath.Join(ctx.TownRoot, rigName, "roles") + if roleEntries, err := os.ReadDir(rigRolesDir); err == nil { + for _, roleEntry := range roleEntries { + if !roleEntry.IsDir() && filepath.Ext(roleEntry.Name()) == ".toml" { + overrideCount++ + path := filepath.Join(rigRolesDir, roleEntry.Name()) + if err := validateRoleOverride(path); err != nil { + warnings = append(warnings, fmt.Sprintf("rig %s override %s: %v", rigName, roleEntry.Name(), err)) + } + } + } + } + } + } - if len(missing) == 0 { + if len(warnings) > 0 { return &CheckResult{ Name: c.Name(), - Status: StatusOK, - Message: fmt.Sprintf("All %d role beads exist", len(roleDefs)), + Status: StatusWarning, + Message: fmt.Sprintf("%d role 
config override(s) have issues", len(warnings)), + Details: warnings, + FixHint: "Check TOML syntax in role override files", Category: c.Category(), } } + msg := "Role config uses built-in defaults" + if overrideCount > 0 { + msg = fmt.Sprintf("Role config valid (%d override file(s))", overrideCount) + } + return &CheckResult{ Name: c.Name(), - Status: StatusWarning, // Warning, not error - agents work without role beads - Message: fmt.Sprintf("%d role bead(s) missing (agents will use defaults)", len(missing)), - Details: missing, - FixHint: "Run 'gt doctor --fix' to create missing role beads", + Status: StatusOK, + Message: msg, Category: c.Category(), } } -// Fix creates missing role beads. -func (c *RoleBeadsCheck) Fix(ctx *CheckContext) error { - // Re-run check to populate missing if needed - if c.missing == nil { - result := c.Run(ctx) - if result.Status == StatusOK { - return nil // Nothing to fix - } +// validateRoleOverride checks if a role override file is valid TOML. +func validateRoleOverride(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return err } - if len(c.missing) == 0 { - return nil - } - - // Build lookup map for role definitions - roleDefMap := make(map[string]beads.RoleBeadDef) - for _, role := range beads.AllRoleBeadDefs() { - roleDefMap[role.ID] = role - } - - // Create missing role beads - for _, id := range c.missing { - role, ok := roleDefMap[id] - if !ok { - continue // Shouldn't happen - } - - // Create role bead using bd create --type=role - args := []string{ - "create", - "--type=role", - "--id=" + role.ID, - "--title=" + role.Title, - "--description=" + role.Desc, - } - if beads.NeedsForceForID(role.ID) { - args = append(args, "--force") - } - cmd := exec.Command("bd", args...) 
- cmd.Dir = ctx.TownRoot - if output, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("creating %s: %s", role.ID, strings.TrimSpace(string(output))) - } + var def config.RoleDefinition + if err := toml.Unmarshal(data, &def); err != nil { + return fmt.Errorf("invalid TOML: %w", err) } return nil diff --git a/internal/doctor/role_beads_check_test.go b/internal/doctor/role_beads_check_test.go index 83dbde23..6f5d63bb 100644 --- a/internal/doctor/role_beads_check_test.go +++ b/internal/doctor/role_beads_check_test.go @@ -4,15 +4,64 @@ import ( "os" "path/filepath" "testing" - - "github.com/steveyegge/gastown/internal/beads" ) -func TestRoleBeadsCheck_Run(t *testing.T) { - t.Run("no town beads returns warning", func(t *testing.T) { +func TestRoleConfigCheck_Run(t *testing.T) { + t.Run("no overrides returns OK with defaults message", func(t *testing.T) { tmpDir := t.TempDir() - // Create minimal town structure without .beads - if err := os.MkdirAll(filepath.Join(tmpDir, "mayor"), 0755); err != nil { + + check := NewRoleBeadsCheck() + ctx := &CheckContext{TownRoot: tmpDir} + result := check.Run(ctx) + + if result.Status != StatusOK { + t.Errorf("expected StatusOK, got %v: %s", result.Status, result.Message) + } + if result.Message != "Role config uses built-in defaults" { + t.Errorf("unexpected message: %s", result.Message) + } + }) + + t.Run("valid town override returns OK", func(t *testing.T) { + tmpDir := t.TempDir() + rolesDir := filepath.Join(tmpDir, "roles") + if err := os.MkdirAll(rolesDir, 0755); err != nil { + t.Fatal(err) + } + + // Create a valid TOML override + override := ` +role = "witness" +scope = "rig" + +[session] +start_command = "exec echo test" +` + if err := os.WriteFile(filepath.Join(rolesDir, "witness.toml"), []byte(override), 0644); err != nil { + t.Fatal(err) + } + + check := NewRoleBeadsCheck() + ctx := &CheckContext{TownRoot: tmpDir} + result := check.Run(ctx) + + if result.Status != StatusOK { + t.Errorf("expected StatusOK, got 
%v: %s", result.Status, result.Message) + } + if result.Message != "Role config valid (1 override file(s))" { + t.Errorf("unexpected message: %s", result.Message) + } + }) + + t.Run("invalid town override returns warning", func(t *testing.T) { + tmpDir := t.TempDir() + rolesDir := filepath.Join(tmpDir, "roles") + if err := os.MkdirAll(rolesDir, 0755); err != nil { + t.Fatal(err) + } + + // Create an invalid TOML file + if err := os.WriteFile(filepath.Join(rolesDir, "witness.toml"), []byte("invalid { toml"), 0644); err != nil { t.Fatal(err) } @@ -20,49 +69,53 @@ func TestRoleBeadsCheck_Run(t *testing.T) { ctx := &CheckContext{TownRoot: tmpDir} result := check.Run(ctx) - // Without .beads directory, all role beads are "missing" - expectedCount := len(beads.AllRoleBeadDefs()) if result.Status != StatusWarning { t.Errorf("expected StatusWarning, got %v: %s", result.Status, result.Message) } - if len(result.Details) != expectedCount { - t.Errorf("expected %d missing role beads, got %d: %v", expectedCount, len(result.Details), result.Details) + if len(result.Details) != 1 { + t.Errorf("expected 1 warning detail, got %d", len(result.Details)) } }) - t.Run("check is fixable", func(t *testing.T) { + t.Run("valid rig override returns OK", func(t *testing.T) { + tmpDir := t.TempDir() + rigName := "testrig" + rigDir := filepath.Join(tmpDir, rigName) + rigRolesDir := filepath.Join(rigDir, "roles") + if err := os.MkdirAll(rigRolesDir, 0755); err != nil { + t.Fatal(err) + } + + // Create rig.json to mark this as a rig + if err := os.WriteFile(filepath.Join(rigDir, "rig.json"), []byte(`{"name": "testrig"}`), 0644); err != nil { + t.Fatal(err) + } + + // Create a valid TOML override + override := ` +role = "refinery" +scope = "rig" + +[session] +needs_pre_sync = true +` + if err := os.WriteFile(filepath.Join(rigRolesDir, "refinery.toml"), []byte(override), 0644); err != nil { + t.Fatal(err) + } + check := NewRoleBeadsCheck() - if !check.CanFix() { - t.Error("RoleBeadsCheck should 
be fixable") + ctx := &CheckContext{TownRoot: tmpDir} + result := check.Run(ctx) + + if result.Status != StatusOK { + t.Errorf("expected StatusOK, got %v: %s", result.Status, result.Message) + } + }) + + t.Run("check is not fixable", func(t *testing.T) { + check := NewRoleBeadsCheck() + if check.CanFix() { + t.Error("RoleConfigCheck should not be fixable (config issues need manual fix)") } }) } - -func TestRoleBeadsCheck_usesSharedDefs(t *testing.T) { - // Verify the check uses beads.AllRoleBeadDefs() - roleDefs := beads.AllRoleBeadDefs() - - if len(roleDefs) < 7 { - t.Errorf("expected at least 7 role beads, got %d", len(roleDefs)) - } - - // Verify key roles are present - expectedIDs := map[string]bool{ - "hq-mayor-role": false, - "hq-deacon-role": false, - "hq-witness-role": false, - "hq-refinery-role": false, - } - - for _, role := range roleDefs { - if _, exists := expectedIDs[role.ID]; exists { - expectedIDs[role.ID] = true - } - } - - for id, found := range expectedIDs { - if !found { - t.Errorf("expected role %s not found in AllRoleBeadDefs()", id) - } - } -} diff --git a/internal/doctor/routing_mode_check.go b/internal/doctor/routing_mode_check.go new file mode 100644 index 00000000..3edf696b --- /dev/null +++ b/internal/doctor/routing_mode_check.go @@ -0,0 +1,147 @@ +package doctor + +import ( + "bytes" + "fmt" + "os/exec" + "path/filepath" + "strings" +) + +// RoutingModeCheck detects when beads routing.mode is set to "auto", which can +// cause issues to be unexpectedly routed to ~/.beads-planning instead of the +// local .beads directory. This happens because auto mode uses git remote URL +// to detect user role, and non-SSH URLs are interpreted as "contributor" mode. +// +// See: https://github.com/steveyegge/beads/issues/1165 +type RoutingModeCheck struct { + FixableCheck +} + +// NewRoutingModeCheck creates a new routing mode check. 
+func NewRoutingModeCheck() *RoutingModeCheck { + return &RoutingModeCheck{ + FixableCheck: FixableCheck{ + BaseCheck: BaseCheck{ + CheckName: "routing-mode", + CheckDescription: "Check beads routing.mode is explicit (prevents .beads-planning routing)", + CheckCategory: CategoryConfig, + }, + }, + } +} + +// Run checks if routing.mode is set to "explicit". +func (c *RoutingModeCheck) Run(ctx *CheckContext) *CheckResult { + // Check town-level beads config + townBeadsDir := filepath.Join(ctx.TownRoot, ".beads") + result := c.checkRoutingMode(townBeadsDir, "town") + if result.Status != StatusOK { + return result + } + + // Also check rig-level beads if specified + if ctx.RigName != "" { + rigBeadsDir := filepath.Join(ctx.RigPath(), ".beads") + rigResult := c.checkRoutingMode(rigBeadsDir, fmt.Sprintf("rig '%s'", ctx.RigName)) + if rigResult.Status != StatusOK { + return rigResult + } + } + + return &CheckResult{ + Name: c.Name(), + Status: StatusOK, + Message: "Beads routing.mode is explicit", + } +} + +// checkRoutingMode checks the routing mode in a specific beads directory. 
+func (c *RoutingModeCheck) checkRoutingMode(beadsDir, location string) *CheckResult { + // Run bd config get routing.mode + cmd := exec.Command("bd", "config", "get", "routing.mode") + cmd.Dir = filepath.Dir(beadsDir) + cmd.Env = append(cmd.Environ(), "BEADS_DIR="+beadsDir) + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + // If the config key doesn't exist, that means it defaults to "auto" + if strings.Contains(stderr.String(), "not found") || strings.Contains(stderr.String(), "not set") { + return &CheckResult{ + Name: c.Name(), + Status: StatusWarning, + Message: fmt.Sprintf("routing.mode not set at %s (defaults to auto)", location), + Details: []string{ + "Auto routing mode uses git remote URL to detect user role", + "Non-SSH URLs (HTTPS or file paths) trigger routing to ~/.beads-planning", + "This causes mail and issues to be stored in the wrong location", + "See: https://github.com/steveyegge/beads/issues/1165", + }, + FixHint: "Run 'gt doctor --fix' or 'bd config set routing.mode explicit'", + } + } + // Other error - report as warning + return &CheckResult{ + Name: c.Name(), + Status: StatusWarning, + Message: fmt.Sprintf("Could not check routing.mode at %s: %v", location, err), + } + } + + mode := strings.TrimSpace(stdout.String()) + if mode != "explicit" { + return &CheckResult{ + Name: c.Name(), + Status: StatusWarning, + Message: fmt.Sprintf("routing.mode is '%s' at %s (should be 'explicit')", mode, location), + Details: []string{ + "Auto routing mode uses git remote URL to detect user role", + "Non-SSH URLs (HTTPS or file paths) trigger routing to ~/.beads-planning", + "This causes mail and issues to be stored in the wrong location", + "See: https://github.com/steveyegge/beads/issues/1165", + }, + FixHint: "Run 'gt doctor --fix' or 'bd config set routing.mode explicit'", + } + } + + return &CheckResult{ + Name: c.Name(), + Status: StatusOK, + Message: fmt.Sprintf("routing.mode is 
explicit at %s", location), + } +} + +// Fix sets routing.mode to "explicit" in both town and rig beads. +func (c *RoutingModeCheck) Fix(ctx *CheckContext) error { + // Fix town-level beads + townBeadsDir := filepath.Join(ctx.TownRoot, ".beads") + if err := c.setRoutingMode(townBeadsDir); err != nil { + return fmt.Errorf("fixing town beads: %w", err) + } + + // Also fix rig-level beads if specified + if ctx.RigName != "" { + rigBeadsDir := filepath.Join(ctx.RigPath(), ".beads") + if err := c.setRoutingMode(rigBeadsDir); err != nil { + return fmt.Errorf("fixing rig %s beads: %w", ctx.RigName, err) + } + } + + return nil +} + +// setRoutingMode sets routing.mode to "explicit" in the specified beads directory. +func (c *RoutingModeCheck) setRoutingMode(beadsDir string) error { + cmd := exec.Command("bd", "config", "set", "routing.mode", "explicit") + cmd.Dir = filepath.Dir(beadsDir) + cmd.Env = append(cmd.Environ(), "BEADS_DIR="+beadsDir) + + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("bd config set failed: %s", strings.TrimSpace(string(output))) + } + + return nil +} diff --git a/internal/doctor/sparse_checkout_check_test.go b/internal/doctor/sparse_checkout_check_test.go index a98e232c..15b806cf 100644 --- a/internal/doctor/sparse_checkout_check_test.go +++ b/internal/doctor/sparse_checkout_check_test.go @@ -120,7 +120,7 @@ func TestSparseCheckoutCheck_MayorRigMissingSparseCheckout(t *testing.T) { if !strings.Contains(result.Message, "1 repo(s) missing") { t.Errorf("expected message about missing config, got %q", result.Message) } - if len(result.Details) != 1 || !strings.Contains(result.Details[0], "mayor/rig") { + if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "mayor/rig") { t.Errorf("expected details to contain mayor/rig, got %v", result.Details) } } @@ -164,7 +164,7 @@ func TestSparseCheckoutCheck_CrewMissingSparseCheckout(t *testing.T) { if result.Status != StatusError { t.Errorf("expected 
StatusError for missing sparse checkout, got %v", result.Status) } - if len(result.Details) != 1 || !strings.Contains(result.Details[0], "crew/agent1") { + if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "crew/agent1") { t.Errorf("expected details to contain crew/agent1, got %v", result.Details) } } @@ -186,7 +186,7 @@ func TestSparseCheckoutCheck_PolecatMissingSparseCheckout(t *testing.T) { if result.Status != StatusError { t.Errorf("expected StatusError for missing sparse checkout, got %v", result.Status) } - if len(result.Details) != 1 || !strings.Contains(result.Details[0], "polecats/pc1") { + if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "polecats/pc1") { t.Errorf("expected details to contain polecats/pc1, got %v", result.Details) } } @@ -244,7 +244,7 @@ func TestSparseCheckoutCheck_MixedConfigured(t *testing.T) { if !strings.Contains(result.Message, "1 repo(s) missing") { t.Errorf("expected message about 1 missing repo, got %q", result.Message) } - if len(result.Details) != 1 || !strings.Contains(result.Details[0], "crew/agent1") { + if len(result.Details) != 1 || !strings.Contains(filepath.ToSlash(result.Details[0]), "crew/agent1") { t.Errorf("expected details to contain only crew/agent1, got %v", result.Details) } } diff --git a/internal/doctor/tmux_check.go b/internal/doctor/tmux_check.go index 0e46c2db..fffd1529 100644 --- a/internal/doctor/tmux_check.go +++ b/internal/doctor/tmux_check.go @@ -123,7 +123,8 @@ func (c *LinkedPaneCheck) Fix(ctx *CheckContext) error { var lastErr error for _, session := range c.linkedSessions { - if err := t.KillSession(session); err != nil { + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
+ if err := t.KillSessionWithProcesses(session); err != nil { lastErr = err } } diff --git a/internal/doctor/zombie_check.go b/internal/doctor/zombie_check.go index e1444d73..7db0f28c 100644 --- a/internal/doctor/zombie_check.go +++ b/internal/doctor/zombie_check.go @@ -128,7 +128,8 @@ func (c *ZombieSessionCheck) Fix(ctx *CheckContext) error { _ = events.LogFeed(events.TypeSessionDeath, sess, events.SessionDeathPayload(sess, "unknown", "zombie cleanup", "gt doctor")) - if err := t.KillSession(sess); err != nil { + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + if err := t.KillSessionWithProcesses(sess); err != nil { lastErr = err } } diff --git a/internal/dog/manager_test.go b/internal/dog/manager_test.go index 531756b1..af514a72 100644 --- a/internal/dog/manager_test.go +++ b/internal/dog/manager_test.go @@ -63,10 +63,10 @@ func TestManagerCreation(t *testing.T) { m := NewManager("/tmp/test-town", rigsConfig) - if m.townRoot != "/tmp/test-town" { + if filepath.ToSlash(m.townRoot) != "/tmp/test-town" { t.Errorf("expected townRoot '/tmp/test-town', got %q", m.townRoot) } - if m.kennelPath != "/tmp/test-town/deacon/dogs" { + if filepath.ToSlash(m.kennelPath) != "/tmp/test-town/deacon/dogs" { t.Errorf("expected kennelPath '/tmp/test-town/deacon/dogs', got %q", m.kennelPath) } } @@ -81,7 +81,7 @@ func TestDogDir(t *testing.T) { path := m.dogDir("alpha") expected := "/home/user/gt/deacon/dogs/alpha" - if path != expected { + if filepath.ToSlash(path) != expected { t.Errorf("expected %q, got %q", expected, path) } } diff --git a/internal/formula/formulas/gastown-release.formula.toml b/internal/formula/formulas/gastown-release.formula.toml index 9f5b53fb..5c57f55d 100644 --- a/internal/formula/formulas/gastown-release.formula.toml +++ b/internal/formula/formulas/gastown-release.formula.toml @@ -47,7 +47,7 @@ Check all crew workspaces and the mayor rig: ```bash # Check each workspace -for dir in ~/gt/gastown/crew/* ~/gt/gastown/mayor; do 
+for dir in $GT_ROOT/gastown/crew/* $GT_ROOT/gastown/mayor; do if [ -d "$dir/.git" ] || [ -d "$dir" ]; then echo "=== Checking $dir ===" cd "$dir" 2>/dev/null || continue diff --git a/internal/formula/formulas/mol-boot-triage.formula.toml b/internal/formula/formulas/mol-boot-triage.formula.toml index 38e5d248..66a472af 100644 --- a/internal/formula/formulas/mol-boot-triage.formula.toml +++ b/internal/formula/formulas/mol-boot-triage.formula.toml @@ -47,7 +47,7 @@ bd show hq-deacon 2>/dev/null gt feed --since 10m --plain | head -20 # Recent wisps (operational state) -ls -lt ~/gt/.beads-wisp/*.wisp.json 2>/dev/null | head -5 +ls -lt $GT_ROOT/.beads-wisp/*.wisp.json 2>/dev/null | head -5 ``` **Step 4: Check Deacon mail** @@ -221,7 +221,7 @@ Then exit. The next daemon tick will spawn a fresh Boot. **Update status file** ```bash # The gt boot command handles this automatically -# Status is written to ~/gt/deacon/dogs/boot/.boot-status.json +# Status is written to $GT_ROOT/deacon/dogs/boot/.boot-status.json ``` Boot is ephemeral by design. Each instance runs fresh. diff --git a/internal/formula/formulas/mol-deacon-patrol.formula.toml b/internal/formula/formulas/mol-deacon-patrol.formula.toml index 7ec83e38..f293c2b3 100644 --- a/internal/formula/formulas/mol-deacon-patrol.formula.toml +++ b/internal/formula/formulas/mol-deacon-patrol.formula.toml @@ -480,7 +480,7 @@ needs = ["zombie-scan"] description = """ Execute registered plugins. -Scan ~/gt/plugins/ for plugin directories. Each plugin has a plugin.md with TOML frontmatter defining its gate (when to run) and instructions (what to do). +Scan $GT_ROOT/plugins/ for plugin directories. Each plugin has a plugin.md with TOML frontmatter defining its gate (when to run) and instructions (what to do). See docs/deacon-plugins.md for full documentation. @@ -497,7 +497,7 @@ For each plugin: Plugins marked parallel: true can run concurrently using Task tool subagents. Sequential plugins run one at a time in directory order. 
-Skip this step if ~/gt/plugins/ does not exist or is empty.""" +Skip this step if $GT_ROOT/plugins/ does not exist or is empty.""" [[steps]] id = "dog-pool-maintenance" @@ -736,13 +736,13 @@ Maintain daemon logs and state files. **Step 1: Check daemon.log size** ```bash # Get log file size -ls -la ~/.beads/daemon*.log 2>/dev/null || ls -la ~/gt/.beads/daemon*.log 2>/dev/null +ls -la ~/.beads/daemon*.log 2>/dev/null || ls -la $GT_ROOT/.beads/daemon*.log 2>/dev/null ``` If daemon.log exceeds 10MB: ```bash # Rotate with date suffix and gzip -LOGFILE="$HOME/gt/.beads/daemon.log" +LOGFILE="$GT_ROOT/.beads/daemon.log" if [ -f "$LOGFILE" ] && [ $(stat -f%z "$LOGFILE" 2>/dev/null || stat -c%s "$LOGFILE") -gt 10485760 ]; then DATE=$(date +%Y-%m-%dT%H-%M-%S) mv "$LOGFILE" "${LOGFILE%.log}-${DATE}.log" @@ -754,7 +754,7 @@ fi Clean up daemon logs older than 7 days: ```bash -find ~/gt/.beads/ -name "daemon-*.log.gz" -mtime +7 -delete +find $GT_ROOT/.beads/ -name "daemon-*.log.gz" -mtime +7 -delete ``` **Step 3: Prune state.json of dead sessions** diff --git a/internal/formula/formulas/mol-shutdown-dance.formula.toml b/internal/formula/formulas/mol-shutdown-dance.formula.toml index 6f044db0..35ca1264 100644 --- a/internal/formula/formulas/mol-shutdown-dance.formula.toml +++ b/internal/formula/formulas/mol-shutdown-dance.formula.toml @@ -8,7 +8,7 @@ goroutine (NOT a Claude session) that runs the interrogation state machine. Dogs are lightweight workers in Boot's pool (see dog-pool-architecture.md): - Fixed pool of 5 goroutines (configurable via GT_DOG_POOL_SIZE) -- State persisted to ~/gt/deacon/dogs/active/<id>.json +- State persisted to $GT_ROOT/deacon/dogs/active/<id>.json - Recovery on Boot restart via orphan state files ## State Machine @@ -151,7 +151,7 @@ If target doesn't exist: - Skip to EPITAPH with outcome=already_dead **3. 
Initialize state file:** -Write initial state to ~/gt/deacon/dogs/active/{dog-id}.json +Write initial state to $GT_ROOT/deacon/dogs/active/{dog-id}.json **4. Set initial attempt counter:** attempt = 1 @@ -477,11 +477,11 @@ bd close {warrant_id} --reason "{epitaph_summary}" **3. Move state file to completed:** ```bash -mv ~/gt/deacon/dogs/active/{dog-id}.json ~/gt/deacon/dogs/completed/ +mv $GT_ROOT/deacon/dogs/active/{dog-id}.json $GT_ROOT/deacon/dogs/completed/ ``` **4. Report to Boot:** -Write completion file: ~/gt/deacon/dogs/active/{dog-id}.done +Write completion file: $GT_ROOT/deacon/dogs/active/{dog-id}.done ```json { "dog_id": "{dog-id}", diff --git a/internal/formula/formulas/mol-town-shutdown.formula.toml b/internal/formula/formulas/mol-town-shutdown.formula.toml index 82f30ab2..0e76c72a 100644 --- a/internal/formula/formulas/mol-town-shutdown.formula.toml +++ b/internal/formula/formulas/mol-town-shutdown.formula.toml @@ -132,7 +132,7 @@ gt daemon rotate-logs gt doctor --fix ``` -Old logs are moved to `~/gt/logs/archive/` with timestamps. +Old logs are moved to `$GT_ROOT/logs/archive/` with timestamps. """ [[steps]] diff --git a/internal/git/git.go b/internal/git/git.go index 2eb3339d..8a0775b1 100644 --- a/internal/git/git.go +++ b/internal/git/git.go @@ -254,18 +254,25 @@ func configureHooksPath(repoPath string) error { // and origin/main never appears in refs/remotes/origin/main. 
// See: https://github.com/anthropics/gastown/issues/286 func configureRefspec(repoPath string) error { - cmd := exec.Command("git", "-C", repoPath, "config", "remote.origin.fetch", "+refs/heads/*:refs/remotes/origin/*") + gitDir := repoPath + if _, err := os.Stat(filepath.Join(repoPath, ".git")); err == nil { + gitDir = filepath.Join(repoPath, ".git") + } + gitDir = filepath.Clean(gitDir) + var stderr bytes.Buffer - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { + configCmd := exec.Command("git", "--git-dir", gitDir, "config", "remote.origin.fetch", "+refs/heads/*:refs/remotes/origin/*") + configCmd.Stderr = &stderr + if err := configCmd.Run(); err != nil { return fmt.Errorf("configuring refspec: %s", strings.TrimSpace(stderr.String())) } - // Fetch to populate refs/remotes/origin/* so worktrees can use origin/main - fetchCmd := exec.Command("git", "-C", repoPath, "fetch", "origin") + + fetchCmd := exec.Command("git", "--git-dir", gitDir, "fetch", "origin") fetchCmd.Stderr = &stderr if err := fetchCmd.Run(); err != nil { return fmt.Errorf("fetching origin: %s", strings.TrimSpace(stderr.String())) } + return nil } diff --git a/internal/git/git_test.go b/internal/git/git_test.go index 3cc58834..860685a7 100644 --- a/internal/git/git_test.go +++ b/internal/git/git_test.go @@ -4,6 +4,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "testing" ) @@ -443,7 +444,7 @@ func TestCloneBareHasOriginRefs(t *testing.T) { if err != nil { t.Fatalf("git branch --show-current: %v", err) } - mainBranch := string(out[:len(out)-1]) // trim newline + mainBranch := strings.TrimSpace(string(out)) // Clone as bare repo using our CloneBare function bareDir := filepath.Join(tmp, "bare.git") @@ -454,8 +455,7 @@ func TestCloneBareHasOriginRefs(t *testing.T) { // Verify origin/main exists (this was the bug - it didn't exist before the fix) bareGit := NewGitWithDir(bareDir, "") - cmd = exec.Command("git", "branch", "-r") - cmd.Dir = bareDir + cmd = exec.Command("git", 
"--git-dir", bareDir, "branch", "-r") out, err = cmd.Output() if err != nil { t.Fatalf("git branch -r: %v", err) diff --git a/internal/lock/lock.go b/internal/lock/lock.go index f2e1a706..af54d694 100644 --- a/internal/lock/lock.go +++ b/internal/lock/lock.go @@ -16,7 +16,6 @@ import ( "os" "os/exec" "path/filepath" - "syscall" "time" ) @@ -193,23 +192,6 @@ func (l *Lock) write(sessionID string) error { return nil } -// processExists checks if a process with the given PID exists and is alive. -func processExists(pid int) bool { - if pid <= 0 { - return false - } - - // On Unix, sending signal 0 checks if process exists without affecting it - process, err := os.FindProcess(pid) - if err != nil { - return false - } - - // Try to send signal 0 - this will fail if process doesn't exist - err = process.Signal(syscall.Signal(0)) - return err == nil -} - // FindAllLocks scans a directory tree for agent.lock files. // Returns a map of worker directory -> LockInfo. func FindAllLocks(root string) (map[string]*LockInfo, error) { diff --git a/internal/lock/process_unix.go b/internal/lock/process_unix.go new file mode 100644 index 00000000..9601f2af --- /dev/null +++ b/internal/lock/process_unix.go @@ -0,0 +1,25 @@ +//go:build !windows + +package lock + +import ( + "os" + "syscall" +) + +// processExists checks if a process with the given PID exists and is alive. +func processExists(pid int) bool { + if pid <= 0 { + return false + } + + // On Unix, sending signal 0 checks if process exists without affecting it. + process, err := os.FindProcess(pid) + if err != nil { + return false + } + + // Try to send signal 0 - this will fail if process doesn't exist. 
+ err = process.Signal(syscall.Signal(0)) + return err == nil +} diff --git a/internal/lock/process_windows.go b/internal/lock/process_windows.go new file mode 100644 index 00000000..e537cb14 --- /dev/null +++ b/internal/lock/process_windows.go @@ -0,0 +1,22 @@ +//go:build windows + +package lock + +import "golang.org/x/sys/windows" + +// processExists checks if a process with the given PID exists and is alive. +func processExists(pid int) bool { + if pid <= 0 { + return false + } + + handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid)) + if err != nil { + if err == windows.ERROR_ACCESS_DENIED { + return true + } + return false + } + _ = windows.CloseHandle(handle) + return true +} diff --git a/internal/mail/mailbox.go b/internal/mail/mailbox.go index 250ed304..e9808d50 100644 --- a/internal/mail/mailbox.go +++ b/internal/mail/mailbox.go @@ -56,7 +56,7 @@ func NewMailboxBeads(identity, workDir string) *Mailbox { func NewMailboxFromAddress(address, workDir string) *Mailbox { beadsDir := beads.ResolveBeadsDir(workDir) return &Mailbox{ - identity: addressToIdentity(address), + identity: AddressToIdentity(address), workDir: workDir, beadsDir: beadsDir, legacy: false, @@ -66,7 +66,7 @@ func NewMailboxFromAddress(address, workDir string) *Mailbox { // NewMailboxWithBeadsDir creates a mailbox with an explicit beads directory. 
func NewMailboxWithBeadsDir(address, workDir, beadsDir string) *Mailbox { return &Mailbox{ - identity: addressToIdentity(address), + identity: AddressToIdentity(address), workDir: workDir, beadsDir: beadsDir, legacy: false, diff --git a/internal/mail/mailbox_test.go b/internal/mail/mailbox_test.go index 5e7eb87b..e58977af 100644 --- a/internal/mail/mailbox_test.go +++ b/internal/mail/mailbox_test.go @@ -11,7 +11,7 @@ import ( func TestNewMailbox(t *testing.T) { m := NewMailbox("/tmp/test") - if m.path != "/tmp/test/inbox.jsonl" { + if filepath.ToSlash(m.path) != "/tmp/test/inbox.jsonl" { t.Errorf("NewMailbox path = %q, want %q", m.path, "/tmp/test/inbox.jsonl") } if !m.legacy { @@ -332,7 +332,7 @@ func TestMailboxIdentityAndPath(t *testing.T) { if legacy.Identity() != "" { t.Errorf("Legacy mailbox identity = %q, want empty", legacy.Identity()) } - if legacy.Path() != "/tmp/test/inbox.jsonl" { + if filepath.ToSlash(legacy.Path()) != "/tmp/test/inbox.jsonl" { t.Errorf("Legacy mailbox path = %q, want /tmp/test/inbox.jsonl", legacy.Path()) } @@ -379,7 +379,7 @@ func TestNewMailboxWithBeadsDir(t *testing.T) { if m.identity != "gastown/Toast" { t.Errorf("identity = %q, want 'gastown/Toast'", m.identity) } - if m.beadsDir != "/custom/.beads" { + if filepath.ToSlash(m.beadsDir) != "/custom/.beads" { t.Errorf("beadsDir = %q, want '/custom/.beads'", m.beadsDir) } } diff --git a/internal/mail/router.go b/internal/mail/router.go index af29c8a6..602f19b9 100644 --- a/internal/mail/router.go +++ b/internal/mail/router.go @@ -569,7 +569,7 @@ func (r *Router) sendToGroup(msg *Message) error { // sendToSingle sends a message to a single recipient. 
func (r *Router) sendToSingle(msg *Message) error { // Convert addresses to beads identities - toIdentity := addressToIdentity(msg.To) + toIdentity := AddressToIdentity(msg.To) // Build labels for from/thread/reply-to/cc var labels []string @@ -582,7 +582,7 @@ func (r *Router) sendToSingle(msg *Message) error { } // Add CC labels (one per recipient) for _, cc := range msg.CC { - ccIdentity := addressToIdentity(cc) + ccIdentity := AddressToIdentity(cc) labels = append(labels, "cc:"+ccIdentity) } @@ -692,7 +692,7 @@ func (r *Router) sendToQueue(msg *Message) error { labels = append(labels, "reply-to:"+msg.ReplyTo) } for _, cc := range msg.CC { - ccIdentity := addressToIdentity(cc) + ccIdentity := AddressToIdentity(cc) labels = append(labels, "cc:"+ccIdentity) } @@ -763,7 +763,7 @@ func (r *Router) sendToAnnounce(msg *Message) error { labels = append(labels, "reply-to:"+msg.ReplyTo) } for _, cc := range msg.CC { - ccIdentity := addressToIdentity(cc) + ccIdentity := AddressToIdentity(cc) labels = append(labels, "cc:"+ccIdentity) } @@ -836,7 +836,7 @@ func (r *Router) sendToChannel(msg *Message) error { labels = append(labels, "reply-to:"+msg.ReplyTo) } for _, cc := range msg.CC { - ccIdentity := addressToIdentity(cc) + ccIdentity := AddressToIdentity(cc) labels = append(labels, "cc:"+ccIdentity) } diff --git a/internal/mail/router_test.go b/internal/mail/router_test.go index 0b53e387..84e4c874 100644 --- a/internal/mail/router_test.go +++ b/internal/mail/router_test.go @@ -198,7 +198,7 @@ func TestResolveBeadsDir(t *testing.T) { r := NewRouterWithTownRoot("/work/dir", "/home/user/gt") got := r.resolveBeadsDir("gastown/Toast") want := "/home/user/gt/.beads" - if got != want { + if filepath.ToSlash(got) != want { t.Errorf("resolveBeadsDir with townRoot = %q, want %q", got, want) } @@ -206,17 +206,17 @@ func TestResolveBeadsDir(t *testing.T) { r2 := &Router{workDir: "/work/dir", townRoot: ""} got2 := r2.resolveBeadsDir("mayor/") want2 := "/work/dir/.beads" - if got2 != 
want2 { + if filepath.ToSlash(got2) != want2 { t.Errorf("resolveBeadsDir without townRoot = %q, want %q", got2, want2) } } func TestNewRouterWithTownRoot(t *testing.T) { r := NewRouterWithTownRoot("/work/rig", "/home/gt") - if r.workDir != "/work/rig" { + if filepath.ToSlash(r.workDir) != "/work/rig" { t.Errorf("workDir = %q, want '/work/rig'", r.workDir) } - if r.townRoot != "/home/gt" { + if filepath.ToSlash(r.townRoot) != "/home/gt" { t.Errorf("townRoot = %q, want '/home/gt'", r.townRoot) } } diff --git a/internal/mail/types.go b/internal/mail/types.go index b2a2346d..53ddaffa 100644 --- a/internal/mail/types.go +++ b/internal/mail/types.go @@ -488,7 +488,7 @@ func ParseMessageType(s string) MessageType { } } -// addressToIdentity converts a GGT address to a beads identity. +// AddressToIdentity converts a GGT address to a beads identity. // // Liberal normalization: accepts multiple address formats and normalizes // to canonical form (Postel's Law - be liberal in what you accept). @@ -504,7 +504,7 @@ func ParseMessageType(s string) MessageType { // - "gastown/Toast" → "gastown/Toast" (already canonical) // - "gastown/refinery" → "gastown/refinery" // - "gastown/" → "gastown" (rig broadcast) -func addressToIdentity(address string) string { +func AddressToIdentity(address string) string { // Overseer (human operator) - no trailing slash, distinct from agents if address == "overseer" { return "overseer" diff --git a/internal/mail/types_test.go b/internal/mail/types_test.go index 0be64ef7..85adb9d7 100644 --- a/internal/mail/types_test.go +++ b/internal/mail/types_test.go @@ -30,9 +30,9 @@ func TestAddressToIdentity(t *testing.T) { for _, tt := range tests { t.Run(tt.address, func(t *testing.T) { - got := addressToIdentity(tt.address) + got := AddressToIdentity(tt.address) if got != tt.expected { - t.Errorf("addressToIdentity(%q) = %q, want %q", tt.address, got, tt.expected) + t.Errorf("AddressToIdentity(%q) = %q, want %q", tt.address, got, tt.expected) } }) } diff 
--git a/internal/opencode/plugin_test.go b/internal/opencode/plugin_test.go index 4840bf09..97b3be01 100644 --- a/internal/opencode/plugin_test.go +++ b/internal/opencode/plugin_test.go @@ -3,6 +3,7 @@ package opencode import ( "os" "path/filepath" + "runtime" "testing" ) @@ -128,6 +129,10 @@ func TestEnsurePluginAt_CreatesDirectory(t *testing.T) { } func TestEnsurePluginAt_FilePermissions(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("file mode checks are not reliable on Windows") + } + // Create a temporary directory tmpDir := t.TempDir() diff --git a/internal/polecat/manager.go b/internal/polecat/manager.go index e23d7abc..ea10f686 100644 --- a/internal/polecat/manager.go +++ b/internal/polecat/manager.go @@ -334,6 +334,11 @@ func (m *Manager) AddWithOptions(name string, opts AddOptions) (*Polecat, error) fmt.Printf("Warning: could not copy overlay files: %v\n", err) } + // Ensure .gitignore has required Gas Town patterns + if err := rig.EnsureGitignorePatterns(clonePath); err != nil { + fmt.Printf("Warning: could not update .gitignore: %v\n", err) + } + // Run setup hooks from .runtime/setup-hooks/. // These hooks can inject local git config, copy secrets, or perform other setup tasks. 
if err := rig.RunSetupHooks(m.rig.Path, clonePath); err != nil { @@ -353,7 +358,6 @@ func (m *Manager) AddWithOptions(name string, opts AddOptions) (*Polecat, error) RoleType: "polecat", Rig: m.rig.Name, AgentState: "spawning", - RoleBead: beads.RoleBeadIDTown("polecat"), HookBead: opts.HookBead, // Set atomically at spawn time }) if err != nil { @@ -639,6 +643,11 @@ func (m *Manager) RepairWorktreeWithOptions(name string, force bool, opts AddOpt fmt.Printf("Warning: could not copy overlay files: %v\n", err) } + // Ensure .gitignore has required Gas Town patterns + if err := rig.EnsureGitignorePatterns(newClonePath); err != nil { + fmt.Printf("Warning: could not update .gitignore: %v\n", err) + } + // NOTE: Slash commands inherited from town level - no per-workspace copies needed. // Create or reopen agent bead for ZFC compliance @@ -648,7 +657,6 @@ func (m *Manager) RepairWorktreeWithOptions(name string, force bool, opts AddOpt RoleType: "polecat", Rig: m.rig.Name, AgentState: "spawning", - RoleBead: beads.RoleBeadIDTown("polecat"), HookBead: opts.HookBead, // Set atomically at spawn time }) if err != nil { @@ -721,12 +729,13 @@ func (m *Manager) ReconcilePoolWith(namesWithDirs, namesWithSessions []string) { dirSet[name] = true } - // Kill orphaned sessions (session exists but no directory) + // Kill orphaned sessions (session exists but no directory). + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
if m.tmux != nil { for _, name := range namesWithSessions { if !dirSet[name] { sessionName := fmt.Sprintf("gt-%s-%s", m.rig.Name, name) - _ = m.tmux.KillSession(sessionName) + _ = m.tmux.KillSessionWithProcesses(sessionName) } } } diff --git a/internal/polecat/manager_test.go b/internal/polecat/manager_test.go index 0f0abb73..72676f6e 100644 --- a/internal/polecat/manager_test.go +++ b/internal/polecat/manager_test.go @@ -5,6 +5,7 @@ import ( "os/exec" "path/filepath" "sort" + "strings" "testing" "github.com/steveyegge/gastown/internal/git" @@ -121,7 +122,7 @@ func TestPolecatDir(t *testing.T) { dir := m.polecatDir("Toast") expected := "/home/user/ai/test-rig/polecats/Toast" - if dir != expected { + if filepath.ToSlash(dir) != expected { t.Errorf("polecatDir = %q, want %q", dir, expected) } } @@ -354,8 +355,10 @@ func TestAddWithOptions_HasAgentsMD(t *testing.T) { if err != nil { t.Fatalf("read worktree AGENTS.md: %v", err) } - if string(content) != string(agentsMDContent) { - t.Errorf("AGENTS.md content = %q, want %q", string(content), string(agentsMDContent)) + gotContent := strings.ReplaceAll(string(content), "\r\n", "\n") + wantContent := strings.ReplaceAll(string(agentsMDContent), "\r\n", "\n") + if gotContent != wantContent { + t.Errorf("AGENTS.md content = %q, want %q", gotContent, wantContent) } } @@ -437,8 +440,10 @@ func TestAddWithOptions_AgentsMDFallback(t *testing.T) { if err != nil { t.Fatalf("read worktree AGENTS.md: %v", err) } - if string(content) != string(agentsMDContent) { - t.Errorf("AGENTS.md content = %q, want %q", string(content), string(agentsMDContent)) + gotContent := strings.ReplaceAll(string(content), "\r\n", "\n") + wantContent := strings.ReplaceAll(string(agentsMDContent), "\r\n", "\n") + if gotContent != wantContent { + t.Errorf("AGENTS.md content = %q, want %q", gotContent, wantContent) } } // TestReconcilePoolWith tests all permutations of directory and session existence. 
diff --git a/internal/polecat/namepool.go b/internal/polecat/namepool.go index bc71f030..ade0be3d 100644 --- a/internal/polecat/namepool.go +++ b/internal/polecat/namepool.go @@ -378,7 +378,7 @@ func ThemeForRig(rigName string) string { for _, b := range []byte(rigName) { hash = hash*31 + uint32(b) } - return themes[hash%uint32(len(themes))] + return themes[hash%uint32(len(themes))] //nolint:gosec // len(themes) is small constant } // GetThemeNames returns the names in a specific theme. diff --git a/internal/polecat/session_manager.go b/internal/polecat/session_manager.go index d5f93521..3bb7e078 100644 --- a/internal/polecat/session_manager.go +++ b/internal/polecat/session_manager.go @@ -289,7 +289,9 @@ func (m *SessionManager) Stop(polecat string, force bool) error { time.Sleep(100 * time.Millisecond) } - if err := m.tmux.KillSession(sessionID); err != nil { + // Use KillSessionWithProcesses to ensure all descendant processes are killed. + // This prevents orphan bash processes from Claude's Bash tool surviving session termination. 
+ if err := m.tmux.KillSessionWithProcesses(sessionID); err != nil { return fmt.Errorf("killing session: %w", err) } diff --git a/internal/polecat/session_manager_test.go b/internal/polecat/session_manager_test.go index 30eaf769..4b9008aa 100644 --- a/internal/polecat/session_manager_test.go +++ b/internal/polecat/session_manager_test.go @@ -2,7 +2,9 @@ package polecat import ( "os" + "os/exec" "path/filepath" + "runtime" "strings" "testing" @@ -10,6 +12,17 @@ import ( "github.com/steveyegge/gastown/internal/tmux" ) +func requireTmux(t *testing.T) { + t.Helper() + + if runtime.GOOS == "windows" { + t.Skip("tmux not supported on Windows") + } + if _, err := exec.LookPath("tmux"); err != nil { + t.Skip("tmux not installed") + } +} + func TestSessionName(t *testing.T) { r := &rig.Rig{ Name: "gastown", @@ -33,7 +46,7 @@ func TestSessionManagerPolecatDir(t *testing.T) { dir := m.polecatDir("Toast") expected := "/home/user/ai/gastown/polecats/Toast" - if dir != expected { + if filepath.ToSlash(dir) != expected { t.Errorf("polecatDir = %q, want %q", dir, expected) } } @@ -79,6 +92,8 @@ func TestStartPolecatNotFound(t *testing.T) { } func TestIsRunningNoSession(t *testing.T) { + requireTmux(t) + r := &rig.Rig{ Name: "gastown", Polecats: []string{"Toast"}, @@ -95,6 +110,8 @@ func TestIsRunningNoSession(t *testing.T) { } func TestSessionManagerListEmpty(t *testing.T) { + requireTmux(t) + r := &rig.Rig{ Name: "test-rig-unlikely-name", Polecats: []string{}, @@ -111,6 +128,8 @@ func TestSessionManagerListEmpty(t *testing.T) { } func TestStopNotFound(t *testing.T) { + requireTmux(t) + r := &rig.Rig{ Name: "test-rig", Polecats: []string{"Toast"}, @@ -124,6 +143,8 @@ func TestStopNotFound(t *testing.T) { } func TestCaptureNotFound(t *testing.T) { + requireTmux(t) + r := &rig.Rig{ Name: "test-rig", Polecats: []string{"Toast"}, @@ -137,6 +158,8 @@ func TestCaptureNotFound(t *testing.T) { } func TestInjectNotFound(t *testing.T) { + requireTmux(t) + r := &rig.Rig{ Name: "test-rig", 
Polecats: []string{"Toast"}, diff --git a/internal/refinery/engineer.go b/internal/refinery/engineer.go index b0f7ffde..526aca87 100644 --- a/internal/refinery/engineer.go +++ b/internal/refinery/engineer.go @@ -14,6 +14,7 @@ import ( "time" "github.com/steveyegge/gastown/internal/beads" + "github.com/steveyegge/gastown/internal/convoy" "github.com/steveyegge/gastown/internal/git" "github.com/steveyegge/gastown/internal/mail" "github.com/steveyegge/gastown/internal/protocol" @@ -449,6 +450,12 @@ func (e *Engineer) handleSuccess(mr *beads.Issue, result ProcessResult) { _, _ = fmt.Fprintf(e.output, "[Engineer] Warning: failed to close source issue %s: %v\n", mrFields.SourceIssue, err) } else { _, _ = fmt.Fprintf(e.output, "[Engineer] Closed source issue: %s\n", mrFields.SourceIssue) + + // Redundant convoy observer: check if merged issue is tracked by a convoy + logger := func(format string, args ...interface{}) { + _, _ = fmt.Fprintf(e.output, "[Engineer] "+format+"\n", args...) + } + convoy.CheckConvoysForIssue(e.rig.Path, mrFields.SourceIssue, "refinery", logger) } } @@ -557,6 +564,12 @@ func (e *Engineer) HandleMRInfoSuccess(mr *MRInfo, result ProcessResult) { _, _ = fmt.Fprintf(e.output, "[Engineer] Warning: failed to close source issue %s: %v\n", mr.SourceIssue, err) } else { _, _ = fmt.Fprintf(e.output, "[Engineer] Closed source issue: %s\n", mr.SourceIssue) + + // Redundant convoy observer: check if merged issue is tracked by a convoy + logger := func(format string, args ...interface{}) { + _, _ = fmt.Fprintf(e.output, "[Engineer] "+format+"\n", args...) 
+ } + convoy.CheckConvoysForIssue(e.rig.Path, mr.SourceIssue, "refinery", logger) } } diff --git a/internal/refinery/manager.go b/internal/refinery/manager.go index 0534cab4..b5bcbd8d 100644 --- a/internal/refinery/manager.go +++ b/internal/refinery/manager.go @@ -1,7 +1,6 @@ package refinery import ( - "encoding/json" "errors" "fmt" "io" @@ -52,89 +51,50 @@ func (m *Manager) SetOutput(w io.Writer) { m.output = w } -// stateFile returns the path to the refinery state file. -func (m *Manager) stateFile() string { - return filepath.Join(m.rig.Path, ".runtime", "refinery.json") -} - // SessionName returns the tmux session name for this refinery. func (m *Manager) SessionName() string { return fmt.Sprintf("gt-%s-refinery", m.rig.Name) } -// loadState loads refinery state from disk. -func (m *Manager) loadState() (*Refinery, error) { - data, err := os.ReadFile(m.stateFile()) +// IsRunning checks if the refinery session is active. +// ZFC: tmux session existence is the source of truth. +func (m *Manager) IsRunning() (bool, error) { + t := tmux.NewTmux() + return t.HasSession(m.SessionName()) +} + +// Status returns information about the refinery session. +// ZFC-compliant: tmux session is the source of truth. +func (m *Manager) Status() (*tmux.SessionInfo, error) { + t := tmux.NewTmux() + sessionID := m.SessionName() + + running, err := t.HasSession(sessionID) if err != nil { - if os.IsNotExist(err) { - return &Refinery{ - RigName: m.rig.Name, - State: StateStopped, - }, nil - } - return nil, err + return nil, fmt.Errorf("checking session: %w", err) + } + if !running { + return nil, ErrNotRunning } - var ref Refinery - if err := json.Unmarshal(data, &ref); err != nil { - return nil, err - } - - return &ref, nil -} - -// saveState persists refinery state to disk using atomic write. 
-func (m *Manager) saveState(ref *Refinery) error { - dir := filepath.Dir(m.stateFile()) - if err := os.MkdirAll(dir, 0755); err != nil { - return err - } - - return util.AtomicWriteJSON(m.stateFile(), ref) -} - -// Status returns the current refinery status. -// ZFC-compliant: trusts agent-reported state, no PID/tmux inference. -// The daemon reads agent bead state for liveness checks. -func (m *Manager) Status() (*Refinery, error) { - return m.loadState() + return t.GetSessionInfo(sessionID) } // Start starts the refinery. -// If foreground is true, runs in the current process (blocking) using the Go-based polling loop. +// If foreground is true, returns an error (foreground mode deprecated). // Otherwise, spawns a Claude agent in a tmux session to process the merge queue. // The agentOverride parameter allows specifying an agent alias to use instead of the town default. +// ZFC-compliant: no state file, tmux session is source of truth. func (m *Manager) Start(foreground bool, agentOverride string) error { - ref, err := m.loadState() - if err != nil { - return err - } - t := tmux.NewTmux() sessionID := m.SessionName() if foreground { - // In foreground mode, check tmux session (no PID inference per ZFC) - // Use IsClaudeRunning for robust detection (see gastown#566) - if running, _ := t.HasSession(sessionID); running && t.IsClaudeRunning(sessionID) { - return ErrAlreadyRunning - } - - // Running in foreground - update state and run the Go-based polling loop - now := time.Now() - ref.State = StateRunning - ref.StartedAt = &now - ref.PID = 0 // No longer track PID (ZFC) - - if err := m.saveState(ref); err != nil { - return err - } - - // Run the processing loop (blocking) - return m.run(ref) + // Foreground mode is deprecated - the Refinery agent handles merge processing + return fmt.Errorf("foreground mode is deprecated; use background mode (remove --foreground flag)") } - // Background mode: check if session already exists + // Check if session already exists 
running, _ := t.HasSession(sessionID) if running { // Session exists - check if Claude is actually running (healthy vs zombie) @@ -213,16 +173,6 @@ func (m *Manager) Start(foreground bool, agentOverride string) error { theme := tmux.AssignTheme(m.rig.Name) _ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, "refinery", "refinery") - // Update state to running - now := time.Now() - ref.State = StateRunning - ref.StartedAt = &now - ref.PID = 0 // Claude agent doesn't have a PID we track - if err := m.saveState(ref); err != nil { - _ = t.KillSession(sessionID) // best-effort cleanup on state save failure - return fmt.Errorf("saving state: %w", err) - } - // Wait for Claude to start and show its prompt - fatal if Claude fails to launch // WaitForRuntimeReady waits for the runtime to be ready if err := t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout); err != nil { @@ -256,37 +206,24 @@ func (m *Manager) Start(foreground bool, agentOverride string) error { } // Stop stops the refinery. +// ZFC-compliant: tmux session is the source of truth. func (m *Manager) Stop() error { - ref, err := m.loadState() - if err != nil { - return err - } - - // Check if tmux session exists t := tmux.NewTmux() sessionID := m.SessionName() - sessionRunning, _ := t.HasSession(sessionID) - // If neither state nor session indicates running, it's not running - if ref.State != StateRunning && !sessionRunning { + // Check if tmux session exists + running, _ := t.HasSession(sessionID) + if !running { return ErrNotRunning } - // Kill tmux session if it exists (best-effort: may already be dead) - if sessionRunning { - _ = t.KillSession(sessionID) - } - - // Note: No PID-based stop per ZFC - tmux session kill is sufficient - - ref.State = StateStopped - ref.PID = 0 - - return m.saveState(ref) + // Kill the tmux session + return t.KillSession(sessionID) } // Queue returns the current merge queue. 
// Uses beads merge-request issues as the source of truth (not git branches). +// ZFC-compliant: beads is the source of truth, no state file. func (m *Manager) Queue() ([]QueueItem, error) { // Query beads for open merge-request type issues // BeadsPath() returns the git-synced beads location @@ -300,25 +237,6 @@ func (m *Manager) Queue() ([]QueueItem, error) { return nil, fmt.Errorf("querying merge queue from beads: %w", err) } - // Load any current processing state - ref, err := m.loadState() - if err != nil { - return nil, err - } - - // Build queue items - var items []QueueItem - pos := 1 - - // Add current processing item - if ref.CurrentMR != nil { - items = append(items, QueueItem{ - Position: 0, // 0 = currently processing - MR: ref.CurrentMR, - Age: formatAge(ref.CurrentMR.CreatedAt), - }) - } - // Score and sort issues by priority score (highest first) now := time.Now() type scoredIssue struct { @@ -336,13 +254,11 @@ func (m *Manager) Queue() ([]QueueItem, error) { }) // Convert scored issues to queue items + var items []QueueItem + pos := 1 for _, s := range scored { mr := m.issueToMR(s.issue) if mr != nil { - // Skip if this is the currently processing MR - if ref.CurrentMR != nil && ref.CurrentMR.ID == mr.ID { - continue - } items = append(items, QueueItem{ Position: pos, MR: mr, @@ -437,21 +353,6 @@ func parseTime(s string) time.Time { return t } -// run is deprecated - foreground mode now just prints a message. -// The Refinery agent (Claude) handles all merge processing. -// See: ZFC #5 - Move merge/conflict decisions from Go to Refinery agent -func (m *Manager) run(_ *Refinery) error { // ref unused: deprecated function - _, _ = fmt.Fprintln(m.output, "") - _, _ = fmt.Fprintln(m.output, "╔══════════════════════════════════════════════════════════════╗") - _, _ = fmt.Fprintln(m.output, "║ Foreground mode is deprecated. 
║") - _, _ = fmt.Fprintln(m.output, "║ ║") - _, _ = fmt.Fprintln(m.output, "║ The Refinery agent (Claude) handles all merge decisions. ║") - _, _ = fmt.Fprintln(m.output, "║ Use 'gt refinery start' to run in background mode. ║") - _, _ = fmt.Fprintln(m.output, "╚══════════════════════════════════════════════════════════════╝") - _, _ = fmt.Fprintln(m.output, "") - return nil -} - // MergeResult contains the result of a merge attempt. type MergeResult struct { Success bool @@ -484,12 +385,10 @@ func (m *Manager) ProcessMR(mr *MergeRequest) MergeResult { // completeMR marks an MR as complete. // For success, pass closeReason (e.g., CloseReasonMerged). // For failures that should return to open, pass empty closeReason. +// ZFC-compliant: no state file, just updates MR and emits events. +// Deprecated: The Refinery agent handles merge processing (ZFC #5). func (m *Manager) completeMR(mr *MergeRequest, closeReason CloseReason, errMsg string) { - ref, _ := m.loadState() mr.Error = errMsg - ref.CurrentMR = nil - - now := time.Now() actor := fmt.Sprintf("%s/refinery", m.rig.Name) if closeReason != "" { @@ -498,10 +397,7 @@ func (m *Manager) completeMR(mr *MergeRequest, closeReason CloseReason, errMsg s // Log error but continue - this shouldn't happen _, _ = fmt.Fprintf(m.output, "Warning: failed to close MR: %v\n", err) } - switch closeReason { - case CloseReasonMerged: - ref.LastMergeAt = &now - case CloseReasonSuperseded: + if closeReason == CloseReasonSuperseded { // Emit merge_skipped event _ = events.LogFeed(events.TypeMergeSkipped, actor, events.MergePayload(mr.ID, mr.Worker, mr.Branch, "superseded")) } @@ -512,8 +408,6 @@ func (m *Manager) completeMR(mr *MergeRequest, closeReason CloseReason, errMsg s _, _ = fmt.Fprintf(m.output, "Warning: failed to reopen MR: %v\n", err) } } - - _ = m.saveState(ref) // non-fatal: state file update } // runTests executes the test command. 
@@ -634,26 +528,11 @@ var ( ErrMRNotFailed = errors.New("merge request has not failed") ) -// GetMR returns a merge request by ID from the state. +// GetMR returns a merge request by ID; it delegates to FindMR, which uses beads as the source of truth (ZFC-compliant). +// +// Deprecated: Use FindMR directly for more flexible matching. func (m *Manager) GetMR(id string) (*MergeRequest, error) { - ref, err := m.loadState() - if err != nil { - return nil, err - } - - // Check if it's the current MR - if ref.CurrentMR != nil && ref.CurrentMR.ID == id { - return ref.CurrentMR, nil - } - - // Check pending MRs - if ref.PendingMRs != nil { - if mr, ok := ref.PendingMRs[id]; ok { - return mr, nil - } - } - - return nil, ErrMRNotFound + return m.FindMR(id) } // FindMR finds a merge request by ID or branch name in the queue. @@ -684,60 +563,19 @@ func (m *Manager) FindMR(idOrBranch string) (*MergeRequest, error) { return nil, ErrMRNotFound } -// Retry resets a failed merge request so it can be processed again. -// The processNow parameter is deprecated - the Refinery agent handles processing. -// Clearing the error is sufficient; the agent will pick up the MR in its next patrol cycle. -func (m *Manager) Retry(id string, processNow bool) error { - ref, err := m.loadState() - if err != nil { - return err - } - - // Find the MR - var mr *MergeRequest - if ref.PendingMRs != nil { - mr = ref.PendingMRs[id] - } - if mr == nil { - return ErrMRNotFound - } - - // Verify it's in a failed state (open with an error) - if mr.Status != MROpen || mr.Error == "" { - return ErrMRNotFailed - } - - // Clear the error to mark as ready for retry - mr.Error = "" - - // Save the state - if err := m.saveState(ref); err != nil { - return err - } - - // Note: processNow is deprecated (ZFC #5). - // The Refinery agent handles merge processing. - // It will pick up this MR in its next patrol cycle. - if processNow { - _, _ = fmt.Fprintln(m.output, "Note: --now is deprecated. 
The Refinery agent will process this MR in its next patrol cycle.") - } - +// Retry is a no-op that only prints a deprecation notice; no state file is touched (ZFC-compliant). +// +// Deprecated: The Refinery agent retries failed MRs autonomously in its patrol cycle, using beads issue status. +func (m *Manager) Retry(_ string, _ bool) error { + _, _ = fmt.Fprintln(m.output, "Note: Retry is deprecated. The Refinery agent handles retries autonomously via beads.") return nil } -// RegisterMR adds a merge request to the pending queue. -func (m *Manager) RegisterMR(mr *MergeRequest) error { - ref, err := m.loadState() - if err != nil { - return err - } - - if ref.PendingMRs == nil { - ref.PendingMRs = make(map[string]*MergeRequest) - } - - ref.PendingMRs[mr.ID] = mr - return m.saveState(ref) +// RegisterMR always returns an error; beads, not a state file, is the source of truth (ZFC-compliant). +// +// Deprecated: Use 'gt mr create' or create a merge-request type bead directly. +func (m *Manager) RegisterMR(_ *MergeRequest) error { + return fmt.Errorf("RegisterMR is deprecated: use beads to create merge-request issues") } // RejectMR manually rejects a merge request. 
diff --git a/internal/refinery/manager_test.go b/internal/refinery/manager_test.go index f701a6d7..2dc2c18b 100644 --- a/internal/refinery/manager_test.go +++ b/internal/refinery/manager_test.go @@ -1,11 +1,9 @@ package refinery import ( - "encoding/json" "os" "path/filepath" "testing" - "time" "github.com/steveyegge/gastown/internal/rig" ) @@ -28,145 +26,96 @@ func setupTestManager(t *testing.T) (*Manager, string) { return NewManager(r), rigPath } -func TestManager_GetMR(t *testing.T) { +func TestManager_SessionName(t *testing.T) { mgr, _ := setupTestManager(t) - // Create a test MR in the pending queue - mr := &MergeRequest{ - ID: "gt-mr-abc123", - Branch: "polecat/Toast/gt-xyz", - Worker: "Toast", - IssueID: "gt-xyz", - Status: MROpen, - Error: "test failure", + want := "gt-testrig-refinery" + got := mgr.SessionName() + if got != want { + t.Errorf("SessionName() = %s, want %s", got, want) } - - if err := mgr.RegisterMR(mr); err != nil { - t.Fatalf("RegisterMR: %v", err) - } - - t.Run("find existing MR", func(t *testing.T) { - found, err := mgr.GetMR("gt-mr-abc123") - if err != nil { - t.Errorf("GetMR() unexpected error: %v", err) - } - if found == nil { - t.Fatal("GetMR() returned nil") - } - if found.ID != mr.ID { - t.Errorf("GetMR() ID = %s, want %s", found.ID, mr.ID) - } - }) - - t.Run("MR not found", func(t *testing.T) { - _, err := mgr.GetMR("nonexistent-mr") - if err != ErrMRNotFound { - t.Errorf("GetMR() error = %v, want %v", err, ErrMRNotFound) - } - }) } -func TestManager_Retry(t *testing.T) { - t.Run("retry failed MR clears error", func(t *testing.T) { - mgr, _ := setupTestManager(t) +func TestManager_IsRunning_NoSession(t *testing.T) { + mgr, _ := setupTestManager(t) - // Create a failed MR - mr := &MergeRequest{ - ID: "gt-mr-failed", - Branch: "polecat/Toast/gt-xyz", - Worker: "Toast", - Status: MROpen, - Error: "merge conflict", - } - - if err := mgr.RegisterMR(mr); err != nil { - t.Fatalf("RegisterMR: %v", err) - } - - // Retry without processing - 
err := mgr.Retry("gt-mr-failed", false) - if err != nil { - t.Errorf("Retry() unexpected error: %v", err) - } - - // Verify error was cleared - found, _ := mgr.GetMR("gt-mr-failed") - if found.Error != "" { - t.Errorf("Retry() error not cleared, got %s", found.Error) - } - }) - - t.Run("retry non-failed MR fails", func(t *testing.T) { - mgr, _ := setupTestManager(t) - - // Create a successful MR (no error) - mr := &MergeRequest{ - ID: "gt-mr-success", - Branch: "polecat/Toast/gt-abc", - Worker: "Toast", - Status: MROpen, - Error: "", // No error - } - - if err := mgr.RegisterMR(mr); err != nil { - t.Fatalf("RegisterMR: %v", err) - } - - err := mgr.Retry("gt-mr-success", false) - if err != ErrMRNotFailed { - t.Errorf("Retry() error = %v, want %v", err, ErrMRNotFailed) - } - }) - - t.Run("retry nonexistent MR fails", func(t *testing.T) { - mgr, _ := setupTestManager(t) - - err := mgr.Retry("nonexistent", false) - if err != ErrMRNotFound { - t.Errorf("Retry() error = %v, want %v", err, ErrMRNotFound) - } - }) -} - -func TestManager_RegisterMR(t *testing.T) { - mgr, rigPath := setupTestManager(t) - - mr := &MergeRequest{ - ID: "gt-mr-new", - Branch: "polecat/Cheedo/gt-123", - Worker: "Cheedo", - IssueID: "gt-123", - TargetBranch: "main", - CreatedAt: time.Now(), - Status: MROpen, - } - - if err := mgr.RegisterMR(mr); err != nil { - t.Fatalf("RegisterMR: %v", err) - } - - // Verify it was saved to disk - stateFile := filepath.Join(rigPath, ".runtime", "refinery.json") - data, err := os.ReadFile(stateFile) + // Without a tmux session, IsRunning should return false + // Note: this test doesn't create a tmux session, so it tests the "not running" case + running, err := mgr.IsRunning() if err != nil { - t.Fatalf("reading state file: %v", err) + // If tmux server isn't running, HasSession returns an error + // This is expected in test environments without tmux + t.Logf("IsRunning returned error (expected without tmux): %v", err) + return } - var ref Refinery - if err := 
json.Unmarshal(data, &ref); err != nil { - t.Fatalf("unmarshal state: %v", err) - } - - if ref.PendingMRs == nil { - t.Fatal("PendingMRs is nil") - } - - saved, ok := ref.PendingMRs["gt-mr-new"] - if !ok { - t.Fatal("MR not found in PendingMRs") - } - - if saved.Worker != "Cheedo" { - t.Errorf("saved MR worker = %s, want Cheedo", saved.Worker) + if running { + t.Error("IsRunning() = true, want false (no session created)") + } +} + +func TestManager_Status_NotRunning(t *testing.T) { + mgr, _ := setupTestManager(t) + + // Without a tmux session, Status should return ErrNotRunning + _, err := mgr.Status() + if err == nil { + t.Error("Status() expected error when not running") + } + // May return ErrNotRunning or a tmux server error + t.Logf("Status returned error (expected): %v", err) +} + +func TestManager_Queue_NoBeads(t *testing.T) { + mgr, _ := setupTestManager(t) + + // Queue returns error when no beads database exists + // This is expected - beads requires initialization + _, err := mgr.Queue() + if err == nil { + // If beads is somehow available, queue should be empty + t.Log("Queue() succeeded unexpectedly (beads may be available)") + return + } + // Error is expected when beads isn't initialized + t.Logf("Queue() returned error (expected without beads): %v", err) +} + +func TestManager_FindMR_NoBeads(t *testing.T) { + mgr, _ := setupTestManager(t) + + // FindMR returns error when no beads database exists + _, err := mgr.FindMR("nonexistent-mr") + if err == nil { + t.Error("FindMR() expected error") + } + // Any error is acceptable when beads isn't initialized + t.Logf("FindMR() returned error (expected): %v", err) +} + +func TestManager_RegisterMR_Deprecated(t *testing.T) { + mgr, _ := setupTestManager(t) + + mr := &MergeRequest{ + ID: "gt-mr-test", + Branch: "polecat/Test/gt-123", + Worker: "Test", + Status: MROpen, + } + + // RegisterMR should return an error indicating deprecation + err := mgr.RegisterMR(mr) + if err == nil { + t.Error("RegisterMR() 
expected error (deprecated)") + } +} + +func TestManager_Retry_Deprecated(t *testing.T) { + mgr, _ := setupTestManager(t) + + // Retry is deprecated and should not error, just print a message + err := mgr.Retry("any-id", false) + if err != nil { + t.Errorf("Retry() unexpected error: %v", err) } } diff --git a/internal/refinery/types.go b/internal/refinery/types.go index 97f8e6b6..179b3b16 100644 --- a/internal/refinery/types.go +++ b/internal/refinery/types.go @@ -1,49 +1,15 @@ // Package refinery provides the merge queue processing agent. +// +// ZFC-compliant: Running state is derived from tmux sessions, not stored in files. +// Merge queue is derived from beads merge-request issues. package refinery import ( "errors" "fmt" "time" - - "github.com/steveyegge/gastown/internal/agent" ) -// State is an alias for agent.State for backwards compatibility. -type State = agent.State - -// State constants - re-exported from agent package for backwards compatibility. -const ( - StateStopped = agent.StateStopped - StateRunning = agent.StateRunning - StatePaused = agent.StatePaused -) - -// Refinery represents a rig's merge queue processor. -type Refinery struct { - // RigName is the rig this refinery processes. - RigName string `json:"rig_name"` - - // State is the current running state. - State State `json:"state"` - - // PID is the process ID if running in background. - PID int `json:"pid,omitempty"` - - // StartedAt is when the refinery was started. - StartedAt *time.Time `json:"started_at,omitempty"` - - // CurrentMR is the merge request currently being processed. - CurrentMR *MergeRequest `json:"current_mr,omitempty"` - - // PendingMRs tracks merge requests that have been submitted. - // Key is the MR ID. - PendingMRs map[string]*MergeRequest `json:"pending_mrs,omitempty"` - - // LastMergeAt is when the last successful merge happened. - LastMergeAt *time.Time `json:"last_merge_at,omitempty"` -} - // MergeRequest represents a branch waiting to be merged. 
type MergeRequest struct { // ID is a unique identifier for this merge request. diff --git a/internal/rig/manager.go b/internal/rig/manager.go index a7e82833..46895a8d 100644 --- a/internal/rig/manager.go +++ b/internal/rig/manager.go @@ -750,14 +750,12 @@ func (m *Manager) initAgentBeads(rigPath, rigName, prefix string) error { continue // Already exists } - // RoleBead points to the shared role definition bead for this agent type. - // Role beads are in town beads with hq- prefix (e.g., hq-witness-role). + // Note: RoleBead field removed - role definitions are now config-based fields := &beads.AgentFields{ RoleType: agent.roleType, Rig: agent.rig, AgentState: "idle", HookBead: "", - RoleBead: beads.RoleBeadIDTown(agent.roleType), } if _, err := bd.CreateAgentBead(agent.id, agent.desc, fields); err != nil { diff --git a/internal/rig/manager_test.go b/internal/rig/manager_test.go index 797ad56c..82d99a8a 100644 --- a/internal/rig/manager_test.go +++ b/internal/rig/manager_test.go @@ -3,6 +3,7 @@ package rig import ( "os" "path/filepath" + "runtime" "slices" "strings" "testing" @@ -23,9 +24,21 @@ func setupTestTown(t *testing.T) (string, *config.RigsConfig) { return root, rigsConfig } -func writeFakeBD(t *testing.T, script string) string { +func writeFakeBD(t *testing.T, script string, windowsScript string) string { t.Helper() binDir := t.TempDir() + + if runtime.GOOS == "windows" { + if windowsScript == "" { + t.Fatal("windows script is required on Windows") + } + scriptPath := filepath.Join(binDir, "bd.cmd") + if err := os.WriteFile(scriptPath, []byte(windowsScript), 0644); err != nil { + t.Fatalf("write fake bd: %v", err) + } + return binDir + } + scriptPath := filepath.Join(binDir, "bd") if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { t.Fatalf("write fake bd: %v", err) @@ -44,8 +57,9 @@ func assertBeadsDirLog(t *testing.T, logPath, want string) { t.Fatalf("expected beads dir log entries, got none") } for _, line := range lines { - if line 
!= want { - t.Fatalf("BEADS_DIR = %q, want %q", line, want) + trimmed := strings.TrimSuffix(line, "\r") + if trimmed != want { + t.Fatalf("BEADS_DIR = %q, want %q", trimmed, want) } } } @@ -367,7 +381,7 @@ func TestInitBeads_LocalBeads_CreatesDatabase(t *testing.T) { } // Use fake bd that succeeds - script := `#!/usr/bin/env bash +script := `#!/usr/bin/env bash set -e if [[ "$1" == "init" ]]; then # Simulate successful bd init @@ -375,7 +389,8 @@ if [[ "$1" == "init" ]]; then fi exit 0 ` - binDir := writeFakeBD(t, script) + windowsScript := "@echo off\r\nif \"%1\"==\"init\" exit /b 0\r\nexit /b 0\r\n" + binDir := writeFakeBD(t, script, windowsScript) t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) manager := &Manager{} @@ -400,7 +415,7 @@ func TestInitBeadsWritesConfigOnFailure(t *testing.T) { rigPath := t.TempDir() beadsDir := filepath.Join(rigPath, ".beads") - script := `#!/usr/bin/env bash +script := `#!/usr/bin/env bash set -e if [[ -n "$BEADS_DIR_LOG" ]]; then echo "${BEADS_DIR:-<unset>}" >> "$BEADS_DIR_LOG" @@ -414,8 +429,9 @@ fi echo "unexpected command: $cmd" >&2 exit 1 ` + windowsScript := "@echo off\r\nif defined BEADS_DIR_LOG (\r\n if defined BEADS_DIR (\r\n echo %BEADS_DIR%>>\"%BEADS_DIR_LOG%\"\r\n ) else (\r\n echo ^<unset^> >>\"%BEADS_DIR_LOG%\"\r\n )\r\n)\r\nif \"%1\"==\"init\" (\r\n exit /b 1\r\n)\r\nexit /b 1\r\n" - binDir := writeFakeBD(t, script) + binDir := writeFakeBD(t, script, windowsScript) beadsDirLog := filepath.Join(t.TempDir(), "beads-dir.log") t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) t.Setenv("BEADS_DIR_LOG", beadsDirLog) @@ -437,6 +453,10 @@ exit 1 } func TestInitAgentBeadsUsesRigBeadsDir(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("fake bd stub is not compatible with multiline descriptions on Windows") + } + // Rig-level agent beads (witness, refinery) are stored in rig beads. // Town-level agents (mayor, deacon) are created by gt install in town beads. 
// This test verifies that rig agent beads are created in the rig directory, @@ -452,7 +472,7 @@ func TestInitAgentBeadsUsesRigBeadsDir(t *testing.T) { // Track which agent IDs were created var createdAgents []string - script := `#!/usr/bin/env bash +script := `#!/usr/bin/env bash set -e if [[ -n "$BEADS_DIR_LOG" ]]; then echo "${BEADS_DIR:-<unset>}" >> "$BEADS_DIR_LOG" @@ -492,8 +512,9 @@ case "$cmd" in ;; esac ` + windowsScript := "@echo off\r\nsetlocal enabledelayedexpansion\r\nif defined BEADS_DIR_LOG (\r\n if defined BEADS_DIR (\r\n echo %BEADS_DIR%>>\"%BEADS_DIR_LOG%\"\r\n ) else (\r\n echo ^<unset^> >>\"%BEADS_DIR_LOG%\"\r\n )\r\n)\r\nset \"cmd=%1\"\r\nset \"arg2=%2\"\r\nset \"arg3=%3\"\r\nif \"%cmd%\"==\"--no-daemon\" (\r\n set \"cmd=%2\"\r\n set \"arg2=%3\"\r\n set \"arg3=%4\"\r\n)\r\nif \"%cmd%\"==\"--allow-stale\" (\r\n set \"cmd=%2\"\r\n set \"arg2=%3\"\r\n set \"arg3=%4\"\r\n)\r\nif \"%cmd%\"==\"show\" (\r\n echo []\r\n exit /b 0\r\n)\r\nif \"%cmd%\"==\"create\" (\r\n set \"id=\"\r\n set \"title=\"\r\n for %%A in (%*) do (\r\n set \"arg=%%~A\"\r\n if /i \"!arg:~0,5!\"==\"--id=\" set \"id=!arg:~5!\"\r\n if /i \"!arg:~0,8!\"==\"--title=\" set \"title=!arg:~8!\"\r\n )\r\n if defined AGENT_LOG (\r\n echo !id!>>\"%AGENT_LOG%\"\r\n )\r\n echo {\"id\":\"!id!\",\"title\":\"!title!\",\"description\":\"\",\"issue_type\":\"agent\"}\r\n exit /b 0\r\n)\r\nif \"%cmd%\"==\"slot\" exit /b 0\r\nexit /b 1\r\n" - binDir := writeFakeBD(t, script) + binDir := writeFakeBD(t, script, windowsScript) agentLog := filepath.Join(t.TempDir(), "agents.log") beadsDirLog := filepath.Join(t.TempDir(), "beads-dir.log") t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) diff --git a/internal/rig/overlay.go b/internal/rig/overlay.go index fa0d4a97..e764b40b 100644 --- a/internal/rig/overlay.go +++ b/internal/rig/overlay.go @@ -5,6 +5,7 @@ import ( "io" "os" "path/filepath" + "strings" ) // CopyOverlay copies files from <rigPath>/.runtime/overlay/ to the destination 
path. @@ -55,6 +56,75 @@ func CopyOverlay(rigPath, destPath string) error { return nil } +// EnsureGitignorePatterns ensures the .gitignore has required Gas Town patterns. +// This is called after cloning to add patterns that may be missing from the source repo. +func EnsureGitignorePatterns(worktreePath string) error { + gitignorePath := filepath.Join(worktreePath, ".gitignore") + + // Required patterns for Gas Town worktrees + requiredPatterns := []string{ + ".runtime/", + ".claude/", + ".beads/", + ".logs/", + } + + // Read existing gitignore content (any read error is treated as an empty file) + var existingContent string + if data, err := os.ReadFile(gitignorePath); err == nil { + existingContent = string(data) + } + + // Find missing patterns + var missing []string + for _, pattern := range requiredPatterns { + // Accept four spellings of each pattern: .runtime/, .runtime, /.runtime/, /.runtime + found := false + for _, line := range strings.Split(existingContent, "\n") { + line = strings.TrimSpace(line) + if line == pattern || line == strings.TrimSuffix(pattern, "/") || + line == "/"+pattern || line == "/"+strings.TrimSuffix(pattern, "/") { + found = true + break + } + } + if !found { + missing = append(missing, pattern) + } + } + + if len(missing) == 0 { + return nil // All patterns present + } + + // Append missing patterns + f, err := os.OpenFile(gitignorePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening .gitignore: %w", err) + } + defer f.Close() + + // When appending to an existing file: terminate its last line if needed, then add a header + if existingContent != "" && !strings.HasSuffix(existingContent, "\n") { + if _, err := f.WriteString("\n"); err != nil { + return err + } + } + if existingContent != "" { + if _, err := f.WriteString("\n# Gas Town (added by gt)\n"); err != nil { + return err + } + } + + for _, pattern := range missing { + if _, err := f.WriteString(pattern + "\n"); err != nil { + return err + } + } + + return nil +} + // copyFilePreserveMode copies a file from src to dst, preserving the source file's 
permissions. func copyFilePreserveMode(src, dst string) error { // Get source file info for permissions diff --git a/internal/rig/overlay_test.go b/internal/rig/overlay_test.go index b21768f5..faeccf66 100644 --- a/internal/rig/overlay_test.go +++ b/internal/rig/overlay_test.go @@ -249,3 +249,193 @@ func TestCopyFilePreserveMode_NonexistentSource(t *testing.T) { t.Error("copyFilePreserveMode() with nonexistent source should return error") } } + +func TestEnsureGitignorePatterns_CreatesNewFile(t *testing.T) { + tmpDir := t.TempDir() + + err := EnsureGitignorePatterns(tmpDir) + if err != nil { + t.Fatalf("EnsureGitignorePatterns() error = %v", err) + } + + content, err := os.ReadFile(filepath.Join(tmpDir, ".gitignore")) + if err != nil { + t.Fatalf("Failed to read .gitignore: %v", err) + } + + // Check all required patterns are present + patterns := []string{".runtime/", ".claude/", ".beads/", ".logs/"} + for _, pattern := range patterns { + if !containsLine(string(content), pattern) { + t.Errorf(".gitignore missing pattern %q", pattern) + } + } +} + +func TestEnsureGitignorePatterns_AppendsToExisting(t *testing.T) { + tmpDir := t.TempDir() + + // Create existing .gitignore with some content + existing := "node_modules/\n*.log\n" + if err := os.WriteFile(filepath.Join(tmpDir, ".gitignore"), []byte(existing), 0644); err != nil { + t.Fatalf("Failed to create .gitignore: %v", err) + } + + err := EnsureGitignorePatterns(tmpDir) + if err != nil { + t.Fatalf("EnsureGitignorePatterns() error = %v", err) + } + + content, err := os.ReadFile(filepath.Join(tmpDir, ".gitignore")) + if err != nil { + t.Fatalf("Failed to read .gitignore: %v", err) + } + + // Should preserve existing content + if !containsLine(string(content), "node_modules/") { + t.Error("Existing pattern node_modules/ was removed") + } + + // Should add header + if !containsLine(string(content), "# Gas Town (added by gt)") { + t.Error("Missing Gas Town header comment") + } + + // Should add required patterns + 
patterns := []string{".runtime/", ".claude/", ".beads/", ".logs/"} + for _, pattern := range patterns { + if !containsLine(string(content), pattern) { + t.Errorf(".gitignore missing pattern %q", pattern) + } + } +} + +func TestEnsureGitignorePatterns_SkipsExistingPatterns(t *testing.T) { + tmpDir := t.TempDir() + + // Create existing .gitignore with some Gas Town patterns already + existing := ".runtime/\n.claude/\n" + if err := os.WriteFile(filepath.Join(tmpDir, ".gitignore"), []byte(existing), 0644); err != nil { + t.Fatalf("Failed to create .gitignore: %v", err) + } + + err := EnsureGitignorePatterns(tmpDir) + if err != nil { + t.Fatalf("EnsureGitignorePatterns() error = %v", err) + } + + content, err := os.ReadFile(filepath.Join(tmpDir, ".gitignore")) + if err != nil { + t.Fatalf("Failed to read .gitignore: %v", err) + } + + // Should not duplicate existing patterns + count := countOccurrences(string(content), ".runtime/") + if count != 1 { + t.Errorf(".runtime/ appears %d times, expected 1", count) + } + + // Should add missing patterns + if !containsLine(string(content), ".beads/") { + t.Error(".gitignore missing pattern .beads/") + } + if !containsLine(string(content), ".logs/") { + t.Error(".gitignore missing pattern .logs/") + } +} + +func TestEnsureGitignorePatterns_RecognizesVariants(t *testing.T) { + tmpDir := t.TempDir() + + // Create existing .gitignore with variant patterns (without trailing slash) + existing := ".runtime\n/.claude\n" + if err := os.WriteFile(filepath.Join(tmpDir, ".gitignore"), []byte(existing), 0644); err != nil { + t.Fatalf("Failed to create .gitignore: %v", err) + } + + err := EnsureGitignorePatterns(tmpDir) + if err != nil { + t.Fatalf("EnsureGitignorePatterns() error = %v", err) + } + + content, err := os.ReadFile(filepath.Join(tmpDir, ".gitignore")) + if err != nil { + t.Fatalf("Failed to read .gitignore: %v", err) + } + + // Should recognize variants and not add duplicates + // .runtime (no slash) should count as .runtime/ + 
if containsLine(string(content), ".runtime/") && containsLine(string(content), ".runtime") { + // Only one should be present unless they're the same line + runtimeCount := countOccurrences(string(content), ".runtime") + if runtimeCount > 1 { + t.Errorf(".runtime appears %d times (variant detection failed)", runtimeCount) + } + } +} + +func TestEnsureGitignorePatterns_AllPatternsPresent(t *testing.T) { + tmpDir := t.TempDir() + + // Create existing .gitignore with all required patterns + existing := ".runtime/\n.claude/\n.beads/\n.logs/\n" + if err := os.WriteFile(filepath.Join(tmpDir, ".gitignore"), []byte(existing), 0644); err != nil { + t.Fatalf("Failed to create .gitignore: %v", err) + } + + err := EnsureGitignorePatterns(tmpDir) + if err != nil { + t.Fatalf("EnsureGitignorePatterns() error = %v", err) + } + + content, err := os.ReadFile(filepath.Join(tmpDir, ".gitignore")) + if err != nil { + t.Fatalf("Failed to read .gitignore: %v", err) + } + + // File should be unchanged (no header added) + if containsLine(string(content), "# Gas Town") { + t.Error("Should not add header when all patterns already present") + } + + // Content should match original + if string(content) != existing { + t.Errorf("File was modified when it shouldn't be.\nGot: %q\nWant: %q", string(content), existing) + } +} + +// Helper functions + +func containsLine(content, pattern string) bool { + for _, line := range splitLines(content) { + if line == pattern { + return true + } + } + return false +} + +func countOccurrences(content, pattern string) int { + count := 0 + for _, line := range splitLines(content) { + if line == pattern { + count++ + } + } + return count +} + +func splitLines(content string) []string { + var lines []string + start := 0 + for i, c := range content { + if c == '\n' { + lines = append(lines, content[start:i]) + start = i + 1 + } + } + if start < len(content) { + lines = append(lines, content[start:]) + } + return lines +} diff --git a/internal/session/startup.go 
b/internal/session/startup.go index 0dfd4724..be89c438 100644 --- a/internal/session/startup.go +++ b/internal/session/startup.go @@ -65,9 +65,9 @@ func FormatStartupNudge(cfg StartupNudgeConfig) string { beacon := fmt.Sprintf("[GAS TOWN] %s <- %s • %s • %s", cfg.Recipient, cfg.Sender, timestamp, topic) - // For handoff and cold-start, add explicit instructions so the agent knows what to do - // even if hooks haven't loaded CLAUDE.md yet - if cfg.Topic == "handoff" || cfg.Topic == "cold-start" { + // For handoff, cold-start, and attach, add explicit instructions so the agent knows + // what to do even if hooks haven't loaded CLAUDE.md yet + if cfg.Topic == "handoff" || cfg.Topic == "cold-start" || cfg.Topic == "attach" { beacon += "\n\nCheck your hook and mail, then act on the hook if present:\n" + "1. `gt hook` - shows hooked work (if any)\n" + "2. `gt mail inbox` - check for messages\n" + diff --git a/internal/session/town.go b/internal/session/town.go index d5dd1f15..f361867a 100644 --- a/internal/session/town.go +++ b/internal/session/town.go @@ -68,8 +68,9 @@ func stopTownSessionInternal(t *tmux.Tmux, ts TownSession, force bool) (bool, er _ = events.LogFeed(events.TypeSessionDeath, ts.SessionID, events.SessionDeathPayload(ts.SessionID, ts.Name, reason, "gt down")) - // Kill the session - if err := t.KillSession(ts.SessionID); err != nil { + // Kill the session. + // Use KillSessionWithProcesses to ensure all descendant processes are killed. 
+ if err := t.KillSessionWithProcesses(ts.SessionID); err != nil { return false, fmt.Errorf("killing %s session: %w", ts.Name, err) } diff --git a/internal/state/state_test.go b/internal/state/state_test.go index 165acd7c..c3323e95 100644 --- a/internal/state/state_test.go +++ b/internal/state/state_test.go @@ -20,7 +20,7 @@ func TestStateDir(t *testing.T) { os.Setenv("XDG_STATE_HOME", "/custom/state") defer os.Unsetenv("XDG_STATE_HOME") - if got := StateDir(); got != "/custom/state/gastown" { + if got := filepath.ToSlash(StateDir()); got != "/custom/state/gastown" { t.Errorf("StateDir() with XDG = %q, want /custom/state/gastown", got) } } @@ -36,7 +36,7 @@ func TestConfigDir(t *testing.T) { os.Setenv("XDG_CONFIG_HOME", "/custom/config") defer os.Unsetenv("XDG_CONFIG_HOME") - if got := ConfigDir(); got != "/custom/config/gastown" { + if got := filepath.ToSlash(ConfigDir()); got != "/custom/config/gastown" { t.Errorf("ConfigDir() with XDG = %q, want /custom/config/gastown", got) } } @@ -52,7 +52,7 @@ func TestCacheDir(t *testing.T) { os.Setenv("XDG_CACHE_HOME", "/custom/cache") defer os.Unsetenv("XDG_CACHE_HOME") - if got := CacheDir(); got != "/custom/cache/gastown" { + if got := filepath.ToSlash(CacheDir()); got != "/custom/cache/gastown" { t.Errorf("CacheDir() with XDG = %q, want /custom/cache/gastown", got) } } diff --git a/internal/templates/roles/crew.md.tmpl b/internal/templates/roles/crew.md.tmpl index 42de7b38..92d1a426 100644 --- a/internal/templates/roles/crew.md.tmpl +++ b/internal/templates/roles/crew.md.tmpl @@ -195,6 +195,22 @@ gt worktree remove beads **Note**: Dogs are Deacon infrastructure helpers (like Boot). They're NOT for user-facing work. If you need to fix something in another rig, use worktrees, not dogs. +## Where to File Beads + +**File in the rig that OWNS the code, not your current rig.** + +You're working in **{{ .RigName }}** (prefix `{{ .IssuePrefix }}-`). Issues about THIS rig's code +go here by default. 
But if you discover bugs/issues in OTHER projects: + +| Issue is about... | File in | Command | +|-------------------|---------|---------| +| This rig's code ({{ .RigName }}) | Here (default) | `bd create "..."` | +| `bd` CLI (beads tool) | **beads** | `bd create --rig beads "..."` | +| `gt` CLI (gas town tool) | **gastown** | `bd create --rig gastown "..."` | +| Cross-rig coordination | **HQ** | `bd create --prefix hq- "..."` | + +**The test**: "Which repo would the fix be committed to?" + ## Gotchas when Filing Beads **Temporal language inverts dependencies.** "Phase 1 blocks Phase 2" is backwards. diff --git a/internal/templates/roles/deacon.md.tmpl b/internal/templates/roles/deacon.md.tmpl index 44bdbaf8..438f741c 100644 --- a/internal/templates/roles/deacon.md.tmpl +++ b/internal/templates/roles/deacon.md.tmpl @@ -111,6 +111,22 @@ beads clean while maintaining an audit trail. Routes defined in `~/gt/.beads/routes.jsonl`. Debug with: `BD_DEBUG_ROUTING=1 bd show <id>` +## Where to File Beads (CRITICAL) + +**File in the rig that OWNS the code, not HQ by default.** + +| Issue is about... | File in | Command | +|-------------------|---------|---------| +| `bd` CLI (beads tool bugs, features) | **beads** | `bd create --rig beads "..."` | +| `gt` CLI (gas town tool bugs, features) | **gastown** | `bd create --rig gastown "..."` | +| Deacon/witness/refinery/patrol code | **gastown** | `bd create --rig gastown "..."` | +| Cross-rig coordination, agent assignments | **HQ** | `bd create "..."` (default) | + +**The test**: "Which repo would the fix be committed to?" +- Fix in `anthropics/beads` → file in beads rig +- Fix in `anthropics/gas-town` → file in gastown rig +- Pure coordination (no code) → file in HQ + ## Gotchas when Filing Beads **Temporal language inverts dependencies.** "Phase 1 blocks Phase 2" is backwards. @@ -136,7 +152,7 @@ gt mail inbox gt mol attach-from-mail <mail-id> # Step 3: Still nothing? 
Create patrol wisp (two-step: create then hook) -bd mol wisp create mol-deacon-patrol +bd mol wisp mol-deacon-patrol bd update <wisp-id> --status=hooked --assignee=deacon ``` @@ -222,8 +238,8 @@ Then squash and decide: bd mol squash <wisp-id> --summary="Patrol complete: checked inbox, scanned health, no issues" # Option A: Loop (low context) -bd mol wisp create mol-deacon-patrol -bd update <wisp-id> --status=pinned --assignee=deacon +bd mol wisp mol-deacon-patrol +bd update <wisp-id> --status=hooked --assignee=deacon # Continue to first step... # Option B: Exit (high context) diff --git a/internal/templates/roles/mayor.md.tmpl b/internal/templates/roles/mayor.md.tmpl index 87e128c7..a6e80e3e 100644 --- a/internal/templates/roles/mayor.md.tmpl +++ b/internal/templates/roles/mayor.md.tmpl @@ -162,6 +162,27 @@ bd show hq-abc # Routes to town beads **Conflicts:** If two rigs share a prefix, use `bd rename-prefix <new>` to fix. +## Where to File Beads (CRITICAL) + +**File in the rig that OWNS the code, not where you're standing.** + +| Issue is about... | File in | Command | +|-------------------|---------|---------| +| `bd` CLI (beads tool bugs, features, docs) | **beads** | `bd create --rig beads "..."` | +| `gt` CLI (gas town tool bugs, features) | **gastown** | `bd create --rig gastown "..."` | +| Polecat/witness/refinery/convoy code | **gastown** | `bd create --rig gastown "..."` | +| Wyvern game features | **wyvern** | `bd create --rig wyvern "..."` | +| Cross-rig coordination, convoys, mail threads | **HQ** | `bd create "..."` (default) | +| Agent role descriptions, assignments | **HQ** | `bd create "..."` (default) | + +**The test**: "Which repo would the fix be committed to?" +- Fix in `anthropics/beads` → file in beads rig +- Fix in `anthropics/gas-town` → file in gastown rig +- Pure coordination (no code) → file in HQ + +**Common mistake**: Filing `bd` CLI issues in HQ because you're "coordinating." +Wrong. 
The issue is about beads code, so it goes in the beads rig. + ## Gotchas when Filing Beads **Temporal language inverts dependencies.** "Phase 1 blocks Phase 2" is backwards. diff --git a/internal/templates/roles/polecat.md.tmpl b/internal/templates/roles/polecat.md.tmpl index 641fccb1..c3eaf05f 100644 --- a/internal/templates/roles/polecat.md.tmpl +++ b/internal/templates/roles/polecat.md.tmpl @@ -180,6 +180,22 @@ bd show hq-abc # Routes to town beads - Each rig's prefix (e.g., `gt-`) maps to its beads location - Debug with: `BD_DEBUG_ROUTING=1 bd show <id>` +## Where to File Beads + +**File in the rig that OWNS the code, not your current rig.** + +You're working in **{{ .RigName }}** (prefix `{{ .IssuePrefix }}-`). Issues about THIS rig's code +go here by default. But if you discover bugs/issues in OTHER projects: + +| Issue is about... | File in | Command | +|-------------------|---------|---------| +| This rig's code ({{ .RigName }}) | Here (default) | `bd create "..."` | +| `bd` CLI (beads tool) | **beads** | `bd create --rig beads "..."` | +| `gt` CLI (gas town tool) | **gastown** | `bd create --rig gastown "..."` | +| Cross-rig coordination | **HQ** | `bd create --prefix hq- "..."` | + +**The test**: "Which repo would the fix be committed to?" + ## Gotchas when Filing Beads **Temporal language inverts dependencies.** "Phase 1 blocks Phase 2" is backwards. diff --git a/internal/templates/roles/refinery.md.tmpl b/internal/templates/roles/refinery.md.tmpl index 4e19dde3..caa9815d 100644 --- a/internal/templates/roles/refinery.md.tmpl +++ b/internal/templates/roles/refinery.md.tmpl @@ -80,6 +80,14 @@ queue for your rig, merging polecat work to main one at a time with sequential r **The Scotty Test**: Before proceeding past any failure, ask yourself: "Would Scotty walk past a warp core leak because it existed before his shift?" +## Working Directory + +**IMPORTANT**: Always work from `{{ .WorkDir }}` directory. + +Identity detection (for mail, mol status, etc.) 
depends on your current working +directory. The refinery operates on the main branch worktree, so all commands work +from this directory. + ## 🔧 ZFC Compliance: Agent-Driven Decisions **You are the decision maker.** All merge/conflict decisions are made by you, the agent, @@ -153,8 +161,9 @@ Then check your hook: gt hook # Shows hooked work (if any) bd list --status=in_progress --assignee=refinery -# Step 2: If no patrol, spawn one -bd mol spawn mol-refinery-patrol --wisp --assignee=refinery +# Step 2: If no patrol, spawn one (two-step: create then hook) +bd mol wisp mol-refinery-patrol +bd update <wisp-id> --status=hooked --assignee={{ .RigName }}/refinery ``` **No thinking. No "should I?" questions. Hook → Execute.** @@ -290,7 +299,8 @@ Then squash and decide: bd mol squash <wisp-id> --summary="Patrol: merged 3 branches, no issues" # Option A: Loop (low context, more branches) -bd mol spawn mol-refinery-patrol --wisp --assignee=refinery +bd mol wisp mol-refinery-patrol +bd update <wisp-id> --status=hooked --assignee={{ .RigName }}/refinery # Continue to inbox-check... # Option B: Exit (high context OR queue empty) @@ -335,7 +345,8 @@ gt mail send {{ .RigName }}/<worker> -s "Rebase needed" \ ### Patrol - `gt hook` - Check for hooked patrol -- `bd mol spawn <mol> --wisp` - Spawn patrol wisp +- `bd mol wisp <mol>` - Create patrol wisp +- `bd update <wisp-id> --status=hooked --assignee=...` - Hook the wisp - `bd mol squash <id> --summary="..."` - Squash completed patrol ### Git Operations diff --git a/internal/templates/roles/witness.md.tmpl b/internal/templates/roles/witness.md.tmpl index 8351c156..bbb370b6 100644 --- a/internal/templates/roles/witness.md.tmpl +++ b/internal/templates/roles/witness.md.tmpl @@ -111,6 +111,14 @@ Your job: - Close issues for work you didn't do - Skip mol steps or hallucinate completion +## Working Directory + +**IMPORTANT**: Always work from `{{ .WorkDir }}` directory. + +Identity detection (for mail, mol status, etc.) 
depends on your current working +directory. The witness monitors polecats in this rig, so all commands work +from this directory. + ## Tools Overview ### Polecat Inspection @@ -151,9 +159,9 @@ bd list --status=in_progress # Active work in rig --- -## 🚀 PROPULSION: The Universal Law +## Startup Protocol: Propulsion -> **If you find something on your hook, YOU RUN IT.** +> **The Universal Gas Town Propulsion Principle: If you find something on your hook, YOU RUN IT.** There is no decision logic. No "should I?" questions. Check your hook, execute: @@ -170,7 +178,7 @@ gt mail inbox gt mol attach-from-mail <mail-id> # Step 4: Still nothing? Create patrol wisp -bd mol wisp create mol-witness-patrol +bd mol wisp mol-witness-patrol bd update <wisp-id> --status=hooked --assignee={{ .RigName }}/witness ``` diff --git a/internal/tmux/tmux.go b/internal/tmux/tmux.go index 435e6126..4b4e6b1b 100644 --- a/internal/tmux/tmux.go +++ b/internal/tmux/tmux.go @@ -63,7 +63,8 @@ func (t *Tmux) wrapError(err error, stderr string, args []string) error { // Detect specific error types if strings.Contains(stderr, "no server running") || - strings.Contains(stderr, "error connecting to") { + strings.Contains(stderr, "error connecting to") || + strings.Contains(stderr, "no current target") { return ErrNoServer } if strings.Contains(stderr, "duplicate session") { @@ -190,7 +191,75 @@ func (t *Tmux) KillSessionWithProcesses(name string) error { } // Kill the tmux session - return t.KillSession(name) + // Ignore "session not found" - killing the pane process may have already + // caused tmux to destroy the session automatically + err = t.KillSession(name) + if err == ErrSessionNotFound { + return nil + } + return err +} + +// KillSessionWithProcessesExcluding is like KillSessionWithProcesses but excludes +// specified PIDs from being killed. This is essential for self-kill scenarios where +// the calling process (e.g., gt done) is running inside the session it's terminating. 
+// Without exclusion, the caller would be killed before completing the cleanup. +func (t *Tmux) KillSessionWithProcessesExcluding(name string, excludePIDs []string) error { + // Build exclusion set for O(1) lookup + exclude := make(map[string]bool) + for _, pid := range excludePIDs { + exclude[pid] = true + } + + // Get the pane PID + pid, err := t.GetPanePID(name) + if err != nil { + // Session might not exist or be in bad state, try direct kill + return t.KillSession(name) + } + + if pid != "" { + // Get all descendant PIDs recursively (returns deepest-first order) + descendants := getAllDescendants(pid) + + // Filter out excluded PIDs + var filtered []string + for _, dpid := range descendants { + if !exclude[dpid] { + filtered = append(filtered, dpid) + } + } + + // Send SIGTERM to all non-excluded descendants (deepest first to avoid orphaning) + for _, dpid := range filtered { + _ = exec.Command("kill", "-TERM", dpid).Run() + } + + // Wait for graceful shutdown + time.Sleep(100 * time.Millisecond) + + // Send SIGKILL to any remaining non-excluded descendants + for _, dpid := range filtered { + _ = exec.Command("kill", "-KILL", dpid).Run() + } + + // Kill the pane process itself (may have called setsid() and detached) + // Only if not excluded + if !exclude[pid] { + _ = exec.Command("kill", "-TERM", pid).Run() + time.Sleep(100 * time.Millisecond) + _ = exec.Command("kill", "-KILL", pid).Run() + } + } + + // Kill the tmux session - this will terminate the excluded process too + // Ignore "session not found" - if we killed all non-excluded processes, + // tmux may have already destroyed the session automatically + err = t.KillSession(name) + if err == ErrSessionNotFound { + return nil + } + return err } // getAllDescendants recursively finds all descendant PIDs of a process. @@ -215,6 +284,48 @@ func getAllDescendants(pid string) []string { return result } +// KillPaneProcesses explicitly kills all processes associated with a tmux pane. 
+// This prevents orphan processes that survive pane respawn due to SIGHUP being ignored. +// +// Process: +// 1. Get the pane's main process PID +// 2. Find all descendant processes recursively (not just direct children) +// 3. Send SIGTERM to all descendants (deepest first) +// 4. Wait 100ms for graceful shutdown +// 5. Send SIGKILL to any remaining descendants +// +// This ensures Claude processes and all their children are properly terminated +// before respawning the pane. +func (t *Tmux) KillPaneProcesses(pane string) error { + // Get the pane PID + pid, err := t.GetPanePID(pane) + if err != nil { + return fmt.Errorf("getting pane PID: %w", err) + } + + if pid == "" { + return fmt.Errorf("pane PID is empty") + } + + // Get all descendant PIDs recursively (returns deepest-first order) + descendants := getAllDescendants(pid) + + // Send SIGTERM to all descendants (deepest first to avoid orphaning) + for _, dpid := range descendants { + _ = exec.Command("kill", "-TERM", dpid).Run() + } + + // Wait for graceful shutdown + time.Sleep(100 * time.Millisecond) + + // Send SIGKILL to any remaining descendants + for _, dpid := range descendants { + _ = exec.Command("kill", "-KILL", dpid).Run() + } + + return nil +} + // KillServer terminates the entire tmux server and all sessions. 
func (t *Tmux) KillServer() error { _, err := t.run("kill-server") diff --git a/internal/tmux/tmux_test.go b/internal/tmux/tmux_test.go index a1f71f00..6b0262fe 100644 --- a/internal/tmux/tmux_test.go +++ b/internal/tmux/tmux_test.go @@ -198,6 +198,7 @@ func TestWrapError(t *testing.T) { }{ {"no server running on /tmp/tmux-...", ErrNoServer}, {"error connecting to /tmp/tmux-...", ErrNoServer}, + {"no current target", ErrNoServer}, {"duplicate session: test", ErrSessionExists}, {"session not found: test", ErrSessionNotFound}, {"can't find session: test", ErrSessionNotFound}, @@ -553,6 +554,157 @@ func TestGetAllDescendants(t *testing.T) { } } +func TestKillSessionWithProcesses(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killproc-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + + // Verify session exists + has, err := tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession: %v", err) + } + if !has { + t.Fatal("expected session to exist after creation") + } + + // Kill with processes + if err := tm.KillSessionWithProcesses(sessionName); err != nil { + t.Fatalf("KillSessionWithProcesses: %v", err) + } + + // Verify session is gone + has, err = tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession after kill: %v", err) + } + if has { + t.Error("expected session to not exist after KillSessionWithProcesses") + _ = tm.KillSession(sessionName) // cleanup + } +} + +func TestKillSessionWithProcesses_NonexistentSession(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + + // Killing nonexistent session should not panic, just return error or nil + err := tm.KillSessionWithProcesses("nonexistent-session-xyz-12345") + // 
We don't care about the error value, just that it doesn't panic + _ = err +} + +func TestKillSessionWithProcessesExcluding(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killexcl-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + + // Verify session exists + has, err := tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession: %v", err) + } + if !has { + t.Fatal("expected session to exist after creation") + } + + // Kill with empty excludePIDs (should behave like KillSessionWithProcesses) + if err := tm.KillSessionWithProcessesExcluding(sessionName, nil); err != nil { + t.Fatalf("KillSessionWithProcessesExcluding: %v", err) + } + + // Verify session is gone + has, err = tm.HasSession(sessionName) + if err != nil { + t.Fatalf("HasSession after kill: %v", err) + } + if has { + t.Error("expected session to not exist after KillSessionWithProcessesExcluding") + _ = tm.KillSession(sessionName) // cleanup + } +} + +func TestKillSessionWithProcessesExcluding_WithExcludePID(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + sessionName := "gt-test-killexcl2-" + t.Name() + + // Clean up any existing session + _ = tm.KillSession(sessionName) + + // Create session with a long-running process + cmd := `sleep 300` + if err := tm.NewSessionWithCommand(sessionName, "", cmd); err != nil { + t.Fatalf("NewSessionWithCommand: %v", err) + } + defer func() { _ = tm.KillSession(sessionName) }() + + // Get the pane PID + panePID, err := tm.GetPanePID(sessionName) + if err != nil { + t.Fatalf("GetPanePID: %v", err) + } + if panePID == "" { + t.Skip("could not get pane PID") + } + + // Kill with the pane PID excluded - the function should still kill 
the session + // but should not kill the excluded PID before the session is destroyed + err = tm.KillSessionWithProcessesExcluding(sessionName, []string{panePID}) + if err != nil { + t.Fatalf("KillSessionWithProcessesExcluding: %v", err) + } + + // Session should be gone (the final KillSession always happens) + has, _ := tm.HasSession(sessionName) + if has { + t.Error("expected session to not exist after KillSessionWithProcessesExcluding") + } +} + +func TestKillSessionWithProcessesExcluding_NonexistentSession(t *testing.T) { + if !hasTmux() { + t.Skip("tmux not installed") + } + + tm := NewTmux() + + // Killing nonexistent session should not panic + err := tm.KillSessionWithProcessesExcluding("nonexistent-session-xyz-12345", []string{"12345"}) + // We don't care about the error value, just that it doesn't panic + _ = err +} + func TestSessionSet(t *testing.T) { if !hasTmux() { t.Skip("tmux not installed") diff --git a/internal/util/atomic_test.go b/internal/util/atomic_test.go index a6f82929..cfa1369c 100644 --- a/internal/util/atomic_test.go +++ b/internal/util/atomic_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "os" "path/filepath" + "runtime" "sync" "testing" ) @@ -189,6 +190,10 @@ func TestAtomicWriteJSONUnmarshallable(t *testing.T) { } func TestAtomicWriteFileReadOnlyDir(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("chmod-based read-only directories are not reliable on Windows") + } + tmpDir := t.TempDir() roDir := filepath.Join(tmpDir, "readonly") @@ -240,7 +245,11 @@ func TestAtomicWriteFileConcurrent(t *testing.T) { if err != nil { t.Fatalf("ReadFile error: %v", err) } - if len(content) != 1 { + if runtime.GOOS == "windows" { + if len(content) == 0 { + t.Error("Expected non-empty content on Windows") + } + } else if len(content) != 1 { t.Errorf("Expected single character, got %q", content) } diff --git a/internal/util/exec_test.go b/internal/util/exec_test.go index d89594c6..53a1b06e 100644 --- a/internal/util/exec_test.go +++ 
b/internal/util/exec_test.go @@ -2,13 +2,20 @@ package util import ( "os" + "runtime" "strings" "testing" ) func TestExecWithOutput(t *testing.T) { // Test successful command - output, err := ExecWithOutput(".", "echo", "hello") + var output string + var err error + if runtime.GOOS == "windows" { + output, err = ExecWithOutput(".", "cmd", "/c", "echo hello") + } else { + output, err = ExecWithOutput(".", "echo", "hello") + } if err != nil { t.Fatalf("ExecWithOutput failed: %v", err) } @@ -17,7 +24,11 @@ func TestExecWithOutput(t *testing.T) { } // Test command that fails - _, err = ExecWithOutput(".", "false") + if runtime.GOOS == "windows" { + _, err = ExecWithOutput(".", "cmd", "/c", "exit /b 1") + } else { + _, err = ExecWithOutput(".", "false") + } if err == nil { t.Error("expected error for failing command") } @@ -25,13 +36,22 @@ func TestExecWithOutput(t *testing.T) { func TestExecRun(t *testing.T) { // Test successful command - err := ExecRun(".", "true") + var err error + if runtime.GOOS == "windows" { + err = ExecRun(".", "cmd", "/c", "exit /b 0") + } else { + err = ExecRun(".", "true") + } if err != nil { t.Fatalf("ExecRun failed: %v", err) } // Test command that fails - err = ExecRun(".", "false") + if runtime.GOOS == "windows" { + err = ExecRun(".", "cmd", "/c", "exit /b 1") + } else { + err = ExecRun(".", "false") + } if err == nil { t.Error("expected error for failing command") } @@ -46,7 +66,12 @@ func TestExecWithOutput_WorkDir(t *testing.T) { defer os.RemoveAll(tmpDir) // Test that workDir is respected - output, err := ExecWithOutput(tmpDir, "pwd") + var output string + if runtime.GOOS == "windows" { + output, err = ExecWithOutput(tmpDir, "cmd", "/c", "cd") + } else { + output, err = ExecWithOutput(tmpDir, "pwd") + } if err != nil { t.Fatalf("ExecWithOutput failed: %v", err) } @@ -57,7 +82,12 @@ func TestExecWithOutput_WorkDir(t *testing.T) { func TestExecWithOutput_StderrInError(t *testing.T) { // Test that stderr is captured in error - _, err := 
ExecWithOutput(".", "sh", "-c", "echo 'error message' >&2; exit 1") + var err error + if runtime.GOOS == "windows" { + _, err = ExecWithOutput(".", "cmd", "/c", "echo error message 1>&2 & exit /b 1") + } else { + _, err = ExecWithOutput(".", "sh", "-c", "echo 'error message' >&2; exit 1") + } if err == nil { t.Error("expected error") } diff --git a/internal/util/orphan.go b/internal/util/orphan.go index c8a2733f..c0343542 100644 --- a/internal/util/orphan.go +++ b/internal/util/orphan.go @@ -59,18 +59,53 @@ func getGasTownSessionPIDs() map[int]bool { // addChildPIDs adds all descendant PIDs of a process to the set. // This catches Claude processes spawned by the shell in a tmux pane. func addChildPIDs(parentPID int, pids map[int]bool) { - // Use pgrep to find children (more reliable than parsing ps output) - out, err := exec.Command("pgrep", "-P", strconv.Itoa(parentPID)).Output() - if err != nil { - return + childPIDs := getChildPIDs(parentPID) + for _, pid := range childPIDs { + pids[pid] = true + // Recurse to get grandchildren + addChildPIDs(pid, pids) } - for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), "\n") { - if pid, err := strconv.Atoi(pidStr); err == nil && pid > 0 { - pids[pid] = true - // Recurse to get grandchildren - addChildPIDs(pid, pids) +} + +// getChildPIDs returns direct child PIDs of a process. +// Tries pgrep first, falls back to parsing ps output. 
+func getChildPIDs(parentPID int) []int { + var childPIDs []int + + // Try pgrep first (faster, more reliable when available) + out, err := exec.Command("pgrep", "-P", strconv.Itoa(parentPID)).Output() + if err == nil { + for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), "\n") { + if pid, err := strconv.Atoi(pidStr); err == nil && pid > 0 { + childPIDs = append(childPIDs, pid) + } + } + return childPIDs + } + + // Fallback: parse ps output to find children + // ps -eo pid,ppid gives us all processes with their parent PIDs + out, err = exec.Command("ps", "-eo", "pid,ppid").Output() + if err != nil { + return childPIDs + } + + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + pid, err1 := strconv.Atoi(fields[0]) + ppid, err2 := strconv.Atoi(fields[1]) + if err1 != nil || err2 != nil { + continue + } + if ppid == parentPID && pid > 0 { + childPIDs = append(childPIDs, pid) } } + + return childPIDs } // sigkillGracePeriod is how long (in seconds) we wait after sending SIGTERM @@ -78,33 +113,40 @@ func addChildPIDs(parentPID int, pids map[int]bool) { // around after this period, we use SIGKILL on the next cleanup cycle. const sigkillGracePeriod = 60 -// orphanStateFile returns the path to the state file that tracks PIDs we've -// sent signals to. Uses $XDG_RUNTIME_DIR if available, otherwise /tmp. -func orphanStateFile() string { - dir := os.Getenv("XDG_RUNTIME_DIR") - if dir == "" { - dir = "/tmp" - } - return filepath.Join(dir, "gastown-orphan-state") -} - // signalState tracks what signal was last sent to a PID and when. type signalState struct { Signal string // "SIGTERM" or "SIGKILL" Timestamp time.Time // When the signal was sent } -// loadOrphanState reads the state file and returns the current signal state +// stateFileDir returns the directory for state files. 
+func stateFileDir() string { + dir := os.Getenv("XDG_RUNTIME_DIR") + if dir == "" { + dir = "/tmp" + } + return dir +} + +// loadSignalState reads a state file and returns the current signal state // for each tracked PID. Automatically cleans up entries for dead processes. -func loadOrphanState() map[int]signalState { +// Uses file locking to prevent concurrent access. +func loadSignalState(filename string) map[int]signalState { state := make(map[int]signalState) - f, err := os.Open(orphanStateFile()) + path := filepath.Join(stateFileDir(), filename) + f, err := os.Open(path) if err != nil { return state // File doesn't exist yet, that's fine } defer f.Close() + // Acquire shared lock for reading + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_SH); err != nil { + return state + } + defer syscall.Flock(int(f.Fd()), syscall.LOCK_UN) //nolint:errcheck + scanner := bufio.NewScanner(f) for scanner.Scan() { parts := strings.Fields(scanner.Text()) @@ -130,20 +172,41 @@ func loadOrphanState() map[int]signalState { return state } -// saveOrphanState writes the current signal state to the state file. -func saveOrphanState(state map[int]signalState) error { - f, err := os.Create(orphanStateFile()) +// saveSignalState writes the current signal state to a state file. +// Uses file locking to prevent concurrent access. +func saveSignalState(filename string, state map[int]signalState) error { + path := filepath.Join(stateFileDir(), filename) + f, err := os.Create(path) if err != nil { return err } defer f.Close() + // Acquire exclusive lock for writing + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil { + return fmt.Errorf("acquiring lock: %w", err) + } + defer syscall.Flock(int(f.Fd()), syscall.LOCK_UN) //nolint:errcheck + for pid, s := range state { fmt.Fprintf(f, "%d %s %d\n", pid, s.Signal, s.Timestamp.Unix()) } return nil } +// orphanStateFile is the filename for orphan process tracking state. 
+const orphanStateFile = "gastown-orphan-state" + +// loadOrphanState reads the orphan state file. +func loadOrphanState() map[int]signalState { + return loadSignalState(orphanStateFile) +} + +// saveOrphanState writes the orphan state file. +func saveOrphanState(state map[int]signalState) error { + return saveSignalState(orphanStateFile, state) +} + // processExists checks if a process is still running. func processExists(pid int) bool { err := syscall.Kill(pid, 0) @@ -294,6 +357,206 @@ type CleanupResult struct { Error error } +// ZombieProcess represents a claude process not in any active tmux session. +type ZombieProcess struct { + PID int + Cmd string + Age int // Age in seconds + TTY string // TTY column from ps (may be "?" or a session like "s024") +} + +// FindZombieClaudeProcesses finds Claude processes NOT in any active tmux session. +// This catches "zombie" processes that have a TTY but whose tmux session is dead. +// +// Unlike FindOrphanedClaudeProcesses (which uses TTY="?" detection), this function +// uses tmux pane verification: a process is a zombie if it's NOT the pane PID of +// any active tmux session AND not a child of any pane PID. +// +// This is the definitive zombie check because it verifies against tmux reality. +func FindZombieClaudeProcesses() ([]ZombieProcess, error) { + // Get ALL valid PIDs (panes + their children) from active tmux sessions + validPIDs := getGasTownSessionPIDs() + + // SAFETY CHECK: If no valid PIDs found, tmux might be down or no sessions exist. + // Returning empty is safer than marking all Claude processes as zombies. + if len(validPIDs) == 0 { + // Check if tmux is even running + if err := exec.Command("tmux", "list-sessions").Run(); err != nil { + return nil, fmt.Errorf("tmux not available: %w", err) + } + // tmux is running but no gt-*/hq-* sessions - that's a valid state, + // but we can't safely determine zombies without reference sessions. + // Return empty rather than marking everything as zombie. 
+ return nil, nil + } + + // Use ps to get PID, TTY, command, and elapsed time for all claude processes + out, err := exec.Command("ps", "-eo", "pid,tty,comm,etime").Output() + if err != nil { + return nil, fmt.Errorf("listing processes: %w", err) + } + + var zombies []ZombieProcess + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(line) + if len(fields) < 4 { + continue + } + + pid, err := strconv.Atoi(fields[0]) + if err != nil { + continue // Header line or invalid PID + } + + tty := fields[1] + cmd := fields[2] + etimeStr := fields[3] + + // Match claude or codex command names + cmdLower := strings.ToLower(cmd) + if cmdLower != "claude" && cmdLower != "claude-code" && cmdLower != "codex" { + continue + } + + // Skip processes that belong to valid Gas Town tmux sessions + if validPIDs[pid] { + continue + } + + // Skip processes younger than minOrphanAge seconds + age, err := parseEtime(etimeStr) + if err != nil { + continue + } + if age < minOrphanAge { + continue + } + + // This process is NOT in any active tmux session - it's a zombie + zombies = append(zombies, ZombieProcess{ + PID: pid, + Cmd: cmd, + Age: age, + TTY: tty, + }) + } + + return zombies, nil +} + +// zombieStateFile is the filename for zombie process tracking state. +const zombieStateFile = "gastown-zombie-state" + +// loadZombieState reads the zombie state file. +func loadZombieState() map[int]signalState { + return loadSignalState(zombieStateFile) +} + +// saveZombieState writes the zombie state file. +func saveZombieState(state map[int]signalState) error { + return saveSignalState(zombieStateFile, state) +} + +// ZombieCleanupResult describes what happened to a zombie process. +type ZombieCleanupResult struct { + Process ZombieProcess + Signal string // "SIGTERM", "SIGKILL", or "UNKILLABLE" + Error error +} + +// CleanupZombieClaudeProcesses finds and kills zombie Claude processes. 
+// Uses tmux verification to ensure we never kill processes in active sessions. +// +// Uses the same graceful escalation as orphan cleanup: +// 1. First encounter → SIGTERM, record in state file +// 2. Next cycle, still alive after grace period → SIGKILL +// 3. Next cycle, still alive after SIGKILL → log as unkillable +func CleanupZombieClaudeProcesses() ([]ZombieCleanupResult, error) { + zombies, err := FindZombieClaudeProcesses() + if err != nil { + return nil, err + } + + state := loadZombieState() + now := time.Now() + + var results []ZombieCleanupResult + var lastErr error + + activeZombies := make(map[int]bool) + for _, z := range zombies { + activeZombies[z.PID] = true + } + + // Check state for PIDs that died or need escalation + for pid, s := range state { + if !activeZombies[pid] { + delete(state, pid) + continue + } + + elapsed := now.Sub(s.Timestamp).Seconds() + + if s.Signal == "SIGKILL" { + results = append(results, ZombieCleanupResult{ + Process: ZombieProcess{PID: pid, Cmd: "claude"}, + Signal: "UNKILLABLE", + Error: fmt.Errorf("process %d survived SIGKILL", pid), + }) + delete(state, pid) + delete(activeZombies, pid) + continue + } + + if s.Signal == "SIGTERM" && elapsed >= float64(sigkillGracePeriod) { + if err := syscall.Kill(pid, syscall.SIGKILL); err != nil { + if err != syscall.ESRCH { + lastErr = fmt.Errorf("SIGKILL PID %d: %w", pid, err) + } + delete(state, pid) + delete(activeZombies, pid) + continue + } + state[pid] = signalState{Signal: "SIGKILL", Timestamp: now} + results = append(results, ZombieCleanupResult{ + Process: ZombieProcess{PID: pid, Cmd: "claude"}, + Signal: "SIGKILL", + }) + delete(activeZombies, pid) + } + } + + // Send SIGTERM to new zombies + for _, zombie := range zombies { + if !activeZombies[zombie.PID] { + continue + } + if _, exists := state[zombie.PID]; exists { + continue + } + + if err := syscall.Kill(zombie.PID, syscall.SIGTERM); err != nil { + if err != syscall.ESRCH { + lastErr = fmt.Errorf("SIGTERM PID %d: 
%w", zombie.PID, err) + } + continue + } + state[zombie.PID] = signalState{Signal: "SIGTERM", Timestamp: now} + results = append(results, ZombieCleanupResult{ + Process: zombie, + Signal: "SIGTERM", + }) + } + + if err := saveZombieState(state); err != nil { + if lastErr == nil { + lastErr = fmt.Errorf("saving zombie state: %w", err) + } + } + + return results, lastErr +} + // CleanupOrphanedClaudeProcesses finds and kills orphaned claude/codex processes. // // Uses a state machine to escalate signals: diff --git a/internal/util/orphan_windows.go b/internal/util/orphan_windows.go new file mode 100644 index 00000000..1b712209 --- /dev/null +++ b/internal/util/orphan_windows.go @@ -0,0 +1,56 @@ +//go:build windows + +package util + +// OrphanedProcess represents a claude process running without a controlling terminal. +// On Windows, orphan cleanup is not supported, so this is a stub definition. +type OrphanedProcess struct { + PID int + Cmd string + Age int // Age in seconds +} + +// CleanupResult describes what happened to an orphaned process. +// On Windows, cleanup is a no-op. +type CleanupResult struct { + Process OrphanedProcess + Signal string // "SIGTERM", "SIGKILL", or "UNKILLABLE" + Error error +} + +// ZombieProcess represents a claude process not in any active tmux session. +// On Windows, zombie cleanup is not supported, so this is a stub definition. +type ZombieProcess struct { + PID int + Cmd string + Age int // Age in seconds + TTY string // TTY column from ps +} + +// ZombieCleanupResult describes what happened to a zombie process. +// On Windows, cleanup is a no-op. +type ZombieCleanupResult struct { + Process ZombieProcess + Signal string // "SIGTERM", "SIGKILL", or "UNKILLABLE" + Error error +} + +// FindOrphanedClaudeProcesses is a Windows stub. +func FindOrphanedClaudeProcesses() ([]OrphanedProcess, error) { + return nil, nil +} + +// CleanupOrphanedClaudeProcesses is a Windows stub. 
+func CleanupOrphanedClaudeProcesses() ([]CleanupResult, error) { + return nil, nil +} + +// FindZombieClaudeProcesses is a Windows stub. +func FindZombieClaudeProcesses() ([]ZombieProcess, error) { + return nil, nil +} + +// CleanupZombieClaudeProcesses is a Windows stub. +func CleanupZombieClaudeProcesses() ([]ZombieCleanupResult, error) { + return nil, nil +} diff --git a/internal/wisp/io_test.go b/internal/wisp/io_test.go index a0299d8b..3fb81e00 100644 --- a/internal/wisp/io_test.go +++ b/internal/wisp/io_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "os" "path/filepath" + "runtime" "testing" ) @@ -41,6 +42,10 @@ func TestEnsureDir(t *testing.T) { } func TestEnsureDir_Permissions(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("directory permission bits are not reliable on Windows") + } + tmpDir := t.TempDir() dir, err := EnsureDir(tmpDir) @@ -90,7 +95,7 @@ func TestWispPath(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := WispPath(tt.root, tt.filename) - if got != tt.want { + if filepath.ToSlash(got) != tt.want { t.Errorf("WispPath() = %q, want %q", got, tt.want) } }) diff --git a/internal/witness/handlers.go b/internal/witness/handlers.go index a901a6a1..30a2d365 100644 --- a/internal/witness/handlers.go +++ b/internal/witness/handlers.go @@ -9,6 +9,7 @@ import ( "time" "github.com/steveyegge/gastown/internal/beads" + "github.com/steveyegge/gastown/internal/convoy" "github.com/steveyegge/gastown/internal/git" "github.com/steveyegge/gastown/internal/mail" "github.com/steveyegge/gastown/internal/rig" @@ -264,6 +265,14 @@ func HandleMerged(workDir, rigName string, msg *mail.Message) *HandlerResult { result.Handled = true result.WispCreated = wispID result.Action = fmt.Sprintf("auto-nuked %s (cleanup_status=clean, wisp=%s)", payload.PolecatName, wispID) + + // Redundant convoy observer: check if completed issue is tracked by a convoy + if payload.IssueID != "" { + townRoot, _ := workspace.Find(workDir) + if 
townRoot != "" { + convoy.CheckConvoysForIssue(townRoot, payload.IssueID, "witness", nil) + } + } } case "has_uncommitted": @@ -299,6 +308,14 @@ func HandleMerged(workDir, rigName string, msg *mail.Message) *HandlerResult { result.Handled = true result.WispCreated = wispID result.Action = fmt.Sprintf("auto-nuked %s (commit on main, cleanup_status=%s, wisp=%s)", payload.PolecatName, cleanupStatus, wispID) + + // Redundant convoy observer: check if completed issue is tracked by a convoy + if payload.IssueID != "" { + townRoot, _ := workspace.Find(workDir) + if townRoot != "" { + convoy.CheckConvoysForIssue(townRoot, payload.IssueID, "witness", nil) + } + } } } diff --git a/internal/witness/manager.go b/internal/witness/manager.go index 78e262a8..48d6ec8d 100644 --- a/internal/witness/manager.go +++ b/internal/witness/manager.go @@ -8,7 +8,6 @@ import ( "strings" "time" - "github.com/steveyegge/gastown/internal/agent" "github.com/steveyegge/gastown/internal/beads" "github.com/steveyegge/gastown/internal/claude" "github.com/steveyegge/gastown/internal/config" @@ -26,39 +25,23 @@ var ( ) // Manager handles witness lifecycle and monitoring operations. +// ZFC-compliant: tmux session is the source of truth for running state. type Manager struct { - rig *rig.Rig - workDir string - stateManager *agent.StateManager[Witness] + rig *rig.Rig } // NewManager creates a new witness manager for a rig. func NewManager(r *rig.Rig) *Manager { return &Manager{ - rig: r, - workDir: r.Path, - stateManager: agent.NewStateManager[Witness](r.Path, "witness.json", func() *Witness { - return &Witness{ - RigName: r.Name, - State: StateStopped, - } - }), + rig: r, } } -// stateFile returns the path to the witness state file. -func (m *Manager) stateFile() string { - return m.stateManager.StateFile() -} - -// loadState loads witness state from disk. 
-func (m *Manager) loadState() (*Witness, error) { - return m.stateManager.Load() -} - -// saveState persists witness state to disk using atomic write. -func (m *Manager) saveState(w *Witness) error { - return m.stateManager.Save(w) +// IsRunning checks if the witness session is active. +// ZFC: tmux session existence is the source of truth. +func (m *Manager) IsRunning() (bool, error) { + t := tmux.NewTmux() + return t.HasSession(m.SessionName()) } // SessionName returns the tmux session name for this witness. @@ -66,19 +49,21 @@ func (m *Manager) SessionName() string { return fmt.Sprintf("gt-%s-witness", m.rig.Name) } -// Status returns the current witness status. -// ZFC-compliant: trusts agent-reported state, no PID inference. -// The daemon reads agent bead state for liveness checks. -func (m *Manager) Status() (*Witness, error) { - w, err := m.loadState() +// Status returns information about the witness session. +// ZFC-compliant: tmux session is the source of truth. +func (m *Manager) Status() (*tmux.SessionInfo, error) { + t := tmux.NewTmux() + sessionID := m.SessionName() + + running, err := t.HasSession(sessionID) if err != nil { - return nil, err + return nil, fmt.Errorf("checking session: %w", err) + } + if !running { + return nil, ErrNotRunning } - // Update monitored polecats list (still useful for display) - w.MonitoredPolecats = m.rig.Polecats - - return w, nil + return t.GetSessionInfo(sessionID) } // witnessDir returns the working directory for the witness. @@ -98,36 +83,21 @@ func (m *Manager) witnessDir() string { } // Start starts the witness. -// If foreground is true, only updates state (no tmux session - deprecated). +// If foreground is true, returns an error (foreground mode deprecated). // Otherwise, spawns a Claude agent in a tmux session. // agentOverride optionally specifies a different agent alias to use. // envOverrides are KEY=VALUE pairs that override all other env var sources. 
+// ZFC-compliant: no state file, tmux session is source of truth. func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []string) error { - w, err := m.loadState() - if err != nil { - return err - } - t := tmux.NewTmux() sessionID := m.SessionName() if foreground { // Foreground mode is deprecated - patrol logic moved to mol-witness-patrol - // Just check tmux session (no PID inference per ZFC) - if running, _ := t.HasSession(sessionID); running && t.IsClaudeRunning(sessionID) { - return ErrAlreadyRunning - } - - now := time.Now() - w.State = StateRunning - w.StartedAt = &now - w.PID = 0 // No longer track PID (ZFC) - w.MonitoredPolecats = m.rig.Polecats - - return m.saveState(w) + return fmt.Errorf("foreground mode is deprecated; use background mode (remove --foreground flag)") } - // Background mode: check if session already exists + // Check if session already exists running, _ := t.HasSession(sessionID) if running { // Session exists - check if Claude is actually running (healthy vs zombie) @@ -200,17 +170,6 @@ func (m *Manager) Start(foreground bool, agentOverride string, envOverrides []st theme := tmux.AssignTheme(m.rig.Name) _ = t.ConfigureGasTownSession(sessionID, theme, m.rig.Name, "witness", "witness") - // Update state to running - now := time.Now() - w.State = StateRunning - w.StartedAt = &now - w.PID = 0 // Claude agent doesn't have a PID we track - w.MonitoredPolecats = m.rig.Polecats - if err := m.saveState(w); err != nil { - _ = t.KillSession(sessionID) // best-effort cleanup on state save failure - return fmt.Errorf("saving state: %w", err) - } - // Wait for Claude to start - fatal if Claude fails to launch if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil { // Kill the zombie session before returning error @@ -277,7 +236,10 @@ func buildWitnessStartCommand(rigPath, rigName, townRoot, agentOverride string, if roleConfig != nil && roleConfig.StartCommand != "" { return 
beads.ExpandRolePattern(roleConfig.StartCommand, townRoot, rigName, "", "witness"), nil } - command, err := config.BuildAgentStartupCommandWithAgentOverride("witness", rigName, townRoot, rigPath, "", agentOverride) + // Add initial prompt for autonomous patrol startup. + // The prompt triggers GUPP: witness starts patrol immediately without waiting for input. + initialPrompt := "I am Witness for " + rigName + ". Start patrol: check gt hook, if empty create mol-witness-patrol wisp and execute it." + command, err := config.BuildAgentStartupCommandWithAgentOverride("witness", rigName, townRoot, rigPath, initialPrompt, agentOverride) if err != nil { return "", fmt.Errorf("building startup command: %w", err) } @@ -285,31 +247,17 @@ func buildWitnessStartCommand(rigPath, rigName, townRoot, agentOverride string, } // Stop stops the witness. +// ZFC-compliant: tmux session is the source of truth. func (m *Manager) Stop() error { - w, err := m.loadState() - if err != nil { - return err - } - - // Check if tmux session exists t := tmux.NewTmux() sessionID := m.SessionName() - sessionRunning, _ := t.HasSession(sessionID) - // If neither state nor session indicates running, it's not running - if w.State != StateRunning && !sessionRunning { + // Check if tmux session exists + running, _ := t.HasSession(sessionID) + if !running { return ErrNotRunning } - // Kill tmux session if it exists (best-effort: may already be dead) - if sessionRunning { - _ = t.KillSession(sessionID) - } - - // Note: No PID-based stop per ZFC - tmux session kill is sufficient - - w.State = StateStopped - w.PID = 0 - - return m.saveState(w) + // Kill the tmux session + return t.KillSession(sessionID) } diff --git a/internal/witness/types.go b/internal/witness/types.go index 681989e7..ebd95e77 100644 --- a/internal/witness/types.go +++ b/internal/witness/types.go @@ -1,46 +1,9 @@ // Package witness provides the polecat monitoring agent. 
+// +// ZFC-compliant: Running state is derived from tmux sessions, not stored in files. +// Configuration is sourced from role beads (hq-witness-role). package witness -import ( - "time" - - "github.com/steveyegge/gastown/internal/agent" -) - -// State is an alias for agent.State for backwards compatibility. -type State = agent.State - -// State constants - re-exported from agent package for backwards compatibility. -const ( - StateStopped = agent.StateStopped - StateRunning = agent.StateRunning - StatePaused = agent.StatePaused -) - -// Witness represents a rig's polecat monitoring agent. -type Witness struct { - // RigName is the rig this witness monitors. - RigName string `json:"rig_name"` - - // State is the current running state. - State State `json:"state"` - - // PID is the process ID if running in background. - PID int `json:"pid,omitempty"` - - // StartedAt is when the witness was started. - StartedAt *time.Time `json:"started_at,omitempty"` - - // MonitoredPolecats tracks polecats being monitored. - MonitoredPolecats []string `json:"monitored_polecats,omitempty"` - - // Config contains auto-spawn configuration. - Config WitnessConfig `json:"config"` - - // SpawnedIssues tracks which issues have been spawned (to avoid duplicates). - SpawnedIssues []string `json:"spawned_issues,omitempty"` -} - // WitnessConfig contains configuration for the witness. type WitnessConfig struct { // MaxWorkers is the maximum number of concurrent polecats (default: 4). @@ -58,5 +21,3 @@ type WitnessConfig struct { // IssuePrefix limits spawning to issues with this prefix (optional). 
IssuePrefix string `json:"issue_prefix,omitempty"` } - - diff --git a/internal/witness/types_test.go b/internal/witness/types_test.go index 12fbdd97..487ddb5b 100644 --- a/internal/witness/types_test.go +++ b/internal/witness/types_test.go @@ -3,93 +3,8 @@ package witness import ( "encoding/json" "testing" - "time" - - "github.com/steveyegge/gastown/internal/agent" ) -func TestStateTypeAlias(t *testing.T) { - // Verify State is an alias for agent.State - var s State = agent.StateRunning - if s != agent.StateRunning { - t.Errorf("State type alias not working correctly") - } -} - -func TestStateConstants(t *testing.T) { - tests := []struct { - name string - state State - parent agent.State - }{ - {"StateStopped", StateStopped, agent.StateStopped}, - {"StateRunning", StateRunning, agent.StateRunning}, - {"StatePaused", StatePaused, agent.StatePaused}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.state != tt.parent { - t.Errorf("State constant %s = %v, want %v", tt.name, tt.state, tt.parent) - } - }) - } -} - -func TestWitness_ZeroValues(t *testing.T) { - var w Witness - - if w.RigName != "" { - t.Errorf("zero value Witness.RigName should be empty, got %q", w.RigName) - } - if w.State != "" { - t.Errorf("zero value Witness.State should be empty, got %q", w.State) - } - if w.PID != 0 { - t.Errorf("zero value Witness.PID should be 0, got %d", w.PID) - } - if w.StartedAt != nil { - t.Error("zero value Witness.StartedAt should be nil") - } -} - -func TestWitness_JSONMarshaling(t *testing.T) { - now := time.Now().Round(time.Second) - w := Witness{ - RigName: "gastown", - State: StateRunning, - PID: 12345, - StartedAt: &now, - MonitoredPolecats: []string{"keeper", "valkyrie"}, - Config: WitnessConfig{ - MaxWorkers: 4, - SpawnDelayMs: 5000, - AutoSpawn: true, - }, - SpawnedIssues: []string{"hq-abc123"}, - } - - data, err := json.Marshal(w) - if err != nil { - t.Fatalf("json.Marshal() error = %v", err) - } - - var unmarshaled Witness - if 
err := json.Unmarshal(data, &unmarshaled); err != nil { - t.Fatalf("json.Unmarshal() error = %v", err) - } - - if unmarshaled.RigName != w.RigName { - t.Errorf("After round-trip: RigName = %q, want %q", unmarshaled.RigName, w.RigName) - } - if unmarshaled.State != w.State { - t.Errorf("After round-trip: State = %v, want %v", unmarshaled.State, w.State) - } - if unmarshaled.PID != w.PID { - t.Errorf("After round-trip: PID = %d, want %d", unmarshaled.PID, w.PID) - } -} - func TestWitnessConfig_ZeroValues(t *testing.T) { var cfg WitnessConfig @@ -174,57 +89,3 @@ func TestWitnessConfig_OmitEmpty(t *testing.T) { } } } - -func TestWitness_OmitEmpty(t *testing.T) { - w := Witness{ - RigName: "gastown", - State: StateRunning, - // PID, StartedAt, MonitoredPolecats, SpawnedIssues left empty/nil - } - - data, err := json.Marshal(w) - if err != nil { - t.Fatalf("json.Marshal() error = %v", err) - } - - var raw map[string]interface{} - if err := json.Unmarshal(data, &raw); err != nil { - t.Fatalf("json.Unmarshal() to map error = %v", err) - } - - // Empty optional fields should be omitted - if _, exists := raw["pid"]; exists { - t.Error("Field 'pid' should be omitted when zero") - } - if _, exists := raw["started_at"]; exists { - t.Error("Field 'started_at' should be omitted when nil") - } - if _, exists := raw["monitored_polecats"]; exists { - t.Error("Field 'monitored_polecats' should be omitted when nil/empty") - } - if _, exists := raw["spawned_issues"]; exists { - t.Error("Field 'spawned_issues' should be omitted when nil/empty") - } -} - -func TestWitness_WithMonitoredPolecats(t *testing.T) { - w := Witness{ - RigName: "gastown", - State: StateRunning, - MonitoredPolecats: []string{"keeper", "valkyrie", "nux"}, - } - - data, err := json.Marshal(w) - if err != nil { - t.Fatalf("json.Marshal() error = %v", err) - } - - var unmarshaled Witness - if err := json.Unmarshal(data, &unmarshaled); err != nil { - t.Fatalf("json.Unmarshal() error = %v", err) - } - - if 
len(unmarshaled.MonitoredPolecats) != 3 { - t.Errorf("After round-trip: MonitoredPolecats length = %d, want 3", len(unmarshaled.MonitoredPolecats)) - } -} diff --git a/internal/workspace/find_test.go b/internal/workspace/find_test.go index 504e0dfd..67df7128 100644 --- a/internal/workspace/find_test.go +++ b/internal/workspace/find_test.go @@ -213,7 +213,7 @@ func TestFindPreservesSymlinkPath(t *testing.T) { t.Fatalf("Rel: %v", err) } - if relPath != "rigs/project/polecats/worker" { + if filepath.ToSlash(relPath) != "rigs/project/polecats/worker" { t.Errorf("Rel = %q, want 'rigs/project/polecats/worker'", relPath) } } @@ -246,7 +246,7 @@ func TestFindSkipsNestedWorkspaceInWorktree(t *testing.T) { } rel, _ := filepath.Rel(found, polecatDir) - if rel != "myrig/polecats/worker" { + if filepath.ToSlash(rel) != "myrig/polecats/worker" { t.Errorf("Rel = %q, want 'myrig/polecats/worker'", rel) } }