Files
gastown/internal/cmd/molecule_step.go
Roland Tritsch 1b036aadf5 Fix deacon patrol process leak by killing pane processes before respawn (#745)
## Problem
The deacon patrol was leaking claude processes. Every patrol cycle (1-3 minutes),
a new claude process was spawned under the hq-deacon tmux session, but old processes
were never terminated. This resulted in 12+ accumulated claude processes consuming
resources.

## Root Cause
In molecule_step.go:331, handleStepContinue() used tmux respawn-pane -k to restart
the pane between patrol steps. The -k flag sends SIGHUP to the shell but does not
kill all descendant processes (claude and its node children).

## Solution
Added KillPaneProcesses() function in tmux.go that explicitly kills all descendant
processes before respawning the pane. This function:
- Gets all descendant PIDs recursively
- Sends SIGTERM to all (deepest first)
- Waits 100ms for graceful shutdown
- Sends SIGKILL to survivors

Updated handleStepContinue() to call KillPaneProcesses() before RespawnPane().

Co-authored-by: Roland Tritsch <roland@ailtir.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-20 22:30:23 -08:00

409 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package cmd
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"strings"
"github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/beads"
"github.com/steveyegge/gastown/internal/style"
"github.com/steveyegge/gastown/internal/tmux"
"github.com/steveyegge/gastown/internal/workspace"
)
// moleculeStepDoneCmd is the "gt mol step done" command.
var moleculeStepDoneCmd = &cobra.Command{
Use: "done <step-id>",
Short: "Complete step and auto-continue to next",
Long: `Complete a molecule step and automatically continue to the next ready step.
This command handles the step-to-step transition for polecats:
1. Closes the completed step (bd close <step-id>)
2. Extracts the molecule ID from the step
3. Finds the next ready step (dependency-aware)
4. If next step exists:
- Updates the hook to point to the next step
- Respawns the pane for a fresh session
5. If molecule complete:
- Clears the hook
- Sends POLECAT_DONE to witness
- Exits the session
IMPORTANT: This is the canonical way to complete molecule steps. Do NOT manually
close steps with 'bd close' - it skips the auto-continuation logic.
Example:
gt mol step done gt-abc.1 # Complete step 1 of molecule gt-abc`,
Args: cobra.ExactArgs(1),
RunE: runMoleculeStepDone,
}
var (
moleculeStepDryRun bool
)
func init() {
moleculeStepDoneCmd.Flags().BoolVarP(&moleculeStepDryRun, "dry-run", "n", false, "Show what would be done without executing")
moleculeStepDoneCmd.Flags().BoolVar(&moleculeJSON, "json", false, "Output as JSON")
}
// StepDoneResult is the result of a step done operation.
type StepDoneResult struct {
StepID string `json:"step_id"`
MoleculeID string `json:"molecule_id"`
StepClosed bool `json:"step_closed"`
NextStepID string `json:"next_step_id,omitempty"`
NextStepTitle string `json:"next_step_title,omitempty"`
Complete bool `json:"complete"`
Action string `json:"action"` // "continue", "done", "no_more_ready"
}
func runMoleculeStepDone(cmd *cobra.Command, args []string) error {
stepID := args[0]
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("getting current directory: %w", err)
}
// Find town root
townRoot, err := workspace.FindFromCwd()
if err != nil {
return fmt.Errorf("finding workspace: %w", err)
}
if townRoot == "" {
return fmt.Errorf("not in a Gas Town workspace")
}
// Find beads directory
workDir, err := findLocalBeadsDir()
if err != nil {
return fmt.Errorf("not in a beads workspace: %w", err)
}
b := beads.New(workDir)
// Step 1: Verify the step exists
step, err := b.Show(stepID)
if err != nil {
return fmt.Errorf("step not found: %w", err)
}
// Step 2: Extract molecule ID from step ID (gt-xxx.1 -> gt-xxx)
moleculeID := extractMoleculeIDFromStep(stepID)
if moleculeID == "" {
return fmt.Errorf("cannot extract molecule ID from step %s (expected format: gt-xxx.N)", stepID)
}
result := StepDoneResult{
StepID: stepID,
MoleculeID: moleculeID,
}
// Step 3: Close the step
if moleculeStepDryRun {
fmt.Printf("[dry-run] Would close step: %s\n", stepID)
result.StepClosed = true
} else {
if err := b.Close(stepID); err != nil {
return fmt.Errorf("closing step: %w", err)
}
result.StepClosed = true
fmt.Printf("%s Closed step %s: %s\n", style.Bold.Render("✓"), stepID, step.Title)
}
// Step 4: Find the next ready step
nextStep, allComplete, err := findNextReadyStep(b, moleculeID)
if err != nil {
return fmt.Errorf("finding next step: %w", err)
}
if allComplete {
result.Complete = true
result.Action = "done"
} else if nextStep != nil {
result.NextStepID = nextStep.ID
result.NextStepTitle = nextStep.Title
result.Action = "continue"
} else {
// There are more steps but none are ready (blocked on dependencies)
result.Action = "no_more_ready"
}
// JSON output
if moleculeJSON {
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
return enc.Encode(result)
}
// Step 5: Handle next action
switch result.Action {
case "continue":
return handleStepContinue(cwd, townRoot, workDir, nextStep, moleculeStepDryRun)
case "done":
return handleMoleculeComplete(cwd, townRoot, moleculeID, moleculeStepDryRun)
case "no_more_ready":
fmt.Printf("\n%s All remaining steps are blocked - waiting on dependencies\n",
style.Dim.Render(""))
fmt.Printf("Run 'gt mol progress %s' to see blocked steps\n", moleculeID)
return nil
}
return nil
}
// extractMoleculeIDFromStep extracts the molecule ID from a step ID.
// Step IDs have format: mol-id.N where N is the step number.
// Examples:
// gt-abc.1 -> gt-abc
// gt-xyz.3 -> gt-xyz
// bd-mol-abc.2 -> bd-mol-abc
func extractMoleculeIDFromStep(stepID string) string {
// Find the last dot
lastDot := strings.LastIndex(stepID, ".")
if lastDot == -1 {
return "" // No dot - not a step ID format
}
// Check if what's after the dot is a number (step suffix)
suffix := stepID[lastDot+1:]
if len(suffix) == 0 {
return "" // Trailing dot - no suffix
}
for _, c := range suffix {
if c < '0' || c > '9' {
return "" // Not a numeric suffix
}
}
return stepID[:lastDot]
}
// findNextReadyStep finds the next ready step in a molecule.
// Returns (nextStep, allComplete, error).
// If all steps are complete, returns (nil, true, nil).
// If no steps are ready but some are blocked/in_progress, returns (nil, false, nil).
func findNextReadyStep(b *beads.Beads, moleculeID string) (*beads.Issue, bool, error) {
// Get all children of the molecule
children, err := b.List(beads.ListOptions{
Parent: moleculeID,
Status: "all",
Priority: -1,
})
if err != nil {
return nil, false, fmt.Errorf("listing molecule steps: %w", err)
}
if len(children) == 0 {
return nil, true, nil // No steps = complete
}
// Build set of closed step IDs and collect open steps
// Note: "open" means not started. "in_progress" means someone's working on it.
// We only consider "open" steps as candidates for the next step.
closedIDs := make(map[string]bool)
var openSteps []*beads.Issue
hasNonClosedSteps := false
for _, child := range children {
switch child.Status {
case "closed":
closedIDs[child.ID] = true
case "open":
openSteps = append(openSteps, child)
hasNonClosedSteps = true
default:
// in_progress or other status - not closed, not available
hasNonClosedSteps = true
}
}
// Check if all complete
if !hasNonClosedSteps {
return nil, true, nil
}
// Find ready steps (open steps with all dependencies closed)
for _, step := range openSteps {
allDepsClosed := true
for _, depID := range step.DependsOn {
if !closedIDs[depID] {
allDepsClosed = false
break
}
}
if len(step.DependsOn) == 0 || allDepsClosed {
return step, false, nil
}
}
// No ready steps (all blocked or in_progress)
return nil, false, nil
}
// handleStepContinue handles continuing to the next step.
func handleStepContinue(cwd, townRoot, _ string, nextStep *beads.Issue, dryRun bool) error { // workDir unused but kept for signature consistency
fmt.Printf("\n%s Next step: %s\n", style.Bold.Render("→"), nextStep.ID)
fmt.Printf(" %s\n", nextStep.Title)
// Detect agent identity
roleInfo, err := GetRoleWithContext(cwd, townRoot)
if err != nil {
return fmt.Errorf("detecting role: %w", err)
}
roleCtx := RoleContext{
Role: roleInfo.Role,
Rig: roleInfo.Rig,
Polecat: roleInfo.Polecat,
TownRoot: townRoot,
WorkDir: cwd,
}
agentID := buildAgentIdentity(roleCtx)
if agentID == "" {
return fmt.Errorf("cannot determine agent identity (role: %s)", roleCtx.Role)
}
// Get git root for hook files
gitRoot, err := getGitRoot()
if err != nil {
return fmt.Errorf("finding git root: %w", err)
}
if dryRun {
fmt.Printf("\n[dry-run] Would pin next step: %s\n", nextStep.ID)
fmt.Printf("[dry-run] Would respawn pane\n")
return nil
}
// Pin the next step bead
pinCmd := exec.Command("bd", "update", nextStep.ID, "--status=pinned", "--assignee="+agentID)
pinCmd.Dir = gitRoot
pinCmd.Stderr = os.Stderr
if err := pinCmd.Run(); err != nil {
return fmt.Errorf("pinning next step: %w", err)
}
fmt.Printf("%s Next step pinned: %s\n", style.Bold.Render("📌"), nextStep.ID)
// Respawn the pane
if !tmux.IsInsideTmux() {
// Not in tmux - just print next action
fmt.Printf("\n%s Not in tmux - start new session with 'gt prime'\n",
style.Dim.Render(""))
return nil
}
pane := os.Getenv("TMUX_PANE")
if pane == "" {
return fmt.Errorf("TMUX_PANE not set")
}
// Get current session for restart command
currentSession, err := getCurrentTmuxSession()
if err != nil {
return fmt.Errorf("getting session name: %w", err)
}
restartCmd, err := buildRestartCommand(currentSession)
if err != nil {
return fmt.Errorf("building restart command: %w", err)
}
fmt.Printf("\n%s Respawning for next step...\n", style.Bold.Render("🔄"))
t := tmux.NewTmux()
// Kill all processes in the pane before respawning to prevent process leaks
if err := t.KillPaneProcesses(pane); err != nil {
// Non-fatal but log the warning
style.PrintWarning("could not kill pane processes: %v", err)
}
// Clear history before respawn
if err := t.ClearHistory(pane); err != nil {
// Non-fatal
style.PrintWarning("could not clear history: %v", err)
}
return t.RespawnPane(pane, restartCmd)
}
// handleMoleculeComplete handles when a molecule is complete.
func handleMoleculeComplete(cwd, townRoot, moleculeID string, dryRun bool) error {
fmt.Printf("\n%s Molecule complete!\n", style.Bold.Render("🎉"))
// Detect agent identity
roleInfo, err := GetRoleWithContext(cwd, townRoot)
if err != nil {
return fmt.Errorf("detecting role: %w", err)
}
roleCtx := RoleContext{
Role: roleInfo.Role,
Rig: roleInfo.Rig,
Polecat: roleInfo.Polecat,
TownRoot: townRoot,
WorkDir: cwd,
}
agentID := buildAgentIdentity(roleCtx)
// Get git root for hook files
gitRoot, err := getGitRoot()
if err != nil {
return fmt.Errorf("finding git root: %w", err)
}
if dryRun {
fmt.Printf("[dry-run] Would unpin work for %s\n", agentID)
fmt.Printf("[dry-run] Would send POLECAT_DONE to witness\n")
return nil
}
// Unpin the molecule bead (set status to open, will be closed by gt done or manually)
workDir, err := findLocalBeadsDir()
if err == nil {
b := beads.New(workDir)
pinnedBeads, err := b.List(beads.ListOptions{
Status: beads.StatusPinned,
Assignee: agentID,
Priority: -1,
})
if err == nil && len(pinnedBeads) > 0 {
// Unpin by setting status to open
unpinCmd := exec.Command("bd", "update", pinnedBeads[0].ID, "--status=open")
unpinCmd.Dir = gitRoot
unpinCmd.Stderr = os.Stderr
if err := unpinCmd.Run(); err != nil {
style.PrintWarning("could not unpin bead: %v", err)
} else {
fmt.Printf("%s Work unpinned\n", style.Bold.Render("✓"))
}
}
}
// For polecats, use gt done to signal completion
if roleCtx.Role == RolePolecat {
fmt.Printf("%s Signaling completion to witness...\n", style.Bold.Render("📤"))
doneCmd := exec.Command("gt", "done", "--exit", "DEFERRED")
doneCmd.Stdout = os.Stdout
doneCmd.Stderr = os.Stderr
return doneCmd.Run()
}
// For other roles, just print completion message
fmt.Printf("\nMolecule %s is complete. Ready for next assignment.\n", moleculeID)
return nil
}
// getGitRoot is defined in prime.go