fix(tmux): use NewSessionWithCommand to avoid send-keys race condition

Agent sessions would fail on startup because send-keys arrived before the
shell was ready, causing 'bad pattern' and 'command not found' errors.

Fix: Create sessions with the command directly using tmux new-session's
command argument. This runs the agent as the pane's initial process,
avoiding shell readiness timing issues entirely.

Updated all agent managers: mayor, deacon, witness, refinery, polecat, crew.

Also fixes pre-existing build error in polecat/manager.go (polecatPath →
clonePath/newClonePath).

Closes #280

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
jack
2026-01-08 23:35:31 -08:00
committed by Steve Yegge
parent a91e6cd643
commit afff85cdff
8 changed files with 99 additions and 138 deletions

View File

@@ -482,8 +482,34 @@ func (m *Manager) Start(name string, opts StartOptions) error {
return fmt.Errorf("ensuring Claude settings: %w", err)
}
// Create tmux session
if err := t.NewSession(sessionID, worker.ClonePath); err != nil {
// Build the startup beacon for predecessor discovery via /resume
// Pass it as Claude's initial prompt - processed when Claude is ready
address := fmt.Sprintf("%s/crew/%s", m.rig.Name, name)
topic := opts.Topic
if topic == "" {
topic = "start"
}
beacon := session.FormatStartupNudge(session.StartupNudgeConfig{
Recipient: address,
Sender: "human",
Topic: topic,
})
// Build startup command first
// SessionStart hook handles context loading (gt prime --hook)
claudeCmd, err := config.BuildCrewStartupCommandWithAgentOverride(m.rig.Name, name, m.rig.Path, beacon, opts.AgentOverride)
if err != nil {
return fmt.Errorf("building startup command: %w", err)
}
// For interactive/refresh mode, remove --dangerously-skip-permissions
if opts.Interactive {
claudeCmd = strings.Replace(claudeCmd, " --dangerously-skip-permissions", "", 1)
}
// Create session with command directly to avoid send-keys race condition.
// See: https://github.com/anthropics/gastown/issues/280
if err := t.NewSessionWithCommand(sessionID, worker.ClonePath, claudeCmd); err != nil {
return fmt.Errorf("creating session: %w", err)
}
@@ -504,41 +530,6 @@ func (m *Manager) Start(name string, opts StartOptions) error {
// Set up C-b n/p keybindings for crew session cycling (non-fatal)
_ = t.SetCrewCycleBindings(sessionID)
// Wait for shell to be ready
if err := t.WaitForShellReady(sessionID, constants.ShellReadyTimeout); err != nil {
return fmt.Errorf("waiting for shell: %w", err)
}
// Build the startup beacon for predecessor discovery via /resume
// Pass it as Claude's initial prompt - processed when Claude is ready
address := fmt.Sprintf("%s/crew/%s", m.rig.Name, name)
topic := opts.Topic
if topic == "" {
topic = "start"
}
beacon := session.FormatStartupNudge(session.StartupNudgeConfig{
Recipient: address,
Sender: "human",
Topic: topic,
})
// Start claude with environment exports and beacon as initial prompt
// SessionStart hook handles context loading (gt prime --hook)
claudeCmd, err := config.BuildCrewStartupCommandWithAgentOverride(m.rig.Name, name, m.rig.Path, beacon, opts.AgentOverride)
if err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("building startup command: %w", err)
}
// For interactive/refresh mode, remove --dangerously-skip-permissions
if opts.Interactive {
claudeCmd = strings.Replace(claudeCmd, " --dangerously-skip-permissions", "", 1)
}
if err := t.SendKeys(sessionID, claudeCmd); err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup
return fmt.Errorf("starting claude: %w", err)
}
// Wait for Claude to start (non-fatal: session continues even if this times out)
_ = t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout)

View File

@@ -79,8 +79,16 @@ func (m *Manager) Start(agentOverride string) error {
return fmt.Errorf("ensuring Claude settings: %w", err)
}
// Create new tmux session
if err := t.NewSession(sessionID, deaconDir); err != nil {
// Build startup command first
// Restarts are handled by daemon via ensureDeaconRunning on each heartbeat
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "deacon", "", "", agentOverride)
if err != nil {
return fmt.Errorf("building startup command: %w", err)
}
// Create session with command directly to avoid send-keys race condition.
// See: https://github.com/anthropics/gastown/issues/280
if err := t.NewSessionWithCommand(sessionID, deaconDir, startupCmd); err != nil {
return fmt.Errorf("creating tmux session: %w", err)
}
@@ -92,24 +100,6 @@ func (m *Manager) Start(agentOverride string) error {
theme := tmux.DeaconTheme()
_ = t.ConfigureGasTownSession(sessionID, theme, "", "Deacon", "health-check")
// Launch Claude directly (no shell respawn loop)
// Restarts are handled by daemon via ensureDeaconRunning on each heartbeat
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "deacon", "", "", agentOverride)
if err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("building startup command: %w", err)
}
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("waiting for shell: %w", err)
}
if err := t.SendKeysDelayed(sessionID, startupCmd, 200); err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup
return fmt.Errorf("starting Claude agent: %w", err)
}
// Wait for Claude to start (non-fatal)
if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
// Non-fatal - try to continue anyway

View File

@@ -78,8 +78,18 @@ func (m *Manager) Start(agentOverride string) error {
return fmt.Errorf("ensuring Claude settings: %w", err)
}
// Create new tmux session
if err := t.NewSession(sessionID, mayorDir); err != nil {
// Build startup command first - the startup hook handles 'gt prime' automatically
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("mayor", "mayor", "", "", agentOverride)
if err != nil {
return fmt.Errorf("building startup command: %w", err)
}
// Create session with command directly to avoid send-keys race condition.
// This runs the command as the pane's initial process, avoiding the shell
// readiness timing issues that cause "bad pattern" and command-not-found errors.
// See: https://github.com/anthropics/gastown/issues/280
if err := t.NewSessionWithCommand(sessionID, mayorDir, startupCmd); err != nil {
return fmt.Errorf("creating tmux session: %w", err)
}
@@ -91,23 +101,6 @@ func (m *Manager) Start(agentOverride string) error {
theme := tmux.MayorTheme()
_ = t.ConfigureGasTownSession(sessionID, theme, "", "Mayor", "coordinator")
// Launch Claude - the startup hook handles 'gt prime' automatically
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("mayor", "mayor", "", "", agentOverride)
if err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup
return fmt.Errorf("building startup command: %w", err)
}
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("waiting for shell: %w", err)
}
if err := t.SendKeysDelayed(sessionID, startupCmd, 200); err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup
return fmt.Errorf("starting Claude agent: %w", err)
}
// Wait for Claude to start (non-fatal)
if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
// Non-fatal - try to continue anyway

View File

@@ -281,7 +281,7 @@ func (m *Manager) AddWithOptions(name string, opts AddOptions) (*Polecat, error)
// Copy overlay files from .runtime/overlay/ to polecat root.
// This allows services to have .env and other config files at their root.
if err := rig.CopyOverlay(m.rig.Path, polecatPath); err != nil {
if err := rig.CopyOverlay(m.rig.Path, clonePath); err != nil {
// Non-fatal - log warning but continue
fmt.Printf("Warning: could not copy overlay files: %v\n", err)
}
@@ -538,7 +538,7 @@ func (m *Manager) RepairWorktreeWithOptions(name string, force bool, opts AddOpt
}
// Copy overlay files from .runtime/overlay/ to polecat root.
if err := rig.CopyOverlay(m.rig.Path, polecatPath); err != nil {
if err := rig.CopyOverlay(m.rig.Path, newClonePath); err != nil {
fmt.Printf("Warning: could not copy overlay files: %v\n", err)
}
@@ -787,9 +787,9 @@ func (m *Manager) loadFromBeads(name string) (*Polecat, error) {
// setupSharedBeads creates a redirect file so the polecat uses the rig's shared .beads database.
// This eliminates the need for git sync between polecat clones - all polecats share one database.
func (m *Manager) setupSharedBeads(polecatPath string) error {
func (m *Manager) setupSharedBeads(clonePath string) error {
townRoot := filepath.Dir(m.rig.Path)
return beads.SetupRedirect(townRoot, polecatPath)
return beads.SetupRedirect(townRoot, clonePath)
}
// CleanupStaleBranches removes orphaned polecat branches that are no longer in use.

View File

@@ -168,8 +168,19 @@ func (m *SessionManager) Start(polecat string, opts SessionStartOptions) error {
return fmt.Errorf("ensuring runtime settings: %w", err)
}
// Create session
if err := m.tmux.NewSession(sessionID, workDir); err != nil {
// Build startup command first
command := opts.Command
if command == "" {
command = config.BuildPolecatStartupCommand(m.rig.Name, polecat, m.rig.Path, "")
}
// Prepend runtime config dir env if needed
if runtimeConfig.Session != nil && runtimeConfig.Session.ConfigDirEnv != "" && opts.RuntimeConfigDir != "" {
command = config.PrependEnv(command, map[string]string{runtimeConfig.Session.ConfigDirEnv: opts.RuntimeConfigDir})
}
// Create session with command directly to avoid send-keys race condition.
// See: https://github.com/anthropics/gastown/issues/280
if err := m.tmux.NewSessionWithCommand(sessionID, workDir, command); err != nil {
return fmt.Errorf("creating session: %w", err)
}
@@ -205,24 +216,6 @@ func (m *SessionManager) Start(polecat string, opts SessionStartOptions) error {
agentID := fmt.Sprintf("%s/%s", m.rig.Name, polecat)
debugSession("SetPaneDiedHook", m.tmux.SetPaneDiedHook(sessionID, agentID))
// Send initial command with env vars exported inline
command := opts.Command
if command == "" {
command = config.BuildPolecatStartupCommand(m.rig.Name, polecat, m.rig.Path, "")
}
// Prepend runtime config dir env if needed
if runtimeConfig.Session != nil && runtimeConfig.Session.ConfigDirEnv != "" && opts.RuntimeConfigDir != "" {
command = config.PrependEnv(command, map[string]string{runtimeConfig.Session.ConfigDirEnv: opts.RuntimeConfigDir})
}
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
if err := m.tmux.WaitForShellReady(sessionID, 5*time.Second); err != nil {
_ = m.tmux.KillSession(sessionID)
return fmt.Errorf("waiting for shell: %w", err)
}
if err := m.tmux.SendKeys(sessionID, command); err != nil {
return fmt.Errorf("sending command: %w", err)
}
// Wait for Claude to start (non-fatal)
debugSession("WaitForCommand", m.tmux.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout))

View File

@@ -174,12 +174,17 @@ func (m *Manager) Start(foreground bool) error {
return fmt.Errorf("ensuring runtime settings: %w", err)
}
if err := t.NewSession(sessionID, refineryRigDir); err != nil {
// Build startup command first
bdActor := fmt.Sprintf("%s/refinery", m.rig.Name)
command := config.BuildAgentStartupCommand("refinery", bdActor, m.rig.Path, "")
// Create session with command directly to avoid send-keys race condition.
// See: https://github.com/anthropics/gastown/issues/280
if err := t.NewSessionWithCommand(sessionID, refineryRigDir, command); err != nil {
return fmt.Errorf("creating tmux session: %w", err)
}
// Set environment variables (non-fatal: session works without these)
bdActor := fmt.Sprintf("%s/refinery", m.rig.Name)
_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
_ = t.SetEnvironment(sessionID, "GT_REFINERY", "1")
_ = t.SetEnvironment(sessionID, "GT_ROLE", "refinery")
@@ -206,22 +211,6 @@ func (m *Manager) Start(foreground bool) error {
return fmt.Errorf("saving state: %w", err)
}
// Start Claude agent with full permissions (like polecats)
// NOTE: No gt prime injection needed - SessionStart hook handles it automatically
// Restarts are handled by daemon via LIFECYCLE mail, not shell loops
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
command := config.BuildAgentStartupCommand("refinery", bdActor, m.rig.Path, "")
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("waiting for shell: %w", err)
}
if err := t.SendKeys(sessionID, command); err != nil {
// Clean up the session on failure (best-effort cleanup)
_ = t.KillSession(sessionID)
return fmt.Errorf("starting Claude agent: %w", err)
}
// Wait for Claude to start and show its prompt (non-fatal)
// WaitForRuntimeReady waits for the runtime to be ready
if err := t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout); err != nil {

View File

@@ -78,6 +78,22 @@ func (t *Tmux) NewSession(name, workDir string) error {
return err
}
// NewSessionWithCommand starts a detached tmux session called name and hands
// command to tmux as the session's start-up command, so it is launched as the
// pane's initial process rather than being typed into an interactive shell.
// This sidesteps the NewSession + SendKeys race in which keystrokes could
// arrive before the shell was ready to accept them.
// An empty workDir omits the -c flag, leaving the start directory to tmux.
// The error from t.run is returned unchanged.
// See: https://github.com/anthropics/gastown/issues/280
func (t *Tmux) NewSessionWithCommand(name, workDir, command string) error {
	argv := []string{"new-session", "-d", "-s", name}
	if len(workDir) > 0 {
		argv = append(argv, "-c", workDir)
	}

	// tmux expects the command as the final positional argument.
	argv = append(argv, command)

	_, runErr := t.run(argv...)
	return runErr
}
// EnsureSessionFresh ensures a session is available and healthy.
// If the session exists but is a zombie (Claude not running), it kills the session first.
// This prevents "session already exists" errors when trying to restart dead agents.

View File

@@ -151,13 +151,19 @@ func (m *Manager) Start(foreground bool) error {
return fmt.Errorf("ensuring Claude settings: %w", err)
}
// Create new tmux session
if err := t.NewSession(sessionID, witnessDir); err != nil {
// Build startup command first
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
bdActor := fmt.Sprintf("%s/witness", m.rig.Name)
command := config.BuildAgentStartupCommand("witness", bdActor, m.rig.Path, "")
runtimeConfig := config.LoadRuntimeConfig(m.rig.Path)
// Create session with command directly to avoid send-keys race condition.
// See: https://github.com/anthropics/gastown/issues/280
if err := t.NewSessionWithCommand(sessionID, witnessDir, command); err != nil {
return fmt.Errorf("creating tmux session: %w", err)
}
// Set environment variables (non-fatal: session works without these)
bdActor := fmt.Sprintf("%s/witness", m.rig.Name)
_ = t.SetEnvironment(sessionID, "GT_ROLE", "witness")
_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
_ = t.SetEnvironment(sessionID, "BD_ACTOR", bdActor)
@@ -177,23 +183,6 @@ func (m *Manager) Start(foreground bool) error {
return fmt.Errorf("saving state: %w", err)
}
// Launch Claude directly (no shell respawn loop)
// Restarts are handled by daemon via LIFECYCLE mail or deacon health-scan
// NOTE: No gt prime injection needed - SessionStart hook handles it automatically
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
command := config.BuildAgentStartupCommand("witness", bdActor, m.rig.Path, "")
runtimeConfig := config.LoadRuntimeConfig(m.rig.Path)
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
_ = t.KillSession(sessionID)
return fmt.Errorf("waiting for shell: %w", err)
}
if err := t.SendKeys(sessionID, command); err != nil {
_ = t.KillSession(sessionID) // best-effort cleanup
return fmt.Errorf("starting Claude agent: %w", err)
}
// Wait for runtime to start and show its prompt (non-fatal)
if err := t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout); err != nil {
// Non-fatal - try to continue anyway