fix(tmux): use NewSessionWithCommand to avoid send-keys race condition
Agent sessions would fail on startup because send-keys arrived before the shell was ready, causing 'bad pattern' and 'command not found' errors.

Fix: Create sessions with the command directly using tmux new-session's command argument. This runs the agent as the pane's initial process, avoiding shell readiness timing issues entirely.

Updated all agent managers: mayor, deacon, witness, refinery, polecat, crew.

Also fixes pre-existing build error in polecat/manager.go (polecatPath → clonePath/newClonePath).

Closes #280

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -482,8 +482,34 @@ func (m *Manager) Start(name string, opts StartOptions) error {
|
||||
return fmt.Errorf("ensuring Claude settings: %w", err)
|
||||
}
|
||||
|
||||
// Create tmux session
|
||||
if err := t.NewSession(sessionID, worker.ClonePath); err != nil {
|
||||
// Build the startup beacon for predecessor discovery via /resume
|
||||
// Pass it as Claude's initial prompt - processed when Claude is ready
|
||||
address := fmt.Sprintf("%s/crew/%s", m.rig.Name, name)
|
||||
topic := opts.Topic
|
||||
if topic == "" {
|
||||
topic = "start"
|
||||
}
|
||||
beacon := session.FormatStartupNudge(session.StartupNudgeConfig{
|
||||
Recipient: address,
|
||||
Sender: "human",
|
||||
Topic: topic,
|
||||
})
|
||||
|
||||
// Build startup command first
|
||||
// SessionStart hook handles context loading (gt prime --hook)
|
||||
claudeCmd, err := config.BuildCrewStartupCommandWithAgentOverride(m.rig.Name, name, m.rig.Path, beacon, opts.AgentOverride)
|
||||
if err != nil {
|
||||
return fmt.Errorf("building startup command: %w", err)
|
||||
}
|
||||
|
||||
// For interactive/refresh mode, remove --dangerously-skip-permissions
|
||||
if opts.Interactive {
|
||||
claudeCmd = strings.Replace(claudeCmd, " --dangerously-skip-permissions", "", 1)
|
||||
}
|
||||
|
||||
// Create session with command directly to avoid send-keys race condition.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
if err := t.NewSessionWithCommand(sessionID, worker.ClonePath, claudeCmd); err != nil {
|
||||
return fmt.Errorf("creating session: %w", err)
|
||||
}
|
||||
|
||||
@@ -504,41 +530,6 @@ func (m *Manager) Start(name string, opts StartOptions) error {
|
||||
// Set up C-b n/p keybindings for crew session cycling (non-fatal)
|
||||
_ = t.SetCrewCycleBindings(sessionID)
|
||||
|
||||
// Wait for shell to be ready
|
||||
if err := t.WaitForShellReady(sessionID, constants.ShellReadyTimeout); err != nil {
|
||||
return fmt.Errorf("waiting for shell: %w", err)
|
||||
}
|
||||
|
||||
// Build the startup beacon for predecessor discovery via /resume
|
||||
// Pass it as Claude's initial prompt - processed when Claude is ready
|
||||
address := fmt.Sprintf("%s/crew/%s", m.rig.Name, name)
|
||||
topic := opts.Topic
|
||||
if topic == "" {
|
||||
topic = "start"
|
||||
}
|
||||
beacon := session.FormatStartupNudge(session.StartupNudgeConfig{
|
||||
Recipient: address,
|
||||
Sender: "human",
|
||||
Topic: topic,
|
||||
})
|
||||
|
||||
// Start claude with environment exports and beacon as initial prompt
|
||||
// SessionStart hook handles context loading (gt prime --hook)
|
||||
claudeCmd, err := config.BuildCrewStartupCommandWithAgentOverride(m.rig.Name, name, m.rig.Path, beacon, opts.AgentOverride)
|
||||
if err != nil {
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("building startup command: %w", err)
|
||||
}
|
||||
|
||||
// For interactive/refresh mode, remove --dangerously-skip-permissions
|
||||
if opts.Interactive {
|
||||
claudeCmd = strings.Replace(claudeCmd, " --dangerously-skip-permissions", "", 1)
|
||||
}
|
||||
if err := t.SendKeys(sessionID, claudeCmd); err != nil {
|
||||
_ = t.KillSession(sessionID) // best-effort cleanup
|
||||
return fmt.Errorf("starting claude: %w", err)
|
||||
}
|
||||
|
||||
// Wait for Claude to start (non-fatal: session continues even if this times out)
|
||||
_ = t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout)
|
||||
|
||||
|
||||
@@ -79,8 +79,16 @@ func (m *Manager) Start(agentOverride string) error {
|
||||
return fmt.Errorf("ensuring Claude settings: %w", err)
|
||||
}
|
||||
|
||||
// Create new tmux session
|
||||
if err := t.NewSession(sessionID, deaconDir); err != nil {
|
||||
// Build startup command first
|
||||
// Restarts are handled by daemon via ensureDeaconRunning on each heartbeat
|
||||
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "deacon", "", "", agentOverride)
|
||||
if err != nil {
|
||||
return fmt.Errorf("building startup command: %w", err)
|
||||
}
|
||||
|
||||
// Create session with command directly to avoid send-keys race condition.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
if err := t.NewSessionWithCommand(sessionID, deaconDir, startupCmd); err != nil {
|
||||
return fmt.Errorf("creating tmux session: %w", err)
|
||||
}
|
||||
|
||||
@@ -92,24 +100,6 @@ func (m *Manager) Start(agentOverride string) error {
|
||||
theme := tmux.DeaconTheme()
|
||||
_ = t.ConfigureGasTownSession(sessionID, theme, "", "Deacon", "health-check")
|
||||
|
||||
// Launch Claude directly (no shell respawn loop)
|
||||
// Restarts are handled by daemon via ensureDeaconRunning on each heartbeat
|
||||
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("deacon", "deacon", "", "", agentOverride)
|
||||
if err != nil {
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("building startup command: %w", err)
|
||||
}
|
||||
|
||||
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
|
||||
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("waiting for shell: %w", err)
|
||||
}
|
||||
if err := t.SendKeysDelayed(sessionID, startupCmd, 200); err != nil {
|
||||
_ = t.KillSession(sessionID) // best-effort cleanup
|
||||
return fmt.Errorf("starting Claude agent: %w", err)
|
||||
}
|
||||
|
||||
// Wait for Claude to start (non-fatal)
|
||||
if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
|
||||
// Non-fatal - try to continue anyway
|
||||
|
||||
@@ -78,8 +78,18 @@ func (m *Manager) Start(agentOverride string) error {
|
||||
return fmt.Errorf("ensuring Claude settings: %w", err)
|
||||
}
|
||||
|
||||
// Create new tmux session
|
||||
if err := t.NewSession(sessionID, mayorDir); err != nil {
|
||||
// Build startup command first - the startup hook handles 'gt prime' automatically
|
||||
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
|
||||
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("mayor", "mayor", "", "", agentOverride)
|
||||
if err != nil {
|
||||
return fmt.Errorf("building startup command: %w", err)
|
||||
}
|
||||
|
||||
// Create session with command directly to avoid send-keys race condition.
|
||||
// This runs the command as the pane's initial process, avoiding the shell
|
||||
// readiness timing issues that cause "bad pattern" and command-not-found errors.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
if err := t.NewSessionWithCommand(sessionID, mayorDir, startupCmd); err != nil {
|
||||
return fmt.Errorf("creating tmux session: %w", err)
|
||||
}
|
||||
|
||||
@@ -91,23 +101,6 @@ func (m *Manager) Start(agentOverride string) error {
|
||||
theme := tmux.MayorTheme()
|
||||
_ = t.ConfigureGasTownSession(sessionID, theme, "", "Mayor", "coordinator")
|
||||
|
||||
// Launch Claude - the startup hook handles 'gt prime' automatically
|
||||
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
|
||||
startupCmd, err := config.BuildAgentStartupCommandWithAgentOverride("mayor", "mayor", "", "", agentOverride)
|
||||
if err != nil {
|
||||
_ = t.KillSession(sessionID) // best-effort cleanup
|
||||
return fmt.Errorf("building startup command: %w", err)
|
||||
}
|
||||
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
|
||||
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("waiting for shell: %w", err)
|
||||
}
|
||||
if err := t.SendKeysDelayed(sessionID, startupCmd, 200); err != nil {
|
||||
_ = t.KillSession(sessionID) // best-effort cleanup
|
||||
return fmt.Errorf("starting Claude agent: %w", err)
|
||||
}
|
||||
|
||||
// Wait for Claude to start (non-fatal)
|
||||
if err := t.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
|
||||
// Non-fatal - try to continue anyway
|
||||
|
||||
@@ -281,7 +281,7 @@ func (m *Manager) AddWithOptions(name string, opts AddOptions) (*Polecat, error)
|
||||
|
||||
// Copy overlay files from .runtime/overlay/ to polecat root.
|
||||
// This allows services to have .env and other config files at their root.
|
||||
if err := rig.CopyOverlay(m.rig.Path, polecatPath); err != nil {
|
||||
if err := rig.CopyOverlay(m.rig.Path, clonePath); err != nil {
|
||||
// Non-fatal - log warning but continue
|
||||
fmt.Printf("Warning: could not copy overlay files: %v\n", err)
|
||||
}
|
||||
@@ -538,7 +538,7 @@ func (m *Manager) RepairWorktreeWithOptions(name string, force bool, opts AddOpt
|
||||
}
|
||||
|
||||
// Copy overlay files from .runtime/overlay/ to polecat root.
|
||||
if err := rig.CopyOverlay(m.rig.Path, polecatPath); err != nil {
|
||||
if err := rig.CopyOverlay(m.rig.Path, newClonePath); err != nil {
|
||||
fmt.Printf("Warning: could not copy overlay files: %v\n", err)
|
||||
}
|
||||
|
||||
@@ -787,9 +787,9 @@ func (m *Manager) loadFromBeads(name string) (*Polecat, error) {
|
||||
|
||||
// setupSharedBeads creates a redirect file so the polecat uses the rig's shared .beads database.
|
||||
// This eliminates the need for git sync between polecat clones - all polecats share one database.
|
||||
func (m *Manager) setupSharedBeads(polecatPath string) error {
|
||||
func (m *Manager) setupSharedBeads(clonePath string) error {
|
||||
townRoot := filepath.Dir(m.rig.Path)
|
||||
return beads.SetupRedirect(townRoot, polecatPath)
|
||||
return beads.SetupRedirect(townRoot, clonePath)
|
||||
}
|
||||
|
||||
// CleanupStaleBranches removes orphaned polecat branches that are no longer in use.
|
||||
|
||||
@@ -168,8 +168,19 @@ func (m *SessionManager) Start(polecat string, opts SessionStartOptions) error {
|
||||
return fmt.Errorf("ensuring runtime settings: %w", err)
|
||||
}
|
||||
|
||||
// Create session
|
||||
if err := m.tmux.NewSession(sessionID, workDir); err != nil {
|
||||
// Build startup command first
|
||||
command := opts.Command
|
||||
if command == "" {
|
||||
command = config.BuildPolecatStartupCommand(m.rig.Name, polecat, m.rig.Path, "")
|
||||
}
|
||||
// Prepend runtime config dir env if needed
|
||||
if runtimeConfig.Session != nil && runtimeConfig.Session.ConfigDirEnv != "" && opts.RuntimeConfigDir != "" {
|
||||
command = config.PrependEnv(command, map[string]string{runtimeConfig.Session.ConfigDirEnv: opts.RuntimeConfigDir})
|
||||
}
|
||||
|
||||
// Create session with command directly to avoid send-keys race condition.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
if err := m.tmux.NewSessionWithCommand(sessionID, workDir, command); err != nil {
|
||||
return fmt.Errorf("creating session: %w", err)
|
||||
}
|
||||
|
||||
@@ -205,24 +216,6 @@ func (m *SessionManager) Start(polecat string, opts SessionStartOptions) error {
|
||||
agentID := fmt.Sprintf("%s/%s", m.rig.Name, polecat)
|
||||
debugSession("SetPaneDiedHook", m.tmux.SetPaneDiedHook(sessionID, agentID))
|
||||
|
||||
// Send initial command with env vars exported inline
|
||||
command := opts.Command
|
||||
if command == "" {
|
||||
command = config.BuildPolecatStartupCommand(m.rig.Name, polecat, m.rig.Path, "")
|
||||
}
|
||||
// Prepend runtime config dir env if needed
|
||||
if runtimeConfig.Session != nil && runtimeConfig.Session.ConfigDirEnv != "" && opts.RuntimeConfigDir != "" {
|
||||
command = config.PrependEnv(command, map[string]string{runtimeConfig.Session.ConfigDirEnv: opts.RuntimeConfigDir})
|
||||
}
|
||||
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
|
||||
if err := m.tmux.WaitForShellReady(sessionID, 5*time.Second); err != nil {
|
||||
_ = m.tmux.KillSession(sessionID)
|
||||
return fmt.Errorf("waiting for shell: %w", err)
|
||||
}
|
||||
if err := m.tmux.SendKeys(sessionID, command); err != nil {
|
||||
return fmt.Errorf("sending command: %w", err)
|
||||
}
|
||||
|
||||
// Wait for Claude to start (non-fatal)
|
||||
debugSession("WaitForCommand", m.tmux.WaitForCommand(sessionID, constants.SupportedShells, constants.ClaudeStartTimeout))
|
||||
|
||||
|
||||
@@ -174,12 +174,17 @@ func (m *Manager) Start(foreground bool) error {
|
||||
return fmt.Errorf("ensuring runtime settings: %w", err)
|
||||
}
|
||||
|
||||
if err := t.NewSession(sessionID, refineryRigDir); err != nil {
|
||||
// Build startup command first
|
||||
bdActor := fmt.Sprintf("%s/refinery", m.rig.Name)
|
||||
command := config.BuildAgentStartupCommand("refinery", bdActor, m.rig.Path, "")
|
||||
|
||||
// Create session with command directly to avoid send-keys race condition.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
if err := t.NewSessionWithCommand(sessionID, refineryRigDir, command); err != nil {
|
||||
return fmt.Errorf("creating tmux session: %w", err)
|
||||
}
|
||||
|
||||
// Set environment variables (non-fatal: session works without these)
|
||||
bdActor := fmt.Sprintf("%s/refinery", m.rig.Name)
|
||||
_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
|
||||
_ = t.SetEnvironment(sessionID, "GT_REFINERY", "1")
|
||||
_ = t.SetEnvironment(sessionID, "GT_ROLE", "refinery")
|
||||
@@ -206,22 +211,6 @@ func (m *Manager) Start(foreground bool) error {
|
||||
return fmt.Errorf("saving state: %w", err)
|
||||
}
|
||||
|
||||
// Start Claude agent with full permissions (like polecats)
|
||||
// NOTE: No gt prime injection needed - SessionStart hook handles it automatically
|
||||
// Restarts are handled by daemon via LIFECYCLE mail, not shell loops
|
||||
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
|
||||
command := config.BuildAgentStartupCommand("refinery", bdActor, m.rig.Path, "")
|
||||
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
|
||||
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("waiting for shell: %w", err)
|
||||
}
|
||||
if err := t.SendKeys(sessionID, command); err != nil {
|
||||
// Clean up the session on failure (best-effort cleanup)
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("starting Claude agent: %w", err)
|
||||
}
|
||||
|
||||
// Wait for Claude to start and show its prompt (non-fatal)
|
||||
// WaitForRuntimeReady waits for the runtime to be ready
|
||||
if err := t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout); err != nil {
|
||||
|
||||
@@ -78,6 +78,22 @@ func (t *Tmux) NewSession(name, workDir string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// NewSessionWithCommand creates a new detached tmux session that immediately runs a command.
|
||||
// Unlike NewSession + SendKeys, this avoids race conditions where the shell isn't ready
|
||||
// or the command arrives before the shell prompt. The command runs directly as the
|
||||
// initial process of the pane.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
func (t *Tmux) NewSessionWithCommand(name, workDir, command string) error {
|
||||
args := []string{"new-session", "-d", "-s", name}
|
||||
if workDir != "" {
|
||||
args = append(args, "-c", workDir)
|
||||
}
|
||||
// Add the command as the last argument - tmux runs it as the pane's initial process
|
||||
args = append(args, command)
|
||||
_, err := t.run(args...)
|
||||
return err
|
||||
}
|
||||
|
||||
// EnsureSessionFresh ensures a session is available and healthy.
|
||||
// If the session exists but is a zombie (Claude not running), it kills the session first.
|
||||
// This prevents "session already exists" errors when trying to restart dead agents.
|
||||
|
||||
@@ -151,13 +151,19 @@ func (m *Manager) Start(foreground bool) error {
|
||||
return fmt.Errorf("ensuring Claude settings: %w", err)
|
||||
}
|
||||
|
||||
// Create new tmux session
|
||||
if err := t.NewSession(sessionID, witnessDir); err != nil {
|
||||
// Build startup command first
|
||||
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
|
||||
bdActor := fmt.Sprintf("%s/witness", m.rig.Name)
|
||||
command := config.BuildAgentStartupCommand("witness", bdActor, m.rig.Path, "")
|
||||
runtimeConfig := config.LoadRuntimeConfig(m.rig.Path)
|
||||
|
||||
// Create session with command directly to avoid send-keys race condition.
|
||||
// See: https://github.com/anthropics/gastown/issues/280
|
||||
if err := t.NewSessionWithCommand(sessionID, witnessDir, command); err != nil {
|
||||
return fmt.Errorf("creating tmux session: %w", err)
|
||||
}
|
||||
|
||||
// Set environment variables (non-fatal: session works without these)
|
||||
bdActor := fmt.Sprintf("%s/witness", m.rig.Name)
|
||||
_ = t.SetEnvironment(sessionID, "GT_ROLE", "witness")
|
||||
_ = t.SetEnvironment(sessionID, "GT_RIG", m.rig.Name)
|
||||
_ = t.SetEnvironment(sessionID, "BD_ACTOR", bdActor)
|
||||
@@ -177,23 +183,6 @@ func (m *Manager) Start(foreground bool) error {
|
||||
return fmt.Errorf("saving state: %w", err)
|
||||
}
|
||||
|
||||
// Launch Claude directly (no shell respawn loop)
|
||||
// Restarts are handled by daemon via LIFECYCLE mail or deacon health-scan
|
||||
// NOTE: No gt prime injection needed - SessionStart hook handles it automatically
|
||||
// Export GT_ROLE and BD_ACTOR in the command since tmux SetEnvironment only affects new panes
|
||||
// Pass m.rig.Path so rig agent settings are honored (not town-level defaults)
|
||||
command := config.BuildAgentStartupCommand("witness", bdActor, m.rig.Path, "")
|
||||
runtimeConfig := config.LoadRuntimeConfig(m.rig.Path)
|
||||
// Wait for shell to be ready before sending keys (prevents "can't find pane" under load)
|
||||
if err := t.WaitForShellReady(sessionID, 5*time.Second); err != nil {
|
||||
_ = t.KillSession(sessionID)
|
||||
return fmt.Errorf("waiting for shell: %w", err)
|
||||
}
|
||||
if err := t.SendKeys(sessionID, command); err != nil {
|
||||
_ = t.KillSession(sessionID) // best-effort cleanup
|
||||
return fmt.Errorf("starting Claude agent: %w", err)
|
||||
}
|
||||
|
||||
// Wait for runtime to start and show its prompt (non-fatal)
|
||||
if err := t.WaitForRuntimeReady(sessionID, runtimeConfig, constants.ClaudeStartTimeout); err != nil {
|
||||
// Non-fatal - try to continue anyway
|
||||
|
||||
Reference in New Issue
Block a user