feat: Add tmux crash detection hooks (gt-i9s7o)

- Add SetPaneDiedHook to tmux package for crash detection
- Add gt log crash subcommand for hook callback
- Set pane-died hook when starting polecat sessions
- Distinguish exit types: 0=done, 130=kill (Ctrl+C), other=crash
- Rename townlog/townlog.go to townlog/logger.go

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Steve Yegge
2025-12-26 16:18:44 -08:00
parent 490c8b6e24
commit 2117eb66f5
5 changed files with 103 additions and 0 deletions

View File

@@ -20,6 +20,11 @@ var (
logAgent string
logSince string
logFollow bool
// log crash flags
crashAgent string
crashSession string
crashExitCode int
)
var logCmd = &cobra.Command{
@@ -47,6 +52,23 @@ Examples:
RunE: runLog,
}
// logCrashCmd is the "gt log crash" subcommand. It is wired up as the callback
// for the tmux pane-died hook and records how a pane's process exited. The help
// text mirrors the classification performed by runLogCrash: 0 is a normal exit,
// 130 is a Ctrl+C interrupt, and anything else is a crash.
var logCrashCmd = &cobra.Command{
	Use:   "crash",
	Short: "Record a crash event (called by tmux pane-died hook)",
	Long: `Record a crash event to the town log.
This command is called automatically by tmux when a pane exits unexpectedly.
It's not typically run manually.
The exit code determines if this was a crash or expected exit:
- Exit code 0: Expected exit (logged as 'done' if no other done was recorded)
- Exit code 130: Interrupted with Ctrl+C / SIGINT (logged as 'kill')
- Any other exit code: Crash (logged as 'crash')
Examples:
gt log crash --agent gastown/Toast --session gt-gastown-Toast --exit-code 1`,
	RunE: runLogCrash,
}
func init() {
logCmd.Flags().IntVarP(&logTail, "tail", "n", 20, "Number of events to show")
logCmd.Flags().StringVarP(&logType, "type", "t", "", "Filter by event type (spawn,wake,nudge,handoff,done,crash,kill)")
@@ -54,6 +76,13 @@ func init() {
logCmd.Flags().StringVar(&logSince, "since", "", "Show events since duration (e.g., 1h, 30m, 24h)")
logCmd.Flags().BoolVarP(&logFollow, "follow", "f", false, "Follow log output (like tail -f)")
// crash subcommand flags
logCrashCmd.Flags().StringVar(&crashAgent, "agent", "", "Agent ID (e.g., gastown/Toast)")
logCrashCmd.Flags().StringVar(&crashSession, "session", "", "Tmux session name")
logCrashCmd.Flags().IntVar(&crashExitCode, "exit-code", -1, "Exit code from pane")
_ = logCrashCmd.MarkFlagRequired("agent")
logCmd.AddCommand(logCrashCmd)
rootCmd.AddCommand(logCmd)
}
@@ -231,6 +260,61 @@ func truncateStr(s string, maxLen int) string {
return s[:maxLen-3] + "..."
}
// runLogCrash handles the "gt log crash" command from tmux pane-died hooks.
//
// It locates the town root, classifies the pane's exit code (taken from the
// --exit-code flag) into a town log event type, and writes a single event for
// the agent named by --agent.
//
// Exit code mapping:
//   - 0:     townlog.EventDone  ("exited normally")
//   - 130:   townlog.EventKill  (Ctrl+C / SIGINT)
//   - other: townlog.EventCrash (includes the session name when provided)
//
// It returns an error when no town root can be found or when writing the log
// entry fails.
func runLogCrash(cmd *cobra.Command, args []string) error {
	townRoot, err := workspace.FindFromCwd()
	if err != nil || townRoot == "" {
		// This is called from a tmux hook, which may not have a useful cwd, so
		// fall back to well-known locations under the user's home directory.
		// Skip the probe entirely when HOME is unset: otherwise the candidates
		// would collapse to "/gt" and "/gastown" at the filesystem root.
		if home := os.Getenv("HOME"); home != "" {
			for _, root := range []string{home + "/gt", home + "/gastown"} {
				// A town root is recognized by its "mayor" entry.
				if _, statErr := os.Stat(root + "/mayor"); statErr == nil {
					townRoot = root
					break
				}
			}
		}
		if townRoot == "" {
			if err != nil {
				// Keep the original lookup failure so the cause is not lost.
				return fmt.Errorf("cannot find town root: %w", err)
			}
			return fmt.Errorf("cannot find town root")
		}
	}

	// Determine event type based on exit code.
	var (
		eventType townlog.EventType
		detail    string
	)
	switch crashExitCode {
	case 0:
		// Normal exit: could be handoff, done, or user quit. It is recorded as
		// "done"; the Witness can analyze further if needed.
		eventType = townlog.EventDone
		detail = "exited normally"
	case 130:
		// 130 = 128 + SIGINT(2): typically an intentional user interrupt.
		eventType = townlog.EventKill
		detail = fmt.Sprintf("interrupted (exit %d)", crashExitCode)
	default:
		// Anything else (including the flag default of -1) counts as a crash.
		eventType = townlog.EventCrash
		detail = fmt.Sprintf("exit code %d", crashExitCode)
		if crashSession != "" {
			detail += fmt.Sprintf(" (session: %s)", crashSession)
		}
	}

	// Log the event.
	logger := townlog.NewLogger(townRoot)
	if err := logger.Log(eventType, crashAgent, detail); err != nil {
		return fmt.Errorf("logging event: %w", err)
	}
	return nil
}
// LogEvent is a helper that logs an event from anywhere in the codebase.
// It finds the town root and logs the event.
func LogEvent(eventType townlog.EventType, agent, context string) error {

View File

@@ -157,6 +157,10 @@ func (m *Manager) Start(polecat string, opts StartOptions) error {
theme := tmux.AssignTheme(m.rig.Name)
_ = m.tmux.ConfigureGasTownSession(sessionID, theme, m.rig.Name, polecat, "polecat")
// Set pane-died hook for crash detection (non-fatal)
agentID := fmt.Sprintf("%s/%s", m.rig.Name, polecat)
_ = m.tmux.SetPaneDiedHook(sessionID, agentID)
// Send initial command
command := opts.Command
if command == "" {

View File

@@ -634,3 +634,18 @@ func (t *Tmux) SetCrewCycleBindings(session string) error {
}
return nil
}
// SetPaneDiedHook sets a pane-died hook on a session to detect crashes.
//
// When the hook fires, tmux runs "gt log crash" through run-shell, passing the
// agent ID, the session name, and #{pane_dead_status} (the exit code of the
// process that died) so the event is recorded in the town log.
//
// The agentID is used to identify the agent in crash logs (e.g., "gastown/Toast").
//
// An error is returned, and no hook is installed, when session or agentID
// contains a character that could not be embedded safely in the hook command.
// Otherwise the result of the underlying tmux set-hook invocation is returned.
//
// NOTE(review): per the tmux manual, pane-died only fires when remain-on-exit
// is enabled for the pane; this function does not set that option. Confirm it
// is configured elsewhere, otherwise the hook will never run.
func (t *Tmux) SetPaneDiedHook(session, agentID string) error {
	// Both values end up inside a single-quoted shell word that is itself
	// nested in a double-quoted tmux argument subject to format expansion.
	// Reject characters that are significant to any of those layers instead of
	// installing a hook command that is broken or runs unintended shell code.
	for _, arg := range [...]string{agentID, session} {
		for _, r := range arg {
			switch r {
			case '\'', '"', '\\', '$', '`', '#', '\n', '\r':
				return fmt.Errorf("unsafe character %q in pane-died hook argument %q", r, arg)
			}
		}
	}

	// #{pane_dead_status} is expanded by tmux when the hook runs.
	hookCmd := fmt.Sprintf(`run-shell "gt log crash --agent '%s' --session '%s' --exit-code #{pane_dead_status}"`,
		agentID, session)

	// Set the hook on this specific session.
	_, err := t.run("set-hook", "-t", session, "pane-died", hookCmd)
	return err
}