diff --git a/internal/cmd/log.go b/internal/cmd/log.go index bb523a2a..3d3ba243 100644 --- a/internal/cmd/log.go +++ b/internal/cmd/log.go @@ -20,6 +20,11 @@ var ( logAgent string logSince string logFollow bool + + // log crash flags + crashAgent string + crashSession string + crashExitCode int ) var logCmd = &cobra.Command{ @@ -47,6 +52,23 @@ Examples: RunE: runLog, } +var logCrashCmd = &cobra.Command{ + Use: "crash", + Short: "Record a crash event (called by tmux pane-died hook)", + Long: `Record a crash event to the town log. + +This command is called automatically by tmux when a pane exits unexpectedly. +It's not typically run manually. + +The exit code determines if this was a crash or expected exit: + - Exit code 0: Expected exit (logged as 'done' if no other done was recorded) + - Exit code non-zero: Crash (logged as 'crash') + +Examples: + gt log crash --agent gastown/Toast --session gt-gastown-Toast --exit-code 1`, + RunE: runLogCrash, +} + func init() { logCmd.Flags().IntVarP(&logTail, "tail", "n", 20, "Number of events to show") logCmd.Flags().StringVarP(&logType, "type", "t", "", "Filter by event type (spawn,wake,nudge,handoff,done,crash,kill)") @@ -54,6 +76,13 @@ func init() { logCmd.Flags().StringVar(&logSince, "since", "", "Show events since duration (e.g., 1h, 30m, 24h)") logCmd.Flags().BoolVarP(&logFollow, "follow", "f", false, "Follow log output (like tail -f)") + // crash subcommand flags + logCrashCmd.Flags().StringVar(&crashAgent, "agent", "", "Agent ID (e.g., gastown/Toast)") + logCrashCmd.Flags().StringVar(&crashSession, "session", "", "Tmux session name") + logCrashCmd.Flags().IntVar(&crashExitCode, "exit-code", -1, "Exit code from pane") + _ = logCrashCmd.MarkFlagRequired("agent") + + logCmd.AddCommand(logCrashCmd) rootCmd.AddCommand(logCmd) } @@ -231,6 +260,61 @@ func truncateStr(s string, maxLen int) string { return s[:maxLen-3] + "..." } +// runLogCrash handles the "gt log crash" command from tmux pane-died hooks. +func runLogCrash(cmd *cobra.Command, args []string) error { + townRoot, err := workspace.FindFromCwd() + if err != nil || townRoot == "" { + // Try to find town root from common locations + // This is called from tmux hook which may not have proper cwd + home := os.Getenv("HOME") + possibleRoots := []string{ + home + "/gt", + home + "/gastown", + } + for _, root := range possibleRoots { + if _, statErr := os.Stat(root + "/mayor"); statErr == nil { + townRoot = root + break + } + } + if townRoot == "" { + return fmt.Errorf("cannot find town root") + } + } + + // Determine event type based on exit code + var eventType townlog.EventType + var context string + + if crashExitCode == 0 { + // Exit code 0 = normal exit + // Could be handoff, done, or user quit - we log as "done" if no prior done event + // The Witness can analyze further if needed + eventType = townlog.EventDone + context = "exited normally" + } else if crashExitCode == 130 { + // Exit code 130 = Ctrl+C (SIGINT) + // This is typically intentional user interrupt + eventType = townlog.EventKill + context = fmt.Sprintf("interrupted (exit %d)", crashExitCode) + } else { + // Non-zero exit = crash + eventType = townlog.EventCrash + context = fmt.Sprintf("exit code %d", crashExitCode) + if crashSession != "" { + context += fmt.Sprintf(" (session: %s)", crashSession) + } + } + + // Log the event + logger := townlog.NewLogger(townRoot) + if err := logger.Log(eventType, crashAgent, context); err != nil { + return fmt.Errorf("logging event: %w", err) + } + + return nil +} + // LogEvent is a helper that logs an event from anywhere in the codebase. // It finds the town root and logs the event. func LogEvent(eventType townlog.EventType, agent, context string) error { diff --git a/internal/session/manager.go b/internal/session/manager.go index 07b39ef3..c5ac47f2 100644 --- a/internal/session/manager.go +++ b/internal/session/manager.go @@ -157,6 +157,10 @@ func (m *Manager) Start(polecat string, opts StartOptions) error { theme := tmux.AssignTheme(m.rig.Name) _ = m.tmux.ConfigureGasTownSession(sessionID, theme, m.rig.Name, polecat, "polecat") + // Set pane-died hook for crash detection (non-fatal) + agentID := fmt.Sprintf("%s/%s", m.rig.Name, polecat) + _ = m.tmux.SetPaneDiedHook(sessionID, agentID) + // Send initial command command := opts.Command if command == "" { diff --git a/internal/tmux/tmux.go b/internal/tmux/tmux.go index d48f2527..baffbd95 100644 --- a/internal/tmux/tmux.go +++ b/internal/tmux/tmux.go @@ -634,3 +634,18 @@ func (t *Tmux) SetCrewCycleBindings(session string) error { } return nil } + +// SetPaneDiedHook sets a pane-died hook on a session to detect crashes. +// When the pane exits, tmux runs the hook command with exit status info. +// The agentID is used to identify the agent in crash logs (e.g., "gastown/Toast"). +func (t *Tmux) SetPaneDiedHook(session, agentID string) error { + // Hook command logs the crash with exit status + // #{pane_dead_status} is the exit code of the process that died + // We run gt log crash which records to the town log + hookCmd := fmt.Sprintf(`run-shell "gt log crash --agent '%s' --session '%s' --exit-code #{pane_dead_status}"`, + agentID, session) + + // Set the hook on this specific session + _, err := t.run("set-hook", "-t", session, "pane-died", hookCmd) + return err +} diff --git a/internal/townlog/townlog.go b/internal/townlog/logger.go similarity index 100% rename from internal/townlog/townlog.go rename to internal/townlog/logger.go diff --git a/internal/townlog/townlog_test.go b/internal/townlog/logger_test.go similarity index 100% rename from internal/townlog/townlog_test.go rename to internal/townlog/logger_test.go