diff --git a/internal/checkpoint/checkpoint.go b/internal/checkpoint/checkpoint.go
new file mode 100644
index 00000000..ee26117f
--- /dev/null
+++ b/internal/checkpoint/checkpoint.go
@@ -0,0 +1,216 @@
// Package checkpoint provides session checkpointing for crash recovery.
// When a polecat session dies (context limit, crash, timeout), checkpoints
// allow the next session to recover state and resume work.
//
// A checkpoint is a single JSON file (see Filename) stored in the polecat
// directory.
package checkpoint

import (
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)

// Filename is the checkpoint file name within the polecat directory.
const Filename = ".polecat-checkpoint.json"

// Checkpoint represents a session recovery checkpoint.
//
// It is serialized as JSON. Every field except Timestamp carries
// `omitempty`, so empty fields are left out of the file.
type Checkpoint struct {
	// MoleculeID is the current molecule being worked.
	MoleculeID string `json:"molecule_id,omitempty"`

	// CurrentStep is the step ID currently in progress.
	CurrentStep string `json:"current_step,omitempty"`

	// StepTitle is the human-readable title of the current step.
	StepTitle string `json:"step_title,omitempty"`

	// ModifiedFiles lists files modified since the last commit
	// (Capture fills it from `git status --porcelain`).
	ModifiedFiles []string `json:"modified_files,omitempty"`

	// LastCommit is the SHA of the last commit (`git rev-parse HEAD`).
	LastCommit string `json:"last_commit,omitempty"`

	// Branch is the current git branch (`git rev-parse --abbrev-ref HEAD`).
	Branch string `json:"branch,omitempty"`

	// HookedBead is the bead ID on the agent's hook.
	HookedBead string `json:"hooked_bead,omitempty"`

	// Timestamp is when the checkpoint was written. It is always emitted
	// and is the basis for Age and IsStale.
	Timestamp time.Time `json:"timestamp"`

	// SessionID identifies the session that wrote the checkpoint.
	SessionID string `json:"session_id,omitempty"`

	// Notes contains optional context from the session.
	Notes string `json:"notes,omitempty"`
}

// Path returns the checkpoint file path for a given polecat directory.
+func Path(polecatDir string) string { + return filepath.Join(polecatDir, Filename) +} + +// Read loads a checkpoint from the polecat directory. +// Returns nil, nil if no checkpoint exists. +func Read(polecatDir string) (*Checkpoint, error) { + path := Path(polecatDir) + + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("reading checkpoint: %w", err) + } + + var cp Checkpoint + if err := json.Unmarshal(data, &cp); err != nil { + return nil, fmt.Errorf("parsing checkpoint: %w", err) + } + + return &cp, nil +} + +// Write saves a checkpoint to the polecat directory. +func Write(polecatDir string, cp *Checkpoint) error { + // Set timestamp if not already set + if cp.Timestamp.IsZero() { + cp.Timestamp = time.Now() + } + + // Set session ID from environment if available + if cp.SessionID == "" { + cp.SessionID = os.Getenv("CLAUDE_SESSION_ID") + if cp.SessionID == "" { + cp.SessionID = fmt.Sprintf("pid-%d", os.Getpid()) + } + } + + data, err := json.MarshalIndent(cp, "", " ") + if err != nil { + return fmt.Errorf("marshaling checkpoint: %w", err) + } + + path := Path(polecatDir) + if err := os.WriteFile(path, data, 0644); err != nil { + return fmt.Errorf("writing checkpoint: %w", err) + } + + return nil +} + +// Remove deletes the checkpoint file. +func Remove(polecatDir string) error { + path := Path(polecatDir) + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("removing checkpoint: %w", err) + } + return nil +} + +// Capture creates a checkpoint by capturing current git and work state. 
+func Capture(polecatDir string) (*Checkpoint, error) { + cp := &Checkpoint{ + Timestamp: time.Now(), + } + + // Get modified files from git status + cmd := exec.Command("git", "status", "--porcelain") + cmd.Dir = polecatDir + output, err := cmd.Output() + if err == nil { + lines := strings.Split(strings.TrimSpace(string(output)), "\n") + for _, line := range lines { + if len(line) > 3 { + // Format: XY filename + file := strings.TrimSpace(line[3:]) + if file != "" { + cp.ModifiedFiles = append(cp.ModifiedFiles, file) + } + } + } + } + + // Get last commit SHA + cmd = exec.Command("git", "rev-parse", "HEAD") + cmd.Dir = polecatDir + output, err = cmd.Output() + if err == nil { + cp.LastCommit = strings.TrimSpace(string(output)) + } + + // Get current branch + cmd = exec.Command("git", "rev-parse", "--abbrev-ref", "HEAD") + cmd.Dir = polecatDir + output, err = cmd.Output() + if err == nil { + cp.Branch = strings.TrimSpace(string(output)) + } + + return cp, nil +} + +// WithMolecule adds molecule context to a checkpoint. +func (cp *Checkpoint) WithMolecule(moleculeID, stepID, stepTitle string) *Checkpoint { + cp.MoleculeID = moleculeID + cp.CurrentStep = stepID + cp.StepTitle = stepTitle + return cp +} + +// WithHookedBead adds hooked bead context to a checkpoint. +func (cp *Checkpoint) WithHookedBead(beadID string) *Checkpoint { + cp.HookedBead = beadID + return cp +} + +// WithNotes adds context notes to a checkpoint. +func (cp *Checkpoint) WithNotes(notes string) *Checkpoint { + cp.Notes = notes + return cp +} + +// Age returns how long ago the checkpoint was written. +func (cp *Checkpoint) Age() time.Duration { + return time.Since(cp.Timestamp) +} + +// IsStale returns true if the checkpoint is older than the threshold. +func (cp *Checkpoint) IsStale(threshold time.Duration) bool { + return cp.Age() > threshold +} + +// Summary returns a concise summary of the checkpoint. 
+func (cp *Checkpoint) Summary() string { + var parts []string + + if cp.MoleculeID != "" { + if cp.CurrentStep != "" { + parts = append(parts, fmt.Sprintf("molecule %s, step %s", cp.MoleculeID, cp.CurrentStep)) + } else { + parts = append(parts, fmt.Sprintf("molecule %s", cp.MoleculeID)) + } + } + + if cp.HookedBead != "" { + parts = append(parts, fmt.Sprintf("hooked: %s", cp.HookedBead)) + } + + if len(cp.ModifiedFiles) > 0 { + parts = append(parts, fmt.Sprintf("%d modified files", len(cp.ModifiedFiles))) + } + + if cp.Branch != "" { + parts = append(parts, fmt.Sprintf("branch: %s", cp.Branch)) + } + + if len(parts) == 0 { + return "no significant state" + } + + return strings.Join(parts, ", ") +} diff --git a/internal/cmd/checkpoint_cmd.go b/internal/cmd/checkpoint_cmd.go new file mode 100644 index 00000000..c4e8f3fa --- /dev/null +++ b/internal/cmd/checkpoint_cmd.go @@ -0,0 +1,297 @@ +package cmd + +import ( + "fmt" + "os" + "strings" + + "github.com/spf13/cobra" + "github.com/steveyegge/gastown/internal/beads" + "github.com/steveyegge/gastown/internal/checkpoint" + "github.com/steveyegge/gastown/internal/style" + "github.com/steveyegge/gastown/internal/workspace" +) + +var checkpointCmd = &cobra.Command{ + Use: "checkpoint", + GroupID: GroupDiag, + Short: "Manage session checkpoints for crash recovery", + Long: `Manage checkpoints for polecat session crash recovery. + +Checkpoints capture the current work state so that if a session crashes, +the next session can resume from where it left off. + +Checkpoint data includes: +- Current molecule and step +- Hooked bead +- Modified files list +- Git branch and last commit +- Timestamp + +Checkpoints are stored in .polecat-checkpoint.json in the polecat directory.`, +} + +var checkpointWriteCmd = &cobra.Command{ + Use: "write", + Short: "Write a checkpoint of current session state", + Long: `Capture and write the current session state to a checkpoint file. 
+ +This is typically called: +- After closing a molecule step +- Periodically during long work sessions +- Before handoff to another session + +The checkpoint captures git state, molecule progress, and hooked work.`, + RunE: runCheckpointWrite, +} + +var checkpointReadCmd = &cobra.Command{ + Use: "read", + Short: "Read and display the current checkpoint", + Long: `Read and display the checkpoint file if one exists.`, + RunE: runCheckpointRead, +} + +var checkpointClearCmd = &cobra.Command{ + Use: "clear", + Short: "Clear the checkpoint file", + Long: `Remove the checkpoint file. Use after work is complete or checkpoint is no longer needed.`, + RunE: runCheckpointClear, +} + +var ( + checkpointNotes string + checkpointMolecule string + checkpointStep string +) + +func init() { + checkpointCmd.AddCommand(checkpointWriteCmd) + checkpointCmd.AddCommand(checkpointReadCmd) + checkpointCmd.AddCommand(checkpointClearCmd) + + checkpointWriteCmd.Flags().StringVar(&checkpointNotes, "notes", "", + "Add notes to the checkpoint") + checkpointWriteCmd.Flags().StringVar(&checkpointMolecule, "molecule", "", + "Override molecule ID (auto-detected if not specified)") + checkpointWriteCmd.Flags().StringVar(&checkpointStep, "step", "", + "Override step ID (auto-detected if not specified)") + + rootCmd.AddCommand(checkpointCmd) +} + +func runCheckpointWrite(cmd *cobra.Command, args []string) error { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("getting current directory: %w", err) + } + + // Detect role context + townRoot, err := workspace.FindFromCwd() + if err != nil || townRoot == "" { + return fmt.Errorf("not in a Gas Town workspace") + } + + roleInfo, err := GetRoleWithContext(cwd, townRoot) + if err != nil { + return fmt.Errorf("detecting role: %w", err) + } + + // Only polecats and crew workers use checkpoints + if roleInfo.Role != RolePolecat && roleInfo.Role != RoleCrew { + fmt.Printf("%s Checkpoints only apply to polecats and crew workers\n", + 
style.Dim.Render("○")) + return nil + } + + // Capture current state + cp, err := checkpoint.Capture(cwd) + if err != nil { + return fmt.Errorf("capturing checkpoint: %w", err) + } + + // Add notes if provided + if checkpointNotes != "" { + cp.WithNotes(checkpointNotes) + } + + // Try to detect molecule context if not overridden + if checkpointMolecule == "" || checkpointStep == "" { + moleculeID, stepID, stepTitle := detectMoleculeContext(cwd, roleInfo) + if checkpointMolecule == "" { + checkpointMolecule = moleculeID + } + if checkpointStep == "" { + checkpointStep = stepID + } + if stepTitle != "" { + cp.WithMolecule(checkpointMolecule, checkpointStep, stepTitle) + } + } + + // Add molecule context + if checkpointMolecule != "" { + cp.WithMolecule(checkpointMolecule, checkpointStep, "") + } + + // Detect hooked bead + hookedBead := detectHookedBead(cwd, roleInfo) + if hookedBead != "" { + cp.WithHookedBead(hookedBead) + } + + // Write checkpoint + if err := checkpoint.Write(cwd, cp); err != nil { + return fmt.Errorf("writing checkpoint: %w", err) + } + + fmt.Printf("%s Checkpoint written\n", style.Bold.Render("✓")) + fmt.Printf(" %s\n", cp.Summary()) + + return nil +} + +func runCheckpointRead(cmd *cobra.Command, args []string) error { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("getting current directory: %w", err) + } + + cp, err := checkpoint.Read(cwd) + if err != nil { + return fmt.Errorf("reading checkpoint: %w", err) + } + + if cp == nil { + fmt.Printf("%s No checkpoint exists\n", style.Dim.Render("○")) + return nil + } + + fmt.Printf("%s\n\n", style.Bold.Render("Checkpoint")) + fmt.Printf("Timestamp: %s (%s ago)\n", cp.Timestamp.Format("2006-01-02 15:04:05"), cp.Age().Round(1)) + + if cp.MoleculeID != "" { + fmt.Printf("Molecule: %s\n", cp.MoleculeID) + } + if cp.CurrentStep != "" { + fmt.Printf("Step: %s\n", cp.CurrentStep) + } + if cp.StepTitle != "" { + fmt.Printf("Step Title: %s\n", cp.StepTitle) + } + if cp.HookedBead != "" { + 
fmt.Printf("Hooked Bead: %s\n", cp.HookedBead) + } + if cp.Branch != "" { + fmt.Printf("Branch: %s\n", cp.Branch) + } + if cp.LastCommit != "" { + fmt.Printf("Last Commit: %s\n", cp.LastCommit[:min(12, len(cp.LastCommit))]) + } + if len(cp.ModifiedFiles) > 0 { + fmt.Printf("Modified Files: %d\n", len(cp.ModifiedFiles)) + for _, f := range cp.ModifiedFiles { + fmt.Printf(" - %s\n", f) + } + } + if cp.Notes != "" { + fmt.Printf("Notes: %s\n", cp.Notes) + } + if cp.SessionID != "" { + fmt.Printf("Session ID: %s\n", cp.SessionID) + } + + return nil +} + +func runCheckpointClear(cmd *cobra.Command, args []string) error { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("getting current directory: %w", err) + } + + if err := checkpoint.Remove(cwd); err != nil { + return fmt.Errorf("removing checkpoint: %w", err) + } + + fmt.Printf("%s Checkpoint cleared\n", style.Bold.Render("✓")) + return nil +} + +// detectMoleculeContext tries to detect the current molecule and step from beads. 
+func detectMoleculeContext(workDir string, ctx RoleInfo) (moleculeID, stepID, stepTitle string) { + b := beads.New(workDir) + + // Get agent identity for query + roleCtx := RoleContext{ + Role: ctx.Role, + Rig: ctx.Rig, + Polecat: ctx.Polecat, + } + assignee := getAgentIdentity(roleCtx) + if assignee == "" { + return "", "", "" + } + + // Find in-progress issues for this agent + issues, err := b.List(beads.ListOptions{ + Status: "in_progress", + Assignee: assignee, + Priority: -1, + }) + if err != nil || len(issues) == 0 { + return "", "", "" + } + + // Check for molecule metadata + for _, issue := range issues { + // Look for instantiated_from in description + lines := strings.Split(issue.Description, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "instantiated_from:") { + moleculeID = strings.TrimSpace(strings.TrimPrefix(line, "instantiated_from:")) + stepID = issue.ID + stepTitle = issue.Title + return moleculeID, stepID, stepTitle + } + } + } + + return "", "", "" +} + +// detectHookedBead finds the currently hooked bead for the agent. 
+func detectHookedBead(workDir string, ctx RoleInfo) string { + b := beads.New(workDir) + + // Get agent identity + roleCtx := RoleContext{ + Role: ctx.Role, + Rig: ctx.Rig, + Polecat: ctx.Polecat, + } + assignee := getAgentIdentity(roleCtx) + if assignee == "" { + return "" + } + + // Find hooked beads for this agent + hookedBeads, err := b.List(beads.ListOptions{ + Status: beads.StatusHooked, + Assignee: assignee, + Priority: -1, + }) + if err != nil || len(hookedBeads) == 0 { + return "" + } + + return hookedBeads[0].ID +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/internal/cmd/prime.go b/internal/cmd/prime.go index 3a434587..7a29b917 100644 --- a/internal/cmd/prime.go +++ b/internal/cmd/prime.go @@ -9,9 +9,11 @@ import ( "os/exec" "path/filepath" "strings" + "time" "github.com/spf13/cobra" "github.com/steveyegge/gastown/internal/beads" + "github.com/steveyegge/gastown/internal/checkpoint" "github.com/steveyegge/gastown/internal/constants" "github.com/steveyegge/gastown/internal/events" "github.com/steveyegge/gastown/internal/lock" @@ -134,6 +136,9 @@ func runPrime(cmd *cobra.Command, args []string) error { // Output molecule context if working on a molecule step outputMoleculeContext(ctx) + // Output previous session checkpoint for crash recovery + outputCheckpointContext(ctx) + // Run bd prime to output beads workflow context runBdPrime(cwd) @@ -1418,6 +1423,75 @@ func checkPendingEscalations(ctx RoleContext) { fmt.Println() } +// outputCheckpointContext reads and displays any previous session checkpoint. +// This enables crash recovery by showing what the previous session was working on. 
+func outputCheckpointContext(ctx RoleContext) { + // Only applies to polecats and crew workers + if ctx.Role != RolePolecat && ctx.Role != RoleCrew { + return + } + + // Read checkpoint + cp, err := checkpoint.Read(ctx.WorkDir) + if err != nil { + // Silently ignore read errors + return + } + if cp == nil { + // No checkpoint exists + return + } + + // Check if checkpoint is stale (older than 24 hours) + if cp.IsStale(24 * time.Hour) { + // Remove stale checkpoint + _ = checkpoint.Remove(ctx.WorkDir) + return + } + + // Display checkpoint context + fmt.Println() + fmt.Printf("%s\n\n", style.Bold.Render("## 📌 Previous Session Checkpoint")) + fmt.Printf("A previous session left a checkpoint %s ago.\n\n", cp.Age().Round(time.Minute)) + + if cp.StepTitle != "" { + fmt.Printf(" **Working on:** %s\n", cp.StepTitle) + } + if cp.MoleculeID != "" { + fmt.Printf(" **Molecule:** %s\n", cp.MoleculeID) + } + if cp.CurrentStep != "" { + fmt.Printf(" **Step:** %s\n", cp.CurrentStep) + } + if cp.HookedBead != "" { + fmt.Printf(" **Hooked bead:** %s\n", cp.HookedBead) + } + if cp.Branch != "" { + fmt.Printf(" **Branch:** %s\n", cp.Branch) + } + if len(cp.ModifiedFiles) > 0 { + fmt.Printf(" **Modified files:** %d\n", len(cp.ModifiedFiles)) + // Show first few files + maxShow := 5 + if len(cp.ModifiedFiles) < maxShow { + maxShow = len(cp.ModifiedFiles) + } + for i := 0; i < maxShow; i++ { + fmt.Printf(" - %s\n", cp.ModifiedFiles[i]) + } + if len(cp.ModifiedFiles) > maxShow { + fmt.Printf(" ... and %d more\n", len(cp.ModifiedFiles)-maxShow) + } + } + if cp.Notes != "" { + fmt.Printf(" **Notes:** %s\n", cp.Notes) + } + fmt.Println() + + fmt.Println("Use this context to resume work. The checkpoint will be updated as you progress.") + fmt.Println() +} + // emitSessionEvent emits a session_start event for seance discovery. // The event is written to ~/gt/.events.jsonl and can be queried via gt seance. // Session ID comes from CLAUDE_SESSION_ID env var if available.