feat(checkpoint): Add polecat session checkpoint for crash recovery (gt-441j6)

Add checkpoint system for polecats and crew workers to recover state
after session crash or context limit.

Features:
- internal/checkpoint package with Checkpoint type
- gt checkpoint write/read/clear commands
- Checkpoint display in gt prime startup
- Auto-detection of molecule, step, hooked bead
- Git state capture (modified files, branch, commit)

The checkpoint captures:
- Current molecule and step being worked
- Hooked bead
- Modified files list
- Git branch and last commit
- Session notes
- Timestamp

Checkpoints are stored in .polecat-checkpoint.json and displayed
during session startup via gt prime.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
rictus
2026-01-01 18:45:29 -08:00
committed by Steve Yegge
parent 65c34efd4e
commit f883a09317
3 changed files with 587 additions and 0 deletions

View File

@@ -0,0 +1,216 @@
// Package checkpoint provides session checkpointing for crash recovery.
// When a polecat session dies (context limit, crash, timeout), checkpoints
// allow the next session to recover state and resume work.
package checkpoint
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Filename is the name of the checkpoint file. Path joins it onto a polecat
// directory to form the file's full location.
const Filename = ".polecat-checkpoint.json"
// Checkpoint is the recovery record that Write serializes to JSON and Read
// loads back. Every field except Timestamp carries omitempty, so unset
// values are left out of the file.
type Checkpoint struct {
// MoleculeID is the current molecule being worked.
MoleculeID string `json:"molecule_id,omitempty"`
// CurrentStep is the step ID currently in progress.
CurrentStep string `json:"current_step,omitempty"`
// StepTitle is the human-readable title of the current step.
StepTitle string `json:"step_title,omitempty"`
// ModifiedFiles holds the paths Capture collects from
// `git status --porcelain`.
ModifiedFiles []string `json:"modified_files,omitempty"`
// LastCommit is the SHA Capture reads with `git rev-parse HEAD`.
LastCommit string `json:"last_commit,omitempty"`
// Branch is the name Capture reads with `git rev-parse --abbrev-ref HEAD`.
Branch string `json:"branch,omitempty"`
// HookedBead is the bead ID on the agent's hook.
HookedBead string `json:"hooked_bead,omitempty"`
// Timestamp is when the checkpoint was written. Capture sets it to the
// current time, and Write fills it in when it is still the zero value.
Timestamp time.Time `json:"timestamp"`
// SessionID identifies the session that wrote the checkpoint. When it is
// empty, Write sets it from the CLAUDE_SESSION_ID environment variable, or
// to "pid-<pid>" if that variable is empty too.
SessionID string `json:"session_id,omitempty"`
// Notes contains optional context from the session.
Notes string `json:"notes,omitempty"`
}
// Path builds the location of the checkpoint file inside polecatDir by
// joining the directory with Filename.
func Path(polecatDir string) string {
	location := filepath.Join(polecatDir, Filename)
	return location
}
// Read loads the checkpoint stored in polecatDir.
//
// A missing checkpoint file is not an error: the result is (nil, nil).
// Any other read failure, or a file that is not valid checkpoint JSON,
// is returned as a wrapped error.
func Read(polecatDir string) (*Checkpoint, error) {
	raw, readErr := os.ReadFile(Path(polecatDir))
	switch {
	case readErr == nil:
		// File loaded; fall through to decoding below.
	case os.IsNotExist(readErr):
		return nil, nil
	default:
		return nil, fmt.Errorf("reading checkpoint: %w", readErr)
	}

	loaded := new(Checkpoint)
	if parseErr := json.Unmarshal(raw, loaded); parseErr != nil {
		return nil, fmt.Errorf("parsing checkpoint: %w", parseErr)
	}
	return loaded, nil
}
// Write saves a checkpoint to the polecat directory.
//
// Before serializing, it fills in cp.Timestamp (current time) when that is
// zero and cp.SessionID (CLAUDE_SESSION_ID, else "pid-<pid>") when that is
// empty, so cp is modified in place.
//
// The JSON is written to a temporary file inside polecatDir and then renamed
// over the checkpoint path. A session that dies part-way through a write
// therefore leaves the previous checkpoint in place instead of a truncated
// file that Read would fail to parse.
//
// It returns an error if cp is nil or if marshaling, writing, or renaming
// fails; on failure the temporary file is removed.
func Write(polecatDir string, cp *Checkpoint) error {
	if cp == nil {
		return fmt.Errorf("writing checkpoint: nil checkpoint")
	}

	// Set timestamp if not already set.
	if cp.Timestamp.IsZero() {
		cp.Timestamp = time.Now()
	}

	// Set session ID from the environment, falling back to the process ID.
	if cp.SessionID == "" {
		cp.SessionID = os.Getenv("CLAUDE_SESSION_ID")
		if cp.SessionID == "" {
			cp.SessionID = fmt.Sprintf("pid-%d", os.Getpid())
		}
	}

	data, err := json.MarshalIndent(cp, "", " ")
	if err != nil {
		return fmt.Errorf("marshaling checkpoint: %w", err)
	}

	// The temp file lives in the same directory as the target so the final
	// rename never has to cross a filesystem boundary.
	tmp, err := os.CreateTemp(polecatDir, Filename+".tmp-*")
	if err != nil {
		return fmt.Errorf("writing checkpoint: %w", err)
	}
	tmpPath := tmp.Name()
	renamed := false
	defer func() {
		if !renamed {
			// Best-effort cleanup; the original error is what the caller needs.
			_ = os.Remove(tmpPath)
		}
	}()

	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close()
		return fmt.Errorf("writing checkpoint: %w", err)
	}
	// CreateTemp creates the file with mode 0600; restore the 0644 mode the
	// checkpoint file has always been written with.
	if err := tmp.Chmod(0644); err != nil {
		_ = tmp.Close()
		return fmt.Errorf("writing checkpoint: %w", err)
	}
	if err := tmp.Close(); err != nil {
		return fmt.Errorf("writing checkpoint: %w", err)
	}
	if err := os.Rename(tmpPath, Path(polecatDir)); err != nil {
		return fmt.Errorf("writing checkpoint: %w", err)
	}
	renamed = true
	return nil
}
// Remove deletes the checkpoint file in polecatDir.
// A checkpoint that does not exist counts as success; any other failure
// is returned as a wrapped error.
func Remove(polecatDir string) error {
	err := os.Remove(Path(polecatDir))
	if err == nil || os.IsNotExist(err) {
		return nil
	}
	return fmt.Errorf("removing checkpoint: %w", err)
}
// Capture creates a checkpoint by capturing the current git state of
// polecatDir: the paths reported by `git status --porcelain`, the HEAD
// commit SHA, and the current branch name. Timestamp is set to now.
//
// Each git query is best-effort: if a command fails (for example because
// polecatDir is not inside a repository) the corresponding field is simply
// left empty. The returned error is currently always nil.
func Capture(polecatDir string) (*Checkpoint, error) {
	cp := &Checkpoint{
		Timestamp: time.Now(),
	}

	if out, err := checkpointGitOutput(polecatDir, "status", "--porcelain"); err == nil {
		cp.ModifiedFiles = parsePorcelainStatus(out)
	}
	if out, err := checkpointGitOutput(polecatDir, "rev-parse", "HEAD"); err == nil {
		cp.LastCommit = strings.TrimSpace(out)
	}
	if out, err := checkpointGitOutput(polecatDir, "rev-parse", "--abbrev-ref", "HEAD"); err == nil {
		cp.Branch = strings.TrimSpace(out)
	}
	return cp, nil
}

// checkpointGitOutput runs git with args in dir and returns its stdout.
func checkpointGitOutput(dir string, args ...string) (string, error) {
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	out, err := cmd.Output()
	return string(out), err
}

// parsePorcelainStatus extracts the path column from `git status --porcelain`
// output. Every record is "XY path": two status characters, one space, then
// the path. X is a space for changes that are not staged, so the output must
// not be whitespace-trimmed as a whole before the fixed-width prefix is cut
// off; doing so would shift the first record by one column. Rename and copy
// records keep their "old -> new" text unchanged.
func parsePorcelainStatus(output string) []string {
	var files []string
	for _, line := range strings.Split(output, "\n") {
		line = strings.TrimRight(line, "\r")
		if len(line) <= 3 {
			continue
		}
		if file := strings.TrimSpace(line[3:]); file != "" {
			files = append(files, file)
		}
	}
	return files
}
// WithMolecule records which molecule and step the session is working on,
// overwriting MoleculeID, CurrentStep and StepTitle. It returns the receiver
// so calls can be chained.
func (cp *Checkpoint) WithMolecule(moleculeID, stepID, stepTitle string) *Checkpoint {
	cp.MoleculeID, cp.CurrentStep, cp.StepTitle = moleculeID, stepID, stepTitle
	return cp
}
// WithHookedBead stores beadID as the checkpoint's hooked bead and returns
// the receiver so calls can be chained.
func (cp *Checkpoint) WithHookedBead(beadID string) *Checkpoint {
	hooked := beadID
	cp.HookedBead = hooked
	return cp
}
// WithNotes replaces the checkpoint's free-form notes and returns the
// receiver so calls can be chained.
func (cp *Checkpoint) WithNotes(notes string) *Checkpoint {
	text := notes
	cp.Notes = text
	return cp
}
// Age reports the time elapsed between the checkpoint's Timestamp and now.
func (cp *Checkpoint) Age() time.Duration {
	writtenAt := cp.Timestamp
	return time.Now().Sub(writtenAt)
}
// IsStale reports whether the checkpoint's age is strictly greater than
// threshold.
func (cp *Checkpoint) IsStale(threshold time.Duration) bool {
	if elapsed := cp.Age(); elapsed > threshold {
		return true
	}
	return false
}
// Summary renders the checkpoint as a single comma-separated line.
//
// Fragments appear in a fixed order (molecule with optional step, hooked
// bead, modified-file count, branch) and only for fields that are set.
// When none are set the result is "no significant state".
func (cp *Checkpoint) Summary() string {
	fragments := make([]string, 0, 4)

	switch {
	case cp.MoleculeID == "":
		// A step without a molecule is not reported.
	case cp.CurrentStep == "":
		fragments = append(fragments, fmt.Sprintf("molecule %s", cp.MoleculeID))
	default:
		fragments = append(fragments, fmt.Sprintf("molecule %s, step %s", cp.MoleculeID, cp.CurrentStep))
	}

	if bead := cp.HookedBead; bead != "" {
		fragments = append(fragments, fmt.Sprintf("hooked: %s", bead))
	}
	if count := len(cp.ModifiedFiles); count > 0 {
		fragments = append(fragments, fmt.Sprintf("%d modified files", count))
	}
	if branch := cp.Branch; branch != "" {
		fragments = append(fragments, fmt.Sprintf("branch: %s", branch))
	}

	if len(fragments) == 0 {
		return "no significant state"
	}
	return strings.Join(fragments, ", ")
}

View File

@@ -0,0 +1,297 @@
package cmd
import (
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/spf13/cobra"
	"github.com/steveyegge/gastown/internal/beads"
	"github.com/steveyegge/gastown/internal/checkpoint"
	"github.com/steveyegge/gastown/internal/style"
	"github.com/steveyegge/gastown/internal/workspace"
)
// checkpointCmd is the parent "checkpoint" command. It defines no run
// function of its own; init attaches the write, read and clear subcommands
// to it and registers it on rootCmd.
var checkpointCmd = &cobra.Command{
Use: "checkpoint",
GroupID: GroupDiag,
Short: "Manage session checkpoints for crash recovery",
Long: `Manage checkpoints for polecat session crash recovery.
Checkpoints capture the current work state so that if a session crashes,
the next session can resume from where it left off.
Checkpoint data includes:
- Current molecule and step
- Hooked bead
- Modified files list
- Git branch and last commit
- Timestamp
Checkpoints are stored in .polecat-checkpoint.json in the polecat directory.`,
}
// checkpointWriteCmd is the "checkpoint write" subcommand, handled by
// runCheckpointWrite. init registers its --notes, --molecule and --step flags.
var checkpointWriteCmd = &cobra.Command{
Use: "write",
Short: "Write a checkpoint of current session state",
Long: `Capture and write the current session state to a checkpoint file.
This is typically called:
- After closing a molecule step
- Periodically during long work sessions
- Before handoff to another session
The checkpoint captures git state, molecule progress, and hooked work.`,
RunE: runCheckpointWrite,
}
// checkpointReadCmd is the "checkpoint read" subcommand, handled by
// runCheckpointRead. It takes no flags.
var checkpointReadCmd = &cobra.Command{
Use: "read",
Short: "Read and display the current checkpoint",
Long: `Read and display the checkpoint file if one exists.`,
RunE: runCheckpointRead,
}
// checkpointClearCmd is the "checkpoint clear" subcommand, handled by
// runCheckpointClear. It takes no flags.
var checkpointClearCmd = &cobra.Command{
Use: "clear",
Short: "Clear the checkpoint file",
Long: `Remove the checkpoint file. Use after work is complete or checkpoint is no longer needed.`,
RunE: runCheckpointClear,
}
// Flag values for "checkpoint write"; init binds each one to its flag.
var (
// checkpointNotes holds --notes, free-form text stored with the checkpoint.
checkpointNotes string
// checkpointMolecule holds --molecule; when empty the molecule ID is
// auto-detected.
checkpointMolecule string
// checkpointStep holds --step; when empty the step ID is auto-detected.
checkpointStep string
)
// init wires the checkpoint command tree: it attaches the write, read and
// clear subcommands, binds the flags of "write" to their package-level
// variables, and registers the parent command on rootCmd.
func init() {
	checkpointCmd.AddCommand(checkpointWriteCmd, checkpointReadCmd, checkpointClearCmd)

	writeFlags := checkpointWriteCmd.Flags()
	writeFlags.StringVar(&checkpointNotes, "notes", "", "Add notes to the checkpoint")
	writeFlags.StringVar(&checkpointMolecule, "molecule", "", "Override molecule ID (auto-detected if not specified)")
	writeFlags.StringVar(&checkpointStep, "step", "", "Override step ID (auto-detected if not specified)")

	rootCmd.AddCommand(checkpointCmd)
}
// runCheckpointWrite implements "checkpoint write". It captures the git state
// of the current working directory, adds notes, molecule context and the
// hooked bead, writes the checkpoint into the current working directory, and
// prints a one-line summary.
//
// The command is a no-op (with an explanatory message and a nil error) for
// any role other than polecat or crew. Molecule and step IDs come from the
// --molecule/--step flags; whichever is left blank is filled in from
// detectMoleculeContext, which also supplies the step title.
//
// It returns an error when the working directory cannot be determined, the
// directory is not inside a Gas Town workspace, role detection fails, or the
// checkpoint cannot be captured or written.
func runCheckpointWrite(cmd *cobra.Command, args []string) error {
	cwd, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("getting current directory: %w", err)
	}

	// Detect role context
	townRoot, err := workspace.FindFromCwd()
	if err != nil || townRoot == "" {
		return fmt.Errorf("not in a Gas Town workspace")
	}
	roleInfo, err := GetRoleWithContext(cwd, townRoot)
	if err != nil {
		return fmt.Errorf("detecting role: %w", err)
	}

	// Only polecats and crew workers use checkpoints
	if roleInfo.Role != RolePolecat && roleInfo.Role != RoleCrew {
		fmt.Printf("%s Checkpoints only apply to polecats and crew workers\n",
			style.Dim.Render("○"))
		return nil
	}

	// Capture current state
	cp, err := checkpoint.Capture(cwd)
	if err != nil {
		return fmt.Errorf("capturing checkpoint: %w", err)
	}

	// Add notes if provided
	if checkpointNotes != "" {
		cp.WithNotes(checkpointNotes)
	}

	// Resolve molecule context on local copies so the package-level flag
	// variables keep the values the user actually passed.
	moleculeID, stepID, stepTitle := checkpointMolecule, checkpointStep, ""
	if moleculeID == "" || stepID == "" {
		detectedMolecule, detectedStep, detectedTitle := detectMoleculeContext(cwd, roleInfo)
		if moleculeID == "" {
			moleculeID = detectedMolecule
		}
		if stepID == "" {
			stepID = detectedStep
		}
		stepTitle = detectedTitle
	}

	// Record molecule, step and title in a single call: WithMolecule
	// overwrites all three fields, so a second call with an empty title
	// would erase the detected one.
	if moleculeID != "" || stepTitle != "" {
		cp.WithMolecule(moleculeID, stepID, stepTitle)
	}

	// Detect hooked bead
	if hookedBead := detectHookedBead(cwd, roleInfo); hookedBead != "" {
		cp.WithHookedBead(hookedBead)
	}

	// Write checkpoint
	if err := checkpoint.Write(cwd, cp); err != nil {
		return fmt.Errorf("writing checkpoint: %w", err)
	}

	fmt.Printf("%s Checkpoint written\n", style.Bold.Render("✓"))
	fmt.Printf(" %s\n", cp.Summary())
	return nil
}
// runCheckpointRead implements "checkpoint read". It loads the checkpoint
// from the current working directory and prints every populated field, or a
// short notice when no checkpoint file exists.
//
// It returns an error when the working directory cannot be determined or the
// checkpoint file cannot be read or parsed.
func runCheckpointRead(cmd *cobra.Command, args []string) error {
	cwd, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("getting current directory: %w", err)
	}

	cp, err := checkpoint.Read(cwd)
	if err != nil {
		return fmt.Errorf("reading checkpoint: %w", err)
	}
	if cp == nil {
		fmt.Printf("%s No checkpoint exists\n", style.Dim.Render("○"))
		return nil
	}

	fmt.Printf("%s\n\n", style.Bold.Render("Checkpoint"))
	// Round the age to whole seconds so it prints as e.g. "1h2m3s" rather
	// than with nanosecond digits.
	fmt.Printf("Timestamp: %s (%s ago)\n", cp.Timestamp.Format("2006-01-02 15:04:05"), cp.Age().Round(time.Second))
	if cp.MoleculeID != "" {
		fmt.Printf("Molecule: %s\n", cp.MoleculeID)
	}
	if cp.CurrentStep != "" {
		fmt.Printf("Step: %s\n", cp.CurrentStep)
	}
	if cp.StepTitle != "" {
		fmt.Printf("Step Title: %s\n", cp.StepTitle)
	}
	if cp.HookedBead != "" {
		fmt.Printf("Hooked Bead: %s\n", cp.HookedBead)
	}
	if cp.Branch != "" {
		fmt.Printf("Branch: %s\n", cp.Branch)
	}
	if cp.LastCommit != "" {
		// Show at most the first 12 characters of the SHA.
		fmt.Printf("Last Commit: %s\n", cp.LastCommit[:min(12, len(cp.LastCommit))])
	}
	if len(cp.ModifiedFiles) > 0 {
		fmt.Printf("Modified Files: %d\n", len(cp.ModifiedFiles))
		for _, f := range cp.ModifiedFiles {
			fmt.Printf(" - %s\n", f)
		}
	}
	if cp.Notes != "" {
		fmt.Printf("Notes: %s\n", cp.Notes)
	}
	if cp.SessionID != "" {
		fmt.Printf("Session ID: %s\n", cp.SessionID)
	}
	return nil
}
// runCheckpointClear implements "checkpoint clear": it removes the checkpoint
// file from the current working directory and prints a confirmation.
// It returns an error when the working directory cannot be determined or the
// removal fails.
func runCheckpointClear(cmd *cobra.Command, args []string) error {
	workDir, wdErr := os.Getwd()
	if wdErr != nil {
		return fmt.Errorf("getting current directory: %w", wdErr)
	}

	rmErr := checkpoint.Remove(workDir)
	if rmErr != nil {
		return fmt.Errorf("removing checkpoint: %w", rmErr)
	}

	fmt.Printf("%s Checkpoint cleared\n", style.Bold.Render("✓"))
	return nil
}
// detectMoleculeContext looks through the agent's in-progress issues for one
// whose description contains an "instantiated_from:" line.
//
// For the first such issue it returns the text after that marker as the
// molecule ID, the issue's ID as the step ID, and the issue's title as the
// step title. It returns three empty strings when the agent identity is
// empty, the issue listing fails or is empty, or no issue has the marker.
func detectMoleculeContext(workDir string, ctx RoleInfo) (moleculeID, stepID, stepTitle string) {
	const marker = "instantiated_from:"

	store := beads.New(workDir)

	// The issue query is keyed by the agent's identity.
	identity := getAgentIdentity(RoleContext{
		Role:    ctx.Role,
		Rig:     ctx.Rig,
		Polecat: ctx.Polecat,
	})
	if identity == "" {
		return "", "", ""
	}

	inProgress, listErr := store.List(beads.ListOptions{
		Status:   "in_progress",
		Assignee: identity,
		Priority: -1,
	})
	if listErr != nil || len(inProgress) == 0 {
		return "", "", ""
	}

	for _, candidate := range inProgress {
		for _, raw := range strings.Split(candidate.Description, "\n") {
			trimmed := strings.TrimSpace(raw)
			if !strings.HasPrefix(trimmed, marker) {
				continue
			}
			origin := strings.TrimSpace(strings.TrimPrefix(trimmed, marker))
			return origin, candidate.ID, candidate.Title
		}
	}
	return "", "", ""
}
// detectHookedBead returns the ID of the first bead in hooked status that is
// assigned to the agent, or "" when the agent identity is empty, the listing
// fails, or nothing is hooked.
func detectHookedBead(workDir string, ctx RoleInfo) string {
	store := beads.New(workDir)

	identity := getAgentIdentity(RoleContext{
		Role:    ctx.Role,
		Rig:     ctx.Rig,
		Polecat: ctx.Polecat,
	})
	if identity == "" {
		return ""
	}

	hooked, listErr := store.List(beads.ListOptions{
		Status:   beads.StatusHooked,
		Assignee: identity,
		Priority: -1,
	})
	if listErr == nil && len(hooked) > 0 {
		return hooked[0].ID
	}
	return ""
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

View File

@@ -9,9 +9,11 @@ import (
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings" "strings"
"time"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/steveyegge/gastown/internal/beads" "github.com/steveyegge/gastown/internal/beads"
"github.com/steveyegge/gastown/internal/checkpoint"
"github.com/steveyegge/gastown/internal/constants" "github.com/steveyegge/gastown/internal/constants"
"github.com/steveyegge/gastown/internal/events" "github.com/steveyegge/gastown/internal/events"
"github.com/steveyegge/gastown/internal/lock" "github.com/steveyegge/gastown/internal/lock"
@@ -134,6 +136,9 @@ func runPrime(cmd *cobra.Command, args []string) error {
// Output molecule context if working on a molecule step // Output molecule context if working on a molecule step
outputMoleculeContext(ctx) outputMoleculeContext(ctx)
// Output previous session checkpoint for crash recovery
outputCheckpointContext(ctx)
// Run bd prime to output beads workflow context // Run bd prime to output beads workflow context
runBdPrime(cwd) runBdPrime(cwd)
@@ -1418,6 +1423,75 @@ func checkPendingEscalations(ctx RoleContext) {
fmt.Println() fmt.Println()
} }
// outputCheckpointContext prints the checkpoint left by a previous session so
// the new session can pick up where that one stopped.
//
// It prints nothing for roles other than polecat and crew, when no checkpoint
// exists, or when the checkpoint cannot be read. A checkpoint older than 24
// hours is deleted instead of being shown.
func outputCheckpointContext(ctx RoleContext) {
	switch ctx.Role {
	case RolePolecat, RoleCrew:
		// Checkpoints apply to these roles.
	default:
		return
	}

	// Read errors are deliberately silent: a broken checkpoint must not
	// disturb session startup.
	saved, readErr := checkpoint.Read(ctx.WorkDir)
	if readErr != nil || saved == nil {
		return
	}

	const maxAge = 24 * time.Hour
	if saved.IsStale(maxAge) {
		// Best-effort removal of the stale file; nothing is displayed.
		_ = checkpoint.Remove(ctx.WorkDir)
		return
	}

	fmt.Println()
	fmt.Printf("%s\n\n", style.Bold.Render("## 📌 Previous Session Checkpoint"))
	fmt.Printf("A previous session left a checkpoint %s ago.\n\n", saved.Age().Round(time.Minute))

	if title := saved.StepTitle; title != "" {
		fmt.Printf(" **Working on:** %s\n", title)
	}
	if molecule := saved.MoleculeID; molecule != "" {
		fmt.Printf(" **Molecule:** %s\n", molecule)
	}
	if step := saved.CurrentStep; step != "" {
		fmt.Printf(" **Step:** %s\n", step)
	}
	if bead := saved.HookedBead; bead != "" {
		fmt.Printf(" **Hooked bead:** %s\n", bead)
	}
	if branch := saved.Branch; branch != "" {
		fmt.Printf(" **Branch:** %s\n", branch)
	}

	if total := len(saved.ModifiedFiles); total > 0 {
		fmt.Printf(" **Modified files:** %d\n", total)

		// List only the first few paths and summarize the rest.
		const previewLimit = 5
		preview := saved.ModifiedFiles
		if total > previewLimit {
			preview = preview[:previewLimit]
		}
		for _, name := range preview {
			fmt.Printf(" - %s\n", name)
		}
		if hidden := total - len(preview); hidden > 0 {
			fmt.Printf(" ... and %d more\n", hidden)
		}
	}

	if notes := saved.Notes; notes != "" {
		fmt.Printf(" **Notes:** %s\n", notes)
	}

	fmt.Println()
	fmt.Println("Use this context to resume work. The checkpoint will be updated as you progress.")
	fmt.Println()
}
// emitSessionEvent emits a session_start event for seance discovery. // emitSessionEvent emits a session_start event for seance discovery.
// The event is written to ~/gt/.events.jsonl and can be queried via gt seance. // The event is written to ~/gt/.events.jsonl and can be queried via gt seance.
// Session ID comes from CLAUDE_SESSION_ID env var if available. // Session ID comes from CLAUDE_SESSION_ID env var if available.