Merge: dag-mjxm10sv (gt-j8kxo) - Add gt rig restart for multi-rig restart support

This commit is contained in:
mayor
2026-01-02 17:29:28 -08:00
committed by Steve Yegge

View File

@@ -223,6 +223,31 @@ Examples:
RunE: runRigStop,
}
var rigRestartCmd = &cobra.Command{
Use: "restart <rig>...",
Short: "Restart one or more rigs (stop then start)",
Long: `Restart the patrol agents (witness and refinery) for one or more rigs.
This is equivalent to 'gt rig stop' followed by 'gt rig start' for each rig.
Useful after polecats complete work and land their changes.
Before shutdown, checks all polecats for uncommitted work:
- Uncommitted changes (modified/untracked files)
- Stashes
- Unpushed commits
Use --force to skip graceful shutdown and kill immediately.
Use --nuclear to bypass ALL safety checks (will lose work!).
Examples:
gt rig restart gastown
gt rig restart gastown beads
gt rig restart --force gastown beads
gt rig restart --nuclear gastown # DANGER: loses uncommitted work`,
Args: cobra.MinimumNArgs(1),
RunE: runRigRestart,
}
// Flags
var (
rigAddPrefix string
@@ -235,6 +260,8 @@ var (
rigShutdownNuclear bool
rigStopForce bool
rigStopNuclear bool
rigRestartForce bool
rigRestartNuclear bool
)
func init() {
@@ -245,6 +272,7 @@ func init() {
rigCmd.AddCommand(rigRebootCmd)
rigCmd.AddCommand(rigRemoveCmd)
rigCmd.AddCommand(rigResetCmd)
rigCmd.AddCommand(rigRestartCmd)
rigCmd.AddCommand(rigShutdownCmd)
rigCmd.AddCommand(rigStartCmd)
rigCmd.AddCommand(rigStatusCmd)
@@ -265,6 +293,9 @@ func init() {
rigStopCmd.Flags().BoolVarP(&rigStopForce, "force", "f", false, "Force immediate shutdown")
rigStopCmd.Flags().BoolVar(&rigStopNuclear, "nuclear", false, "DANGER: Bypass ALL safety checks (loses uncommitted work!)")
rigRestartCmd.Flags().BoolVarP(&rigRestartForce, "force", "f", false, "Force immediate shutdown during restart")
rigRestartCmd.Flags().BoolVar(&rigRestartNuclear, "nuclear", false, "DANGER: Bypass ALL safety checks (loses uncommitted work!)")
}
func runRigAdd(cmd *cobra.Command, args []string) error {
@@ -1203,3 +1234,191 @@ func runRigStop(cmd *cobra.Command, args []string) error {
return nil
}
func runRigRestart(cmd *cobra.Command, args []string) error {
// Find workspace
townRoot, err := workspace.FindFromCwdOrError()
if err != nil {
return fmt.Errorf("not in a Gas Town workspace: %w", err)
}
// Load rigs config
rigsPath := filepath.Join(townRoot, "mayor", "rigs.json")
rigsConfig, err := config.LoadRigsConfig(rigsPath)
if err != nil {
rigsConfig = &config.RigsConfig{Rigs: make(map[string]config.RigEntry)}
}
g := git.NewGit(townRoot)
rigMgr := rig.NewManager(townRoot, rigsConfig, g)
t := tmux.NewTmux()
// Track results
var succeeded []string
var failed []string
// Process each rig
for _, rigName := range args {
r, err := rigMgr.GetRig(rigName)
if err != nil {
fmt.Printf("%s Rig '%s' not found\n", style.Warning.Render("⚠"), rigName)
failed = append(failed, rigName)
continue
}
fmt.Printf("Restarting rig %s...\n", style.Bold.Render(rigName))
// Check all polecats for uncommitted work (unless nuclear)
if !rigRestartNuclear {
polecatGit := git.NewGit(r.Path)
polecatMgr := polecat.NewManager(r, polecatGit)
polecats, err := polecatMgr.List()
if err == nil && len(polecats) > 0 {
var problemPolecats []struct {
name string
status *git.UncommittedWorkStatus
}
for _, p := range polecats {
pGit := git.NewGit(p.ClonePath)
status, err := pGit.CheckUncommittedWork()
if err == nil && !status.Clean() {
problemPolecats = append(problemPolecats, struct {
name string
status *git.UncommittedWorkStatus
}{p.Name, status})
}
}
if len(problemPolecats) > 0 {
fmt.Printf("\n%s Cannot restart %s - polecats have uncommitted work:\n", style.Warning.Render("⚠"), rigName)
for _, pp := range problemPolecats {
fmt.Printf(" %s: %s\n", style.Bold.Render(pp.name), pp.status.String())
}
failed = append(failed, rigName)
continue
}
}
}
var stopErrors []string
var startErrors []string
// === STOP PHASE ===
fmt.Printf(" Stopping...\n")
// 1. Stop all polecat sessions
sessMgr := session.NewManager(t, r)
infos, err := sessMgr.List()
if err == nil && len(infos) > 0 {
fmt.Printf(" Stopping %d polecat session(s)...\n", len(infos))
if err := sessMgr.StopAll(rigRestartForce); err != nil {
stopErrors = append(stopErrors, fmt.Sprintf("polecat sessions: %v", err))
}
}
// 2. Stop the refinery
refMgr := refinery.NewManager(r)
refStatus, err := refMgr.Status()
if err == nil && refStatus.State == refinery.StateRunning {
fmt.Printf(" Stopping refinery...\n")
if err := refMgr.Stop(); err != nil {
stopErrors = append(stopErrors, fmt.Sprintf("refinery: %v", err))
}
}
// 3. Stop the witness
witMgr := witness.NewManager(r)
witStatus, err := witMgr.Status()
if err == nil && witStatus.State == witness.StateRunning {
fmt.Printf(" Stopping witness...\n")
if err := witMgr.Stop(); err != nil {
stopErrors = append(stopErrors, fmt.Sprintf("witness: %v", err))
}
}
if len(stopErrors) > 0 {
fmt.Printf(" %s Stop errors:\n", style.Warning.Render("⚠"))
for _, e := range stopErrors {
fmt.Printf(" - %s\n", e)
}
failed = append(failed, rigName)
continue
}
// === START PHASE ===
fmt.Printf(" Starting...\n")
var started []string
var skipped []string
// 1. Start the witness
witnessSession := fmt.Sprintf("gt-%s-witness", rigName)
witnessRunning, _ := t.HasSession(witnessSession)
if witnessRunning {
skipped = append(skipped, "witness")
} else {
fmt.Printf(" Starting witness...\n")
created, err := ensureWitnessSession(rigName, r)
if err != nil {
fmt.Printf(" %s Failed to start witness: %v\n", style.Warning.Render("⚠"), err)
startErrors = append(startErrors, fmt.Sprintf("witness: %v", err))
} else if created {
_ = witMgr.Start()
started = append(started, "witness")
}
}
// 2. Start the refinery
refinerySession := fmt.Sprintf("gt-%s-refinery", rigName)
refineryRunning, _ := t.HasSession(refinerySession)
if refineryRunning {
skipped = append(skipped, "refinery")
} else {
fmt.Printf(" Starting refinery...\n")
if err := refMgr.Start(false); err != nil {
fmt.Printf(" %s Failed to start refinery: %v\n", style.Warning.Render("⚠"), err)
startErrors = append(startErrors, fmt.Sprintf("refinery: %v", err))
} else {
started = append(started, "refinery")
}
}
// Report results for this rig
if len(started) > 0 {
fmt.Printf(" %s Started: %s\n", style.Success.Render("✓"), strings.Join(started, ", "))
}
if len(skipped) > 0 {
fmt.Printf(" %s Skipped: %s (already running)\n", style.Dim.Render("•"), strings.Join(skipped, ", "))
}
if len(startErrors) > 0 {
fmt.Printf(" %s Start errors:\n", style.Warning.Render("⚠"))
for _, e := range startErrors {
fmt.Printf(" - %s\n", e)
}
failed = append(failed, rigName)
} else {
fmt.Printf("%s Rig %s restarted\n", style.Success.Render("✓"), rigName)
succeeded = append(succeeded, rigName)
}
fmt.Println()
}
// Summary
if len(args) > 1 {
if len(succeeded) > 0 {
fmt.Printf("%s Restarted: %s\n", style.Success.Render("✓"), strings.Join(succeeded, ", "))
}
if len(failed) > 0 {
fmt.Printf("%s Failed: %s\n", style.Warning.Render("⚠"), strings.Join(failed, ", "))
fmt.Printf("\nUse %s to force shutdown (DANGER: will lose work!)\n", style.Bold.Render("--nuclear"))
return fmt.Errorf("some rigs failed to restart")
}
} else if len(failed) > 0 {
fmt.Printf("\nUse %s to force shutdown (DANGER: will lose work!)\n", style.Bold.Render("--nuclear"))
return fmt.Errorf("rig failed to restart")
}
return nil
}