Merge pull request #460 from sauerdaniel/pr/shutdown-reliability

fix(shutdown): Improve gastown shutdown reliability
This commit is contained in:
Steve Yegge
2026-01-21 20:33:54 -08:00
committed by GitHub
2 changed files with 84 additions and 26 deletions

View File

@@ -209,6 +209,14 @@ const recoveryHeartbeatInterval = 3 * time.Minute
// - Agents with work-on-hook not progressing (GUPP violation)
// - Orphaned work (assigned to dead agents)
func (d *Daemon) heartbeat(state *State) {
// Skip heartbeat if shutdown is in progress.
// This prevents the daemon from fighting shutdown by auto-restarting killed agents.
// The shutdown.lock file is created by gt down before terminating sessions.
if d.isShutdownInProgress() {
d.logger.Println("Shutdown in progress, skipping heartbeat")
return
}
d.logger.Println("Heartbeat starting (recovery-focused)")
// 1. Ensure Deacon is running (restart if dead)
@@ -672,6 +680,15 @@ func (d *Daemon) Stop() {
d.cancel()
}
// isShutdownInProgress reports whether a shutdown is currently in progress.
//
// It looks for <TownRoot>/daemon/shutdown.lock, which is created by gt down
// before terminating sessions. While that file exists the daemon must not
// auto-restart killed agents, otherwise it would fight the shutdown.
//
// Only a successful stat counts as "in progress". A missing lock file is the
// normal idle case and is silent. Any other stat failure (for example a
// permission or I/O error) still yields false, but is logged so that a daemon
// which cannot see the lock is diagnosable instead of silently resuming
// restarts.
func (d *Daemon) isShutdownInProgress() bool {
	lockPath := filepath.Join(d.config.TownRoot, "daemon", "shutdown.lock")
	_, err := os.Stat(lockPath)
	if err == nil {
		return true
	}
	// Distinguish "no lock file" from "could not check": the latter is
	// unexpected and worth surfacing in the daemon log.
	if !os.IsNotExist(err) {
		d.logger.Println("Cannot check shutdown lock, assuming no shutdown in progress:", err)
	}
	return false
}
// IsRunning checks if a daemon is running for the given town.
// It checks the PID file and verifies the process is alive.
// Note: The file lock in Run() is the authoritative mechanism for preventing