fix(shutdown): Improve gastown shutdown reliability
Fixes #291 - gastown is very hard to kill/shutdown/stop

Changes:
- Add shutdown coordination: daemon checks shutdown.lock and skips heartbeat auto-restarts during shutdown to prevent fighting shutdown
- Add orphaned Claude/node process detection in shutdown verification

The daemon's heartbeat now checks for shutdown.lock (created by gt down) and skips auto-restart logic when shutdown is in progress. This prevents the daemon from restarting agents that were intentionally killed during shutdown.

Shutdown verification now includes detection of orphaned Claude/node processes that may be left behind when tmux sessions are killed but child processes don't terminate.
This commit is contained in:
@@ -209,6 +209,14 @@ const recoveryHeartbeatInterval = 3 * time.Minute
|
||||
// - Agents with work-on-hook not progressing (GUPP violation)
|
||||
// - Orphaned work (assigned to dead agents)
|
||||
func (d *Daemon) heartbeat(state *State) {
|
||||
// Skip heartbeat if shutdown is in progress.
|
||||
// This prevents the daemon from fighting shutdown by auto-restarting killed agents.
|
||||
// The shutdown.lock file is created by gt down before terminating sessions.
|
||||
if d.isShutdownInProgress() {
|
||||
d.logger.Println("Shutdown in progress, skipping heartbeat")
|
||||
return
|
||||
}
|
||||
|
||||
d.logger.Println("Heartbeat starting (recovery-focused)")
|
||||
|
||||
// 1. Ensure Deacon is running (restart if dead)
|
||||
@@ -669,6 +677,15 @@ func (d *Daemon) Stop() {
|
||||
d.cancel()
|
||||
}
|
||||
|
||||
// isShutdownInProgress checks if a shutdown is currently in progress.
|
||||
// The shutdown.lock file is created by gt down before terminating sessions.
|
||||
// This prevents the daemon from fighting shutdown by auto-restarting killed agents.
|
||||
func (d *Daemon) isShutdownInProgress() bool {
|
||||
lockPath := filepath.Join(d.config.TownRoot, "daemon", "shutdown.lock")
|
||||
_, err := os.Stat(lockPath)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// IsRunning checks if a daemon is running for the given town.
|
||||
// It checks the PID file and verifies the process is alive.
|
||||
// Note: The file lock in Run() is the authoritative mechanism for preventing
|
||||
|
||||
Reference in New Issue
Block a user