Implements the town daemon (gt-99m) that handles: - Periodic heartbeat to poke Mayor and Witnesses - Lifecycle request processing (cycle, restart, shutdown) - Session management for agent restarts Commands: - gt daemon start: Start daemon in background - gt daemon stop: Stop running daemon - gt daemon status: Show daemon status and stats - gt daemon logs: View daemon log file The daemon is a "dumb scheduler" - all intelligence remains in agents. It simply pokes them on schedule and executes lifecycle requests. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
275 lines
6.3 KiB
Go
275 lines
6.3 KiB
Go
package daemon
|
|
|
|
import (
	"context"
	"errors"
	"fmt"
	"log"
	"os"
	"os/signal"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/steveyegge/gastown/internal/tmux"
)
|
|
|
|
// Daemon is the town-level background service.
|
|
type Daemon struct {
|
|
config *Config
|
|
tmux *tmux.Tmux
|
|
logger *log.Logger
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
}
|
|
|
|
// New creates a new daemon instance.
|
|
func New(config *Config) (*Daemon, error) {
|
|
// Ensure daemon directory exists
|
|
daemonDir := filepath.Dir(config.LogFile)
|
|
if err := os.MkdirAll(daemonDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating daemon directory: %w", err)
|
|
}
|
|
|
|
// Open log file
|
|
logFile, err := os.OpenFile(config.LogFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opening log file: %w", err)
|
|
}
|
|
|
|
logger := log.New(logFile, "", log.LstdFlags)
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
return &Daemon{
|
|
config: config,
|
|
tmux: tmux.NewTmux(),
|
|
logger: logger,
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
}, nil
|
|
}
|
|
|
|
// Run starts the daemon main loop.
|
|
func (d *Daemon) Run() error {
|
|
d.logger.Printf("Daemon starting (PID %d)", os.Getpid())
|
|
|
|
// Write PID file
|
|
if err := os.WriteFile(d.config.PidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
|
|
return fmt.Errorf("writing PID file: %w", err)
|
|
}
|
|
defer os.Remove(d.config.PidFile)
|
|
|
|
// Update state
|
|
state := &State{
|
|
Running: true,
|
|
PID: os.Getpid(),
|
|
StartedAt: time.Now(),
|
|
}
|
|
if err := SaveState(d.config.TownRoot, state); err != nil {
|
|
d.logger.Printf("Warning: failed to save state: %v", err)
|
|
}
|
|
|
|
// Handle signals
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
|
|
|
// Heartbeat ticker
|
|
ticker := time.NewTicker(d.config.HeartbeatInterval)
|
|
defer ticker.Stop()
|
|
|
|
d.logger.Printf("Daemon running, heartbeat every %v", d.config.HeartbeatInterval)
|
|
|
|
// Initial heartbeat
|
|
d.heartbeat(state)
|
|
|
|
for {
|
|
select {
|
|
case <-d.ctx.Done():
|
|
d.logger.Println("Daemon context cancelled, shutting down")
|
|
return d.shutdown(state)
|
|
|
|
case sig := <-sigChan:
|
|
d.logger.Printf("Received signal %v, shutting down", sig)
|
|
return d.shutdown(state)
|
|
|
|
case <-ticker.C:
|
|
d.heartbeat(state)
|
|
}
|
|
}
|
|
}
|
|
|
|
// heartbeat performs one heartbeat cycle.
|
|
func (d *Daemon) heartbeat(state *State) {
|
|
d.logger.Println("Heartbeat starting")
|
|
|
|
// 1. Poke Mayor
|
|
d.pokeMayor()
|
|
|
|
// 2. Poke Witnesses (for each rig)
|
|
d.pokeWitnesses()
|
|
|
|
// 3. Process lifecycle requests
|
|
d.processLifecycleRequests()
|
|
|
|
// Update state
|
|
state.LastHeartbeat = time.Now()
|
|
state.HeartbeatCount++
|
|
if err := SaveState(d.config.TownRoot, state); err != nil {
|
|
d.logger.Printf("Warning: failed to save state: %v", err)
|
|
}
|
|
|
|
d.logger.Printf("Heartbeat complete (#%d)", state.HeartbeatCount)
|
|
}
|
|
|
|
// pokeMayor sends a heartbeat to the Mayor session.
|
|
func (d *Daemon) pokeMayor() {
|
|
const mayorSession = "gt-mayor"
|
|
|
|
running, err := d.tmux.HasSession(mayorSession)
|
|
if err != nil {
|
|
d.logger.Printf("Error checking Mayor session: %v", err)
|
|
return
|
|
}
|
|
|
|
if !running {
|
|
d.logger.Println("Mayor session not running, skipping poke")
|
|
return
|
|
}
|
|
|
|
// Send heartbeat message via tmux
|
|
msg := "HEARTBEAT: check your rigs"
|
|
if err := d.tmux.SendKeys(mayorSession, msg); err != nil {
|
|
d.logger.Printf("Error poking Mayor: %v", err)
|
|
return
|
|
}
|
|
|
|
d.logger.Println("Poked Mayor")
|
|
}
|
|
|
|
// pokeWitnesses sends heartbeats to all Witness sessions.
|
|
func (d *Daemon) pokeWitnesses() {
|
|
// Find all rigs by looking for witness sessions
|
|
// Session naming: gt-<rig>-witness
|
|
sessions, err := d.tmux.ListSessions()
|
|
if err != nil {
|
|
d.logger.Printf("Error listing sessions: %v", err)
|
|
return
|
|
}
|
|
|
|
for _, session := range sessions {
|
|
// Check if it's a witness session
|
|
if !isWitnessSession(session) {
|
|
continue
|
|
}
|
|
|
|
msg := "HEARTBEAT: check your workers"
|
|
if err := d.tmux.SendKeys(session, msg); err != nil {
|
|
d.logger.Printf("Error poking Witness %s: %v", session, err)
|
|
continue
|
|
}
|
|
|
|
d.logger.Printf("Poked Witness: %s", session)
|
|
}
|
|
}
|
|
|
|
// isWitnessSession reports whether name follows the witness session naming
// pattern "gt-<rig>-witness" with a non-empty <rig> part.
func isWitnessSession(name string) bool {
	const (
		prefix = "gt-"
		suffix = "-witness"
	)

	// Requiring more characters than prefix+suffix guarantees at least one
	// rig character and keeps the two from overlapping (as in "gt-witness").
	if len(name) <= len(prefix)+len(suffix) {
		return false
	}
	return strings.HasPrefix(name, prefix) && strings.HasSuffix(name, suffix)
}
|
|
|
|
// processLifecycleRequests checks for and processes lifecycle requests.
|
|
func (d *Daemon) processLifecycleRequests() {
|
|
d.ProcessLifecycleRequests()
|
|
}
|
|
|
|
// shutdown performs graceful shutdown.
|
|
func (d *Daemon) shutdown(state *State) error {
|
|
d.logger.Println("Daemon shutting down")
|
|
|
|
state.Running = false
|
|
if err := SaveState(d.config.TownRoot, state); err != nil {
|
|
d.logger.Printf("Warning: failed to save final state: %v", err)
|
|
}
|
|
|
|
d.logger.Println("Daemon stopped")
|
|
return nil
|
|
}
|
|
|
|
// Stop signals the daemon to stop.
|
|
func (d *Daemon) Stop() {
|
|
d.cancel()
|
|
}
|
|
|
|
// IsRunning reports whether a daemon is running for the given town.
//
// It reads <townRoot>/daemon/daemon.pid and probes the recorded process with
// signal 0. The results are:
//
//   - (true, pid, nil) when the process exists, including when the probe is
//     refused with EPERM (the process exists but belongs to another user);
//   - (false, 0, nil) when the PID file is missing, does not hold a positive
//     integer, or names a process that no longer exists. In the last case the
//     stale PID file is removed;
//   - (false, 0, err) for any other error reading the PID file.
func IsRunning(townRoot string) (bool, int, error) {
	pidFile := filepath.Join(townRoot, "daemon", "daemon.pid")
	data, err := os.ReadFile(pidFile)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return false, 0, nil
		}
		return false, 0, err
	}

	// Tolerate surrounding whitespace such as a trailing newline; Atoi
	// rejects it, which would make a live daemon look stopped.
	pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
	if err != nil || pid <= 0 {
		// Not a usable PID. Non-positive values must never reach the signal
		// probe: for kill(2) they address process groups, not one process.
		return false, 0, nil
	}

	process, err := os.FindProcess(pid)
	if err != nil {
		return false, 0, nil
	}

	// On Unix, FindProcess always succeeds. Send signal 0 to check if alive.
	if err := process.Signal(syscall.Signal(0)); err != nil {
		if errors.Is(err, syscall.EPERM) {
			// The process exists but we may not signal it; it is still alive,
			// so the PID file is not stale and must be kept.
			return true, pid, nil
		}
		// Process not running: clean up the stale PID file (best effort).
		os.Remove(pidFile)
		return false, 0, nil
	}

	return true, pid, nil
}
|
|
|
|
// StopDaemon stops the running daemon for the given town.
|
|
func StopDaemon(townRoot string) error {
|
|
running, pid, err := IsRunning(townRoot)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !running {
|
|
return fmt.Errorf("daemon is not running")
|
|
}
|
|
|
|
process, err := os.FindProcess(pid)
|
|
if err != nil {
|
|
return fmt.Errorf("finding process: %w", err)
|
|
}
|
|
|
|
// Send SIGTERM for graceful shutdown
|
|
if err := process.Signal(syscall.SIGTERM); err != nil {
|
|
return fmt.Errorf("sending SIGTERM: %w", err)
|
|
}
|
|
|
|
// Wait a bit for graceful shutdown
|
|
time.Sleep(500 * time.Millisecond)
|
|
|
|
// Check if still running
|
|
if err := process.Signal(syscall.Signal(0)); err == nil {
|
|
// Still running, force kill
|
|
process.Signal(syscall.SIGKILL)
|
|
}
|
|
|
|
// Clean up PID file
|
|
pidFile := filepath.Join(townRoot, "daemon", "daemon.pid")
|
|
os.Remove(pidFile)
|
|
|
|
return nil
|
|
}
|