package daemon import ( "context" "fmt" "log" "os" "os/signal" "path/filepath" "strconv" "syscall" "time" "github.com/steveyegge/gastown/internal/tmux" ) // Daemon is the town-level background service. type Daemon struct { config *Config tmux *tmux.Tmux logger *log.Logger ctx context.Context cancel context.CancelFunc } // New creates a new daemon instance. func New(config *Config) (*Daemon, error) { // Ensure daemon directory exists daemonDir := filepath.Dir(config.LogFile) if err := os.MkdirAll(daemonDir, 0755); err != nil { return nil, fmt.Errorf("creating daemon directory: %w", err) } // Open log file logFile, err := os.OpenFile(config.LogFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644) if err != nil { return nil, fmt.Errorf("opening log file: %w", err) } logger := log.New(logFile, "", log.LstdFlags) ctx, cancel := context.WithCancel(context.Background()) return &Daemon{ config: config, tmux: tmux.NewTmux(), logger: logger, ctx: ctx, cancel: cancel, }, nil } // Run starts the daemon main loop. 
func (d *Daemon) Run() error { d.logger.Printf("Daemon starting (PID %d)", os.Getpid()) // Write PID file if err := os.WriteFile(d.config.PidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil { return fmt.Errorf("writing PID file: %w", err) } defer os.Remove(d.config.PidFile) // Update state state := &State{ Running: true, PID: os.Getpid(), StartedAt: time.Now(), } if err := SaveState(d.config.TownRoot, state); err != nil { d.logger.Printf("Warning: failed to save state: %v", err) } // Handle signals sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) // Heartbeat ticker ticker := time.NewTicker(d.config.HeartbeatInterval) defer ticker.Stop() d.logger.Printf("Daemon running, heartbeat every %v", d.config.HeartbeatInterval) // Initial heartbeat d.heartbeat(state) for { select { case <-d.ctx.Done(): d.logger.Println("Daemon context cancelled, shutting down") return d.shutdown(state) case sig := <-sigChan: d.logger.Printf("Received signal %v, shutting down", sig) return d.shutdown(state) case <-ticker.C: d.heartbeat(state) } } } // heartbeat performs one heartbeat cycle. func (d *Daemon) heartbeat(state *State) { d.logger.Println("Heartbeat starting") // 1. Poke Mayor d.pokeMayor() // 2. Poke Witnesses (for each rig) d.pokeWitnesses() // 3. Process lifecycle requests d.processLifecycleRequests() // Update state state.LastHeartbeat = time.Now() state.HeartbeatCount++ if err := SaveState(d.config.TownRoot, state); err != nil { d.logger.Printf("Warning: failed to save state: %v", err) } d.logger.Printf("Heartbeat complete (#%d)", state.HeartbeatCount) } // pokeMayor sends a heartbeat to the Mayor session. 
func (d *Daemon) pokeMayor() { const mayorSession = "gt-mayor" running, err := d.tmux.HasSession(mayorSession) if err != nil { d.logger.Printf("Error checking Mayor session: %v", err) return } if !running { d.logger.Println("Mayor session not running, skipping poke") return } // Send heartbeat message via tmux msg := "HEARTBEAT: check your rigs" if err := d.tmux.SendKeys(mayorSession, msg); err != nil { d.logger.Printf("Error poking Mayor: %v", err) return } d.logger.Println("Poked Mayor") } // pokeWitnesses sends heartbeats to all Witness sessions. func (d *Daemon) pokeWitnesses() { // Find all rigs by looking for witness sessions // Session naming: gt--witness sessions, err := d.tmux.ListSessions() if err != nil { d.logger.Printf("Error listing sessions: %v", err) return } for _, session := range sessions { // Check if it's a witness session if !isWitnessSession(session) { continue } msg := "HEARTBEAT: check your workers" if err := d.tmux.SendKeys(session, msg); err != nil { d.logger.Printf("Error poking Witness %s: %v", session, err) continue } d.logger.Printf("Poked Witness: %s", session) } } // isWitnessSession checks if a session name is a witness session. func isWitnessSession(name string) bool { // Pattern: gt--witness if len(name) < 12 { // "gt-x-witness" minimum return false } return name[:3] == "gt-" && name[len(name)-8:] == "-witness" } // processLifecycleRequests checks for and processes lifecycle requests. func (d *Daemon) processLifecycleRequests() { d.ProcessLifecycleRequests() } // shutdown performs graceful shutdown. func (d *Daemon) shutdown(state *State) error { d.logger.Println("Daemon shutting down") state.Running = false if err := SaveState(d.config.TownRoot, state); err != nil { d.logger.Printf("Warning: failed to save final state: %v", err) } d.logger.Println("Daemon stopped") return nil } // Stop signals the daemon to stop. func (d *Daemon) Stop() { d.cancel() } // IsRunning checks if a daemon is running for the given town. 
// It reads the PID from <townRoot>/daemon/daemon.pid and probes that process
// with signal 0.
//
// It returns (true, pid, nil) when the process exists and (false, 0, nil) when
// the PID file is missing, does not hold a positive integer, or names a
// process that is gone; in the last case the stale PID file is removed. An
// error is returned only when the PID file exists but cannot be read.
func IsRunning(townRoot string) (bool, int, error) {
	pidFile := filepath.Join(townRoot, "daemon", "daemon.pid")

	data, err := os.ReadFile(pidFile)
	if err != nil {
		if os.IsNotExist(err) {
			return false, 0, nil
		}
		return false, 0, err
	}

	// Zero and negative values are not process IDs; a negative value handed to
	// kill(2) addresses a whole process group, so never pass one on.
	pid, err := strconv.Atoi(string(data))
	if err != nil || pid <= 0 {
		return false, 0, nil
	}

	// Check if process is running
	process, err := os.FindProcess(pid)
	if err != nil {
		return false, 0, nil
	}

	// On Unix, FindProcess always succeeds. Send signal 0 to check if alive.
	// EPERM means the process exists but belongs to another user, so it still
	// counts as running. Signal returns the raw errno, hence the direct
	// comparison.
	err = process.Signal(syscall.Signal(0))
	if err == nil || err == syscall.EPERM {
		return true, pid, nil
	}

	// Process not running, clean up stale PID file (best effort).
	_ = os.Remove(pidFile)
	return false, 0, nil
}

// StopDaemon stops the running daemon for the given town.
//
// It sends SIGTERM, waits up to 500ms for the process to exit, and falls back
// to SIGKILL if it is still alive. The PID file is removed afterwards.
//
// It returns an error if no daemon is running, if SIGTERM cannot be delivered,
// or if SIGKILL fails while the process is still alive; in that last case the
// PID file is left in place.
func StopDaemon(townRoot string) error {
	running, pid, err := IsRunning(townRoot)
	if err != nil {
		return err
	}
	if !running {
		return fmt.Errorf("daemon is not running")
	}

	process, err := os.FindProcess(pid)
	if err != nil {
		return fmt.Errorf("finding process: %w", err)
	}

	// Send SIGTERM for graceful shutdown
	if err := process.Signal(syscall.SIGTERM); err != nil {
		return fmt.Errorf("sending SIGTERM: %w", err)
	}

	// Poll instead of sleeping for the whole grace period, so a daemon that
	// exits promptly is not kept waiting and is not force-killed needlessly.
	const (
		gracePeriod  = 500 * time.Millisecond
		pollInterval = 25 * time.Millisecond
	)
	deadline := time.Now().Add(gracePeriod)
	alive := true
	for alive && time.Now().Before(deadline) {
		time.Sleep(pollInterval)
		alive = process.Signal(syscall.Signal(0)) == nil
	}

	if alive {
		// Still running, force kill.
		if err := process.Signal(syscall.SIGKILL); err != nil {
			// The daemon may have exited between the last probe and the kill;
			// only report the failure if it is demonstrably still there.
			if process.Signal(syscall.Signal(0)) == nil {
				return fmt.Errorf("sending SIGKILL: %w", err)
			}
		}
	}

	// Clean up PID file (best effort; a gracefully exiting daemon removes it itself).
	pidFile := filepath.Join(townRoot, "daemon", "daemon.pid")
	_ = os.Remove(pidFile)

	return nil
}