Files
gastown/internal/daemon/lifecycle.go
gastown/crew/jack e8d27e7212 fix: Create and lookup rig agent beads with correct prefix
Per docs/architecture.md, Witness and Refinery are rig-level agents that
should use the rig's configured prefix (e.g., pi- for pixelforge) instead
of hardcoded "gt-".

This extends PR #183's creation fix to also fix all lookup paths:
- internal/rig/manager.go: Create agent beads in rig beads with rig prefix
- internal/daemon/daemon.go: Use rig prefix when looking up agent state
- internal/daemon/lifecycle.go: Use rig prefix for identity-to-bead mapping
- internal/cmd/sling.go: Pass townRoot for prefix lookup
- internal/cmd/unsling.go: Pass townRoot for prefix lookup
- internal/cmd/molecule_status.go: Use rig prefix for agent bead lookups
- internal/cmd/molecule_attach.go: Use rig prefix for agent bead lookups
- internal/config/loader.go: Add GetRigPrefix helper

Without this fix, the daemon would:
- Create pi-gastown-witness but look for gt-gastown-witness
- Report agents as missing/dead when they are running
- Fail to manage agent lifecycle correctly

Based on work by Johann Taberlet in PR #183.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Johann Taberlet <johann.taberlet@gmail.com>
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 19:39:57 -08:00

976 lines
32 KiB
Go

package daemon
import (
"encoding/json"
"fmt"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/steveyegge/gastown/internal/beads"
"github.com/steveyegge/gastown/internal/config"
"github.com/steveyegge/gastown/internal/constants"
"github.com/steveyegge/gastown/internal/rig"
"github.com/steveyegge/gastown/internal/session"
"github.com/steveyegge/gastown/internal/tmux"
)
// BeadsMessage represents a message from gt mail inbox --json.
type BeadsMessage struct {
ID string `json:"id"`
From string `json:"from"`
To string `json:"to"`
Subject string `json:"subject"`
Body string `json:"body"`
Timestamp string `json:"timestamp"`
Read bool `json:"read"`
Priority string `json:"priority"`
Type string `json:"type"`
}
// MaxLifecycleMessageAge is the maximum age of a lifecycle message before it's ignored.
// Messages older than this are considered stale and deleted without execution.
const MaxLifecycleMessageAge = 6 * time.Hour
// ProcessLifecycleRequests checks for and processes lifecycle requests from the deacon inbox.
func (d *Daemon) ProcessLifecycleRequests() {
// Get mail for deacon identity (using gt mail, not bd mail)
cmd := exec.Command("gt", "mail", "inbox", "--identity", "deacon/", "--json")
cmd.Dir = d.config.TownRoot
output, err := cmd.Output()
if err != nil {
// gt mail might not be available or inbox empty
return
}
if len(output) == 0 || string(output) == "[]" || string(output) == "[]\n" {
return
}
var messages []BeadsMessage
if err := json.Unmarshal(output, &messages); err != nil {
d.logger.Printf("Error parsing mail: %v", err)
return
}
for _, msg := range messages {
if msg.Read {
continue // Already processed
}
request := d.parseLifecycleRequest(&msg)
if request == nil {
continue // Not a lifecycle request
}
// Check message age - ignore stale lifecycle requests
if msgTime, err := time.Parse(time.RFC3339, msg.Timestamp); err == nil {
age := time.Since(msgTime)
if age > MaxLifecycleMessageAge {
d.logger.Printf("Ignoring stale lifecycle request from %s (age: %v, max: %v) - deleting",
request.From, age.Round(time.Minute), MaxLifecycleMessageAge)
if err := d.closeMessage(msg.ID); err != nil {
d.logger.Printf("Warning: failed to delete stale message %s: %v", msg.ID, err)
}
continue
}
}
d.logger.Printf("Processing lifecycle request from %s: %s", request.From, request.Action)
// CRITICAL: Delete message FIRST, before executing action.
// This prevents stale messages from being reprocessed on every heartbeat.
// "Claim then execute" pattern: claim by deleting, then execute.
// Even if action fails, the message is gone - sender must re-request.
if err := d.closeMessage(msg.ID); err != nil {
d.logger.Printf("Warning: failed to delete message %s before execution: %v", msg.ID, err)
// Continue anyway - better to attempt action than leave stale message
}
if err := d.executeLifecycleAction(request); err != nil {
d.logger.Printf("Error executing lifecycle action: %v", err)
continue
}
}
}
// LifecycleBody is the structured body format for lifecycle requests.
// Claude should send mail with JSON body: {"action": "cycle"} or {"action": "shutdown"}
type LifecycleBody struct {
Action string `json:"action"`
}
// parseLifecycleRequest extracts a lifecycle request from a message.
// Uses structured body parsing instead of keyword matching on subject.
func (d *Daemon) parseLifecycleRequest(msg *BeadsMessage) *LifecycleRequest {
// Gate: subject must start with "LIFECYCLE:"
subject := strings.ToLower(msg.Subject)
if !strings.HasPrefix(subject, "lifecycle:") {
return nil
}
// Parse structured body for action
var body LifecycleBody
if err := json.Unmarshal([]byte(msg.Body), &body); err != nil {
// Fallback: check for simple action strings in body
bodyLower := strings.ToLower(strings.TrimSpace(msg.Body))
switch {
case bodyLower == "restart" || bodyLower == "action: restart":
body.Action = "restart"
case bodyLower == "shutdown" || bodyLower == "action: shutdown" || bodyLower == "stop":
body.Action = "shutdown"
case bodyLower == "cycle" || bodyLower == "action: cycle":
body.Action = "cycle"
default:
d.logger.Printf("Lifecycle request with unparseable body: %q", msg.Body)
return nil
}
}
// Map action string to enum
var action LifecycleAction
switch strings.ToLower(body.Action) {
case "restart":
action = ActionRestart
case "shutdown", "stop":
action = ActionShutdown
case "cycle":
action = ActionCycle
default:
d.logger.Printf("Unknown lifecycle action: %q", body.Action)
return nil
}
return &LifecycleRequest{
From: msg.From,
Action: action,
Timestamp: time.Now(),
}
}
// executeLifecycleAction performs the requested lifecycle action.
func (d *Daemon) executeLifecycleAction(request *LifecycleRequest) error {
// Determine session name from sender identity
sessionName := d.identityToSession(request.From)
if sessionName == "" {
return fmt.Errorf("unknown agent identity: %s", request.From)
}
d.logger.Printf("Executing %s for session %s", request.Action, sessionName)
// Check agent bead state (ZFC: trust what agent reports) - gt-39ttg
agentBeadID := d.identityToAgentBeadID(request.From)
if agentBeadID != "" {
if beadState, err := d.getAgentBeadState(agentBeadID); err == nil {
d.logger.Printf("Agent bead %s reports state: %s", agentBeadID, beadState)
}
}
// Check if session exists (tmux detection still needed for lifecycle actions)
running, err := d.tmux.HasSession(sessionName)
if err != nil {
return fmt.Errorf("checking session: %w", err)
}
switch request.Action {
case ActionShutdown:
if running {
if err := d.tmux.KillSession(sessionName); err != nil {
return fmt.Errorf("killing session: %w", err)
}
d.logger.Printf("Killed session %s", sessionName)
}
return nil
case ActionCycle, ActionRestart:
if running {
// Kill the session first
if err := d.tmux.KillSession(sessionName); err != nil {
return fmt.Errorf("killing session: %w", err)
}
d.logger.Printf("Killed session %s for restart", sessionName)
// Wait a moment
time.Sleep(constants.ShutdownNotifyDelay)
}
// Restart the session
if err := d.restartSession(sessionName, request.From); err != nil {
return fmt.Errorf("restarting session: %w", err)
}
d.logger.Printf("Restarted session %s", sessionName)
return nil
default:
return fmt.Errorf("unknown action: %s", request.Action)
}
}
// ParsedIdentity holds the components extracted from an agent identity string.
// This is used to look up the appropriate role bead for lifecycle config.
type ParsedIdentity struct {
RoleType string // mayor, deacon, witness, refinery, crew, polecat
RigName string // Empty for town-level agents (mayor, deacon)
AgentName string // Empty for singletons (mayor, deacon, witness, refinery)
}
// parseIdentity extracts role type, rig name, and agent name from an identity string.
// This is the ONLY place where identity string patterns are parsed.
// All other functions should use the extracted components to look up role beads.
func parseIdentity(identity string) (*ParsedIdentity, error) {
switch identity {
case "mayor":
return &ParsedIdentity{RoleType: "mayor"}, nil
case "deacon":
return &ParsedIdentity{RoleType: "deacon"}, nil
}
// Pattern: <rig>-witness → witness role
if strings.HasSuffix(identity, "-witness") {
rigName := strings.TrimSuffix(identity, "-witness")
return &ParsedIdentity{RoleType: "witness", RigName: rigName}, nil
}
// Pattern: <rig>-refinery → refinery role
if strings.HasSuffix(identity, "-refinery") {
rigName := strings.TrimSuffix(identity, "-refinery")
return &ParsedIdentity{RoleType: "refinery", RigName: rigName}, nil
}
// Pattern: <rig>-crew-<name> → crew role
if strings.Contains(identity, "-crew-") {
parts := strings.SplitN(identity, "-crew-", 2)
if len(parts) == 2 {
return &ParsedIdentity{RoleType: "crew", RigName: parts[0], AgentName: parts[1]}, nil
}
}
// Pattern: <rig>-polecat-<name> → polecat role
if strings.Contains(identity, "-polecat-") {
parts := strings.SplitN(identity, "-polecat-", 2)
if len(parts) == 2 {
return &ParsedIdentity{RoleType: "polecat", RigName: parts[0], AgentName: parts[1]}, nil
}
}
// Pattern: <rig>/polecats/<name> → polecat role (slash format)
if strings.Contains(identity, "/polecats/") {
parts := strings.Split(identity, "/polecats/")
if len(parts) == 2 {
return &ParsedIdentity{RoleType: "polecat", RigName: parts[0], AgentName: parts[1]}, nil
}
}
return nil, fmt.Errorf("unknown identity format: %s", identity)
}
// getRoleConfigForIdentity looks up the role bead for an identity and returns its config.
// Falls back to default config if role bead doesn't exist or has no config.
func (d *Daemon) getRoleConfigForIdentity(identity string) (*beads.RoleConfig, *ParsedIdentity, error) {
parsed, err := parseIdentity(identity)
if err != nil {
return nil, nil, err
}
// Look up role bead
roleBeadID := beads.RoleBeadID(parsed.RoleType)
b := beads.New(d.config.TownRoot)
config, err := b.GetRoleConfig(roleBeadID)
if err != nil {
d.logger.Printf("Warning: failed to get role config for %s: %v", roleBeadID, err)
}
// Return parsed identity even if config is nil (caller can use defaults)
return config, parsed, nil
}
// identityToSession converts a beads identity to a tmux session name.
// Uses role bead config if available, falls back to hardcoded patterns.
func (d *Daemon) identityToSession(identity string) string {
config, parsed, err := d.getRoleConfigForIdentity(identity)
if err != nil {
return ""
}
// If role bead has session_pattern, use it
if config != nil && config.SessionPattern != "" {
return beads.ExpandRolePattern(config.SessionPattern, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType)
}
// Fallback: use default patterns based on role type
switch parsed.RoleType {
case "mayor":
return session.MayorSessionName()
case "deacon":
return session.DeaconSessionName()
case "witness", "refinery":
return fmt.Sprintf("gt-%s-%s", parsed.RigName, parsed.RoleType)
case "crew":
return fmt.Sprintf("gt-%s-crew-%s", parsed.RigName, parsed.AgentName)
case "polecat":
return fmt.Sprintf("gt-%s-%s", parsed.RigName, parsed.AgentName)
default:
return ""
}
}
// restartSession starts a new session for the given agent.
// Uses role bead config if available, falls back to hardcoded defaults.
func (d *Daemon) restartSession(sessionName, identity string) error {
// Get role config for this identity
config, parsed, err := d.getRoleConfigForIdentity(identity)
if err != nil {
return fmt.Errorf("parsing identity: %w", err)
}
// Determine working directory
workDir := d.getWorkDir(config, parsed)
if workDir == "" {
return fmt.Errorf("cannot determine working directory for %s", identity)
}
// Determine if pre-sync is needed
needsPreSync := d.getNeedsPreSync(config, parsed)
// Pre-sync workspace for agents with git worktrees
if needsPreSync {
d.logger.Printf("Pre-syncing workspace for %s at %s", identity, workDir)
d.syncWorkspace(workDir)
}
// Create session
// Use EnsureSessionFresh to handle zombie sessions that exist but have dead Claude
if err := d.tmux.EnsureSessionFresh(sessionName, workDir); err != nil {
return fmt.Errorf("creating session: %w", err)
}
// Set environment variables
d.setSessionEnvironment(sessionName, identity, config, parsed)
// Apply theme (non-fatal: theming failure doesn't affect operation)
d.applySessionTheme(sessionName, parsed)
// Get and send startup command
startCmd := d.getStartCommand(config, parsed)
if err := d.tmux.SendKeys(sessionName, startCmd); err != nil {
return fmt.Errorf("sending startup command: %w", err)
}
// Wait for Claude to start, then accept bypass permissions warning if it appears.
// This ensures automated role starts aren't blocked by the warning dialog.
if err := d.tmux.WaitForCommand(sessionName, constants.SupportedShells, constants.ClaudeStartTimeout); err != nil {
// Non-fatal - Claude might still start
}
_ = d.tmux.AcceptBypassPermissionsWarning(sessionName)
time.Sleep(constants.ShutdownNotifyDelay)
// GUPP: Gas Town Universal Propulsion Principle
// Send startup nudge for predecessor discovery via /resume
recipient := identityToBDActor(identity)
_ = session.StartupNudge(d.tmux, sessionName, session.StartupNudgeConfig{
Recipient: recipient,
Sender: "deacon",
Topic: "lifecycle-restart",
}) // Non-fatal
// Send propulsion nudge to trigger autonomous execution.
// Wait for beacon to be fully processed (needs to be separate prompt)
time.Sleep(2 * time.Second)
_ = d.tmux.NudgeSession(sessionName, session.PropulsionNudgeForRole(parsed.RoleType, workDir)) // Non-fatal
return nil
}
// getWorkDir determines the working directory for an agent.
// Uses role bead config if available, falls back to hardcoded defaults.
func (d *Daemon) getWorkDir(config *beads.RoleConfig, parsed *ParsedIdentity) string {
// If role bead has work_dir_pattern, use it
if config != nil && config.WorkDirPattern != "" {
return beads.ExpandRolePattern(config.WorkDirPattern, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType)
}
// Fallback: use default patterns based on role type
switch parsed.RoleType {
case "mayor":
return d.config.TownRoot
case "deacon":
return d.config.TownRoot
case "witness":
return filepath.Join(d.config.TownRoot, parsed.RigName)
case "refinery":
return filepath.Join(d.config.TownRoot, parsed.RigName, "refinery", "rig")
case "crew":
return filepath.Join(d.config.TownRoot, parsed.RigName, "crew", parsed.AgentName)
case "polecat":
return filepath.Join(d.config.TownRoot, parsed.RigName, "polecats", parsed.AgentName)
default:
return ""
}
}
// getNeedsPreSync determines if a workspace needs git sync before starting.
// Uses role bead config if available, falls back to hardcoded defaults.
func (d *Daemon) getNeedsPreSync(config *beads.RoleConfig, parsed *ParsedIdentity) bool {
// If role bead has explicit config, use it
if config != nil {
return config.NeedsPreSync
}
// Fallback: roles with persistent git clones need pre-sync
switch parsed.RoleType {
case "refinery", "crew", "polecat":
return true
default:
return false
}
}
// getStartCommand determines the startup command for an agent.
// Uses role bead config if available, falls back to hardcoded defaults.
func (d *Daemon) getStartCommand(roleConfig *beads.RoleConfig, parsed *ParsedIdentity) string {
// If role bead has explicit config, use it
if roleConfig != nil && roleConfig.StartCommand != "" {
// Expand any patterns in the command
return beads.ExpandRolePattern(roleConfig.StartCommand, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType)
}
// Default command for all agents - use runtime config
defaultCmd := "exec " + config.GetRuntimeCommand("")
// Polecats need environment variables set in the command
if parsed.RoleType == "polecat" {
return config.BuildPolecatStartupCommand(parsed.RigName, parsed.AgentName, "", "")
}
return defaultCmd
}
// setSessionEnvironment sets environment variables for the tmux session.
// Uses role bead config if available, falls back to hardcoded defaults.
func (d *Daemon) setSessionEnvironment(sessionName, identity string, config *beads.RoleConfig, parsed *ParsedIdentity) {
// Always set GT_ROLE
_ = d.tmux.SetEnvironment(sessionName, "GT_ROLE", identity)
// BD_ACTOR uses slashes instead of dashes for path-like identity
bdActor := identityToBDActor(identity)
_ = d.tmux.SetEnvironment(sessionName, "BD_ACTOR", bdActor)
// Set any custom env vars from role config
if config != nil {
for k, v := range config.EnvVars {
expanded := beads.ExpandRolePattern(v, d.config.TownRoot, parsed.RigName, parsed.AgentName, parsed.RoleType)
_ = d.tmux.SetEnvironment(sessionName, k, expanded)
}
}
}
// applySessionTheme applies tmux theming to the session.
func (d *Daemon) applySessionTheme(sessionName string, parsed *ParsedIdentity) {
if parsed.RoleType == "mayor" {
theme := tmux.MayorTheme()
_ = d.tmux.ConfigureGasTownSession(sessionName, theme, "", "Mayor", "coordinator")
} else if parsed.RigName != "" {
theme := tmux.AssignTheme(parsed.RigName)
_ = d.tmux.ConfigureGasTownSession(sessionName, theme, parsed.RigName, parsed.RoleType, parsed.RoleType)
}
}
// syncWorkspace syncs a git workspace before starting a new session.
// This ensures agents with persistent clones (like refinery) start with current code.
func (d *Daemon) syncWorkspace(workDir string) {
// Determine default branch from rig config
// workDir is like <townRoot>/<rigName>/<role>/rig or <townRoot>/<rigName>/crew/<name>
defaultBranch := "main" // fallback
rel, err := filepath.Rel(d.config.TownRoot, workDir)
if err == nil {
parts := strings.Split(rel, string(filepath.Separator))
if len(parts) > 0 {
rigPath := filepath.Join(d.config.TownRoot, parts[0])
if rigCfg, err := rig.LoadRigConfig(rigPath); err == nil && rigCfg.DefaultBranch != "" {
defaultBranch = rigCfg.DefaultBranch
}
}
}
// Fetch latest from origin
fetchCmd := exec.Command("git", "fetch", "origin")
fetchCmd.Dir = workDir
if err := fetchCmd.Run(); err != nil {
d.logger.Printf("Warning: git fetch failed in %s: %v", workDir, err)
}
// Pull with rebase to incorporate changes
pullCmd := exec.Command("git", "pull", "--rebase", "origin", defaultBranch)
pullCmd.Dir = workDir
if err := pullCmd.Run(); err != nil {
d.logger.Printf("Warning: git pull failed in %s: %v", workDir, err)
// Don't fail - agent can handle conflicts
}
// Sync beads
bdCmd := exec.Command("bd", "sync")
bdCmd.Dir = workDir
if err := bdCmd.Run(); err != nil {
d.logger.Printf("Warning: bd sync failed in %s: %v", workDir, err)
}
}
// closeMessage removes a lifecycle mail message after processing.
// We use delete instead of read because gt mail read intentionally
// doesn't mark messages as read (to preserve handoff messages).
func (d *Daemon) closeMessage(id string) error {
// Use gt mail delete to actually remove the message
cmd := exec.Command("gt", "mail", "delete", id)
cmd.Dir = d.config.TownRoot
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("gt mail delete %s: %v (output: %s)", id, err, string(output))
}
d.logger.Printf("Deleted lifecycle message: %s", id)
return nil
}
// AgentBeadInfo represents the parsed fields from an agent bead.
type AgentBeadInfo struct {
ID string `json:"id"`
Type string `json:"issue_type"`
State string // Parsed from description: agent_state
HookBead string // Parsed from description: hook_bead
RoleBead string // Parsed from description: role_bead
RoleType string // Parsed from description: role_type
Rig string // Parsed from description: rig
LastUpdate string `json:"updated_at"`
}
// getAgentBeadState reads agent state from an agent bead.
// This is the ZFC-compliant way to get agent state: trust what agents report.
// Returns the agent_state field value (idle|running|stuck|stopped) or empty string if not found.
func (d *Daemon) getAgentBeadState(agentBeadID string) (string, error) {
info, err := d.getAgentBeadInfo(agentBeadID)
if err != nil {
return "", err
}
return info.State, nil
}
// getAgentBeadInfo fetches and parses an agent bead by ID.
func (d *Daemon) getAgentBeadInfo(agentBeadID string) (*AgentBeadInfo, error) {
cmd := exec.Command("bd", "show", agentBeadID, "--json")
cmd.Dir = d.config.TownRoot
output, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("bd show %s: %w", agentBeadID, err)
}
// bd show --json returns an array with one element
var issues []struct {
ID string `json:"id"`
Type string `json:"issue_type"`
Description string `json:"description"`
UpdatedAt string `json:"updated_at"`
HookBead string `json:"hook_bead"` // Read from database column
AgentState string `json:"agent_state"` // Read from database column
}
if err := json.Unmarshal(output, &issues); err != nil {
return nil, fmt.Errorf("parsing bd show output: %w", err)
}
if len(issues) == 0 {
return nil, fmt.Errorf("agent bead not found: %s", agentBeadID)
}
issue := issues[0]
if issue.Type != "agent" {
return nil, fmt.Errorf("bead %s is not an agent bead (type=%s)", agentBeadID, issue.Type)
}
// Parse agent fields from description for role/state info
fields := beads.ParseAgentFieldsFromDescription(issue.Description)
info := &AgentBeadInfo{
ID: issue.ID,
Type: issue.Type,
LastUpdate: issue.UpdatedAt,
}
if fields != nil {
info.State = fields.AgentState
info.RoleBead = fields.RoleBead
info.RoleType = fields.RoleType
info.Rig = fields.Rig
}
// Use HookBead from database column directly (not from description)
// The description may contain stale data - the slot is the source of truth.
info.HookBead = issue.HookBead
return info, nil
}
// identityToAgentBeadID maps a daemon identity to an agent bead ID.
// Uses parseIdentity to extract components, then uses beads package helpers.
func (d *Daemon) identityToAgentBeadID(identity string) string {
parsed, err := parseIdentity(identity)
if err != nil {
return ""
}
switch parsed.RoleType {
case "deacon":
return beads.DeaconBeadIDTown()
case "mayor":
return beads.MayorBeadIDTown()
case "witness":
prefix := config.GetRigPrefix(d.config.TownRoot, parsed.RigName)
return beads.WitnessBeadIDWithPrefix(prefix, parsed.RigName)
case "refinery":
prefix := config.GetRigPrefix(d.config.TownRoot, parsed.RigName)
return beads.RefineryBeadIDWithPrefix(prefix, parsed.RigName)
case "crew":
prefix := config.GetRigPrefix(d.config.TownRoot, parsed.RigName)
return beads.CrewBeadIDWithPrefix(prefix, parsed.RigName, parsed.AgentName)
case "polecat":
prefix := config.GetRigPrefix(d.config.TownRoot, parsed.RigName)
return beads.PolecatBeadIDWithPrefix(prefix, parsed.RigName, parsed.AgentName)
default:
return ""
}
}
// DeadAgentTimeout is how long an agent can report "running" without updating
// before the daemon marks it as dead. This is a fallback for crashed agents.
const DeadAgentTimeout = 15 * time.Minute
// checkStaleAgents looks for agents that report state=running but haven't
// updated their bead recently. These are likely dead agents that crashed
// without updating their state. This is the timeout fallback per gt-2hzl4.
func (d *Daemon) checkStaleAgents() {
// Known agent bead IDs to check
agentBeadIDs := []string{
beads.DeaconBeadIDTown(),
beads.MayorBeadIDTown(),
}
// Dynamically discover rigs from the rigs config
rigsConfigPath := filepath.Join(d.config.TownRoot, "mayor", "rigs.json")
rigsConfig, err := config.LoadRigsConfig(rigsConfigPath)
if err != nil {
// Log warning but continue with global agents only
d.logger.Printf("Warning: could not load rigs config: %v", err)
} else {
// Add rig-specific agents (witness, refinery) for each discovered rig
for rigName, rigEntry := range rigsConfig.Rigs {
// Get rig prefix from config (defaults to "gt" if not set)
prefix := "gt"
if rigEntry.BeadsConfig != nil && rigEntry.BeadsConfig.Prefix != "" {
prefix = strings.TrimSuffix(rigEntry.BeadsConfig.Prefix, "-")
}
agentBeadIDs = append(agentBeadIDs, beads.WitnessBeadIDWithPrefix(prefix, rigName))
agentBeadIDs = append(agentBeadIDs, beads.RefineryBeadIDWithPrefix(prefix, rigName))
}
}
for _, agentBeadID := range agentBeadIDs {
info, err := d.getAgentBeadInfo(agentBeadID)
if err != nil {
// Agent bead doesn't exist or error fetching - skip
continue
}
// Only check agents reporting they're running/working
if info.State != "running" && info.State != "working" {
continue
}
// Parse the updated_at timestamp
updatedAt, err := time.Parse(time.RFC3339, info.LastUpdate)
if err != nil {
d.logger.Printf("Warning: cannot parse updated_at for %s: %v", agentBeadID, err)
continue
}
// Check if stale
age := time.Since(updatedAt)
if age > DeadAgentTimeout {
d.logger.Printf("Agent %s appears dead (state=%s, last update %v ago, timeout %v)",
agentBeadID, info.State, age.Round(time.Minute), DeadAgentTimeout)
// Mark as dead
if err := d.markAgentDead(agentBeadID); err != nil {
d.logger.Printf("Warning: failed to mark %s as dead: %v", agentBeadID, err)
} else {
d.logger.Printf("Marked agent %s as dead due to timeout", agentBeadID)
}
}
}
}
// markAgentDead updates an agent bead's state to "dead".
// Uses bd update to modify the description with the new agent_state.
func (d *Daemon) markAgentDead(agentBeadID string) error {
// Get current agent info
info, err := d.getAgentBeadInfo(agentBeadID)
if err != nil {
return fmt.Errorf("fetching agent bead: %w", err)
}
// Build new description with updated state
newDesc := fmt.Sprintf("role_type: %s\nrig: %s\nagent_state: dead\nhook_bead: %s\nrole_bead: %s\n\nMarked dead by daemon at %s (was %s, last update too old)",
info.RoleType,
info.Rig,
info.HookBead,
info.RoleBead,
time.Now().Format(time.RFC3339),
info.State,
)
// Use bd update to set the new description
cmd := exec.Command("bd", "update", agentBeadID, "--description", newDesc)
cmd.Dir = d.config.TownRoot
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("bd update: %w (output: %s)", err, string(output))
}
return nil
}
// identityToBDActor converts a daemon identity to BD_ACTOR format (with slashes).
// Uses parseIdentity to extract components, then builds the slash format.
func identityToBDActor(identity string) string {
// Handle already-slash-formatted identities
if strings.Contains(identity, "/polecats/") || strings.Contains(identity, "/crew/") ||
strings.Contains(identity, "/witness") || strings.Contains(identity, "/refinery") {
return identity
}
parsed, err := parseIdentity(identity)
if err != nil {
return identity // Unknown format - return as-is
}
switch parsed.RoleType {
case "mayor", "deacon":
return parsed.RoleType
case "witness":
return parsed.RigName + "/witness"
case "refinery":
return parsed.RigName + "/refinery"
case "crew":
return parsed.RigName + "/crew/" + parsed.AgentName
case "polecat":
return parsed.RigName + "/polecats/" + parsed.AgentName
default:
return identity
}
}
// GUPPViolationTimeout is how long an agent can have work on hook without
// progressing before it's considered a GUPP (Gas Town Universal Propulsion
// Principle) violation. GUPP states: if you have work on your hook, you run it.
const GUPPViolationTimeout = 30 * time.Minute
// checkGUPPViolations looks for agents that have work-on-hook but aren't
// progressing. This is a GUPP violation: agents with hooked work must execute.
// The daemon detects these and notifies the relevant Witness for remediation.
func (d *Daemon) checkGUPPViolations() {
// Check polecat agents - they're the ones with work-on-hook
rigs := d.getKnownRigs()
for _, rigName := range rigs {
d.checkRigGUPPViolations(rigName)
}
}
// checkRigGUPPViolations checks polecats in a specific rig for GUPP violations.
func (d *Daemon) checkRigGUPPViolations(rigName string) {
// List polecat agent beads for this rig
// Pattern: gt-polecat-<rig>-<name>
cmd := exec.Command("bd", "list", "--type=agent", "--json")
cmd.Dir = d.config.TownRoot
output, err := cmd.Output()
if err != nil {
return // Silently fail - bd might not be available
}
var agents []struct {
ID string `json:"id"`
Type string `json:"issue_type"`
Description string `json:"description"`
UpdatedAt string `json:"updated_at"`
HookBead string `json:"hook_bead"` // Read from database column, not description
AgentState string `json:"agent_state"`
}
if err := json.Unmarshal(output, &agents); err != nil {
return
}
prefix := "gt-polecat-" + rigName + "-"
for _, agent := range agents {
// Only check polecats for this rig
if !strings.HasPrefix(agent.ID, prefix) {
continue
}
// Check if agent has work on hook
// Use HookBead from database column directly (not parsed from description)
if agent.HookBead == "" {
continue // No hooked work - no GUPP violation possible
}
// Check if agent is actively working
if agent.AgentState == "working" || agent.AgentState == "running" {
// Check when the agent bead was last updated
updatedAt, err := time.Parse(time.RFC3339, agent.UpdatedAt)
if err != nil {
continue
}
age := time.Since(updatedAt)
if age > GUPPViolationTimeout {
d.logger.Printf("GUPP violation: agent %s has hook_bead=%s but hasn't updated in %v (timeout: %v)",
agent.ID, agent.HookBead, age.Round(time.Minute), GUPPViolationTimeout)
// Notify the witness for this rig
d.notifyWitnessOfGUPP(rigName, agent.ID, agent.HookBead, age)
}
}
}
}
// notifyWitnessOfGUPP sends a mail to the rig's witness about a GUPP violation.
func (d *Daemon) notifyWitnessOfGUPP(rigName, agentID, hookBead string, stuckDuration time.Duration) {
witnessAddr := rigName + "/witness"
subject := fmt.Sprintf("GUPP_VIOLATION: %s stuck for %v", agentID, stuckDuration.Round(time.Minute))
body := fmt.Sprintf(`Agent %s has work on hook but isn't progressing.
hook_bead: %s
stuck_duration: %v
Action needed: Check if agent is alive and responsive. Consider restarting if stuck.`,
agentID, hookBead, stuckDuration.Round(time.Minute))
cmd := exec.Command("gt", "mail", "send", witnessAddr, "-s", subject, "-m", body)
cmd.Dir = d.config.TownRoot
if err := cmd.Run(); err != nil {
d.logger.Printf("Warning: failed to notify witness of GUPP violation: %v", err)
} else {
d.logger.Printf("Notified %s of GUPP violation for %s", witnessAddr, agentID)
}
}
// checkOrphanedWork looks for work assigned to dead agents.
// Orphaned work needs to be reassigned or the agent needs to be restarted.
func (d *Daemon) checkOrphanedWork() {
// Get list of dead agents
deadAgents := d.getDeadAgents()
if len(deadAgents) == 0 {
return
}
// For each dead agent, check if they have hooked work
// Use HookBead from database column directly (not parsed from description)
for _, agent := range deadAgents {
if agent.HookBead == "" {
continue // No hooked work to orphan
}
d.logger.Printf("Orphaned work detected: agent %s is dead but has hook_bead=%s",
agent.ID, agent.HookBead)
// Determine the rig from the agent ID (gt-polecat-<rig>-<name>)
rigName := d.extractRigFromAgentID(agent.ID)
if rigName != "" {
d.notifyWitnessOfOrphanedWork(rigName, agent.ID, agent.HookBead)
}
}
}
// deadAgentInfo holds info about a dead agent for orphaned work detection.
type deadAgentInfo struct {
ID string
HookBead string // Read from database column, not description
}
// getDeadAgents returns all agent beads with state=dead.
func (d *Daemon) getDeadAgents() []deadAgentInfo {
cmd := exec.Command("bd", "list", "--type=agent", "--json")
cmd.Dir = d.config.TownRoot
output, err := cmd.Output()
if err != nil {
return nil
}
var agents []struct {
ID string `json:"id"`
Type string `json:"issue_type"`
HookBead string `json:"hook_bead"` // Read from database column
AgentState string `json:"agent_state"` // Read from database column
}
if err := json.Unmarshal(output, &agents); err != nil {
return nil
}
var dead []deadAgentInfo
for _, agent := range agents {
if agent.AgentState == "dead" {
dead = append(dead, deadAgentInfo{
ID: agent.ID,
HookBead: agent.HookBead,
})
}
}
return dead
}
// extractRigFromAgentID extracts the rig name from a polecat agent ID.
// Example: gt-polecat-gastown-max → gastown
func (d *Daemon) extractRigFromAgentID(agentID string) string {
// Pattern: gt-polecat-<rig>-<name>
if !strings.HasPrefix(agentID, "gt-polecat-") {
return ""
}
rest := strings.TrimPrefix(agentID, "gt-polecat-")
// Find the rig name (everything before the last dash)
lastDash := strings.LastIndex(rest, "-")
if lastDash == -1 {
return ""
}
return rest[:lastDash]
}
// notifyWitnessOfOrphanedWork sends a mail to the rig's witness about orphaned work.
func (d *Daemon) notifyWitnessOfOrphanedWork(rigName, agentID, hookBead string) {
witnessAddr := rigName + "/witness"
subject := fmt.Sprintf("ORPHANED_WORK: %s has hooked work but is dead", agentID)
body := fmt.Sprintf(`Agent %s is dead but has work on its hook.
hook_bead: %s
Action needed: Either restart the agent or reassign the work.`,
agentID, hookBead)
cmd := exec.Command("gt", "mail", "send", witnessAddr, "-s", subject, "-m", body)
cmd.Dir = d.config.TownRoot
if err := cmd.Run(); err != nil {
d.logger.Printf("Warning: failed to notify witness of orphaned work: %v", err)
} else {
d.logger.Printf("Notified %s of orphaned work for %s", witnessAddr, agentID)
}
}