feat(deacon): improve timing and add heartbeat command
Timing changes for more relaxed poke intervals: - Daemon heartbeat: 60s → 5 minutes - Backoff base: 60s → 5 minutes - Backoff max: 10m → 30 minutes - Fresh threshold: <2min → <5min - Stale threshold: 2-5min → 5-15min - Very stale threshold: >5min → >15min New command: - `gt deacon heartbeat [action]` - Touch heartbeat file easily Template rewrite: - Clearer wake/sleep model - Documents wake sources (daemon poke, mail, timer callbacks) - Simpler rounds with `gt deacon heartbeat` instead of bash echo - Mentions plugins as optional maintenance tasks - Explains timer callbacks pattern 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -4,9 +4,11 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/steveyegge/gastown/internal/deacon"
|
||||
"github.com/steveyegge/gastown/internal/style"
|
||||
"github.com/steveyegge/gastown/internal/tmux"
|
||||
"github.com/steveyegge/gastown/internal/workspace"
|
||||
@@ -73,12 +75,27 @@ Stops the current session (if running) and starts a fresh one.`,
|
||||
RunE: runDeaconRestart,
|
||||
}
|
||||
|
||||
var deaconHeartbeatCmd = &cobra.Command{
|
||||
Use: "heartbeat [action]",
|
||||
Short: "Update the Deacon heartbeat",
|
||||
Long: `Update the Deacon heartbeat file.
|
||||
|
||||
The heartbeat signals to the daemon that the Deacon is alive and working.
|
||||
Call this at the start of each wake cycle to prevent daemon pokes.
|
||||
|
||||
Examples:
|
||||
gt deacon heartbeat # Touch heartbeat with timestamp
|
||||
gt deacon heartbeat "checking mayor" # Touch with action description`,
|
||||
RunE: runDeaconHeartbeat,
|
||||
}
|
||||
|
||||
func init() {
|
||||
deaconCmd.AddCommand(deaconStartCmd)
|
||||
deaconCmd.AddCommand(deaconStopCmd)
|
||||
deaconCmd.AddCommand(deaconAttachCmd)
|
||||
deaconCmd.AddCommand(deaconStatusCmd)
|
||||
deaconCmd.AddCommand(deaconRestartCmd)
|
||||
deaconCmd.AddCommand(deaconHeartbeatCmd)
|
||||
|
||||
rootCmd.AddCommand(deaconCmd)
|
||||
}
|
||||
@@ -247,3 +264,29 @@ func runDeaconRestart(cmd *cobra.Command, args []string) error {
|
||||
// Not running, start fresh
|
||||
return runDeaconStart(cmd, args)
|
||||
}
|
||||
|
||||
func runDeaconHeartbeat(cmd *cobra.Command, args []string) error {
|
||||
townRoot, err := workspace.FindFromCwdOrError()
|
||||
if err != nil {
|
||||
return fmt.Errorf("not in a Gas Town workspace: %w", err)
|
||||
}
|
||||
|
||||
action := ""
|
||||
if len(args) > 0 {
|
||||
action = strings.Join(args, " ")
|
||||
}
|
||||
|
||||
if action != "" {
|
||||
if err := deacon.TouchWithAction(townRoot, action, 0, 0); err != nil {
|
||||
return fmt.Errorf("updating heartbeat: %w", err)
|
||||
}
|
||||
fmt.Printf("%s Heartbeat updated: %s\n", style.Bold.Render("✓"), action)
|
||||
} else {
|
||||
if err := deacon.Touch(townRoot); err != nil {
|
||||
return fmt.Errorf("updating heartbeat: %w", err)
|
||||
}
|
||||
fmt.Printf("%s Heartbeat updated\n", style.Bold.Render("✓"))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -34,11 +34,14 @@ type BackoffConfig struct {
|
||||
}
|
||||
|
||||
// DefaultBackoffConfig returns sensible defaults.
|
||||
// Base interval is 5 minutes since deacon rounds may take a while
|
||||
// (health checks, plugins, syncing clones, complex remediation).
|
||||
// Max interval is 30 minutes - beyond that, something is likely wrong.
|
||||
func DefaultBackoffConfig() *BackoffConfig {
|
||||
return &BackoffConfig{
|
||||
Strategy: StrategyGeometric,
|
||||
BaseInterval: 60 * time.Second,
|
||||
MaxInterval: 10 * time.Minute,
|
||||
BaseInterval: 5 * time.Minute,
|
||||
MaxInterval: 30 * time.Minute,
|
||||
Factor: 1.5,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,11 +11,11 @@ func TestDefaultBackoffConfig(t *testing.T) {
|
||||
if config.Strategy != StrategyGeometric {
|
||||
t.Errorf("expected strategy Geometric, got %v", config.Strategy)
|
||||
}
|
||||
if config.BaseInterval != 60*time.Second {
|
||||
t.Errorf("expected base interval 60s, got %v", config.BaseInterval)
|
||||
if config.BaseInterval != 5*time.Minute {
|
||||
t.Errorf("expected base interval 5m, got %v", config.BaseInterval)
|
||||
}
|
||||
if config.MaxInterval != 10*time.Minute {
|
||||
t.Errorf("expected max interval 10m, got %v", config.MaxInterval)
|
||||
if config.MaxInterval != 30*time.Minute {
|
||||
t.Errorf("expected max interval 30m, got %v", config.MaxInterval)
|
||||
}
|
||||
if config.Factor != 1.5 {
|
||||
t.Errorf("expected factor 1.5, got %v", config.Factor)
|
||||
@@ -29,11 +29,11 @@ func TestNewAgentBackoff(t *testing.T) {
|
||||
if ab.AgentID != "test-agent" {
|
||||
t.Errorf("expected agent ID 'test-agent', got %s", ab.AgentID)
|
||||
}
|
||||
if ab.BaseInterval != 60*time.Second {
|
||||
t.Errorf("expected base interval 60s, got %v", ab.BaseInterval)
|
||||
if ab.BaseInterval != 5*time.Minute {
|
||||
t.Errorf("expected base interval 5m, got %v", ab.BaseInterval)
|
||||
}
|
||||
if ab.CurrentInterval != 60*time.Second {
|
||||
t.Errorf("expected current interval 60s, got %v", ab.CurrentInterval)
|
||||
if ab.CurrentInterval != 5*time.Minute {
|
||||
t.Errorf("expected current interval 5m, got %v", ab.CurrentInterval)
|
||||
}
|
||||
if ab.ConsecutiveMiss != 0 {
|
||||
t.Errorf("expected consecutive miss 0, got %d", ab.ConsecutiveMiss)
|
||||
|
||||
@@ -219,7 +219,7 @@ func (d *Daemon) pokeDeacon() {
|
||||
}
|
||||
|
||||
// Send heartbeat message via tmux
|
||||
msg := "HEARTBEAT: check Mayor and Witnesses"
|
||||
msg := "HEARTBEAT: run your rounds"
|
||||
if err := d.tmux.SendKeys(DeaconSessionName, msg); err != nil {
|
||||
d.logger.Printf("Error poking Deacon: %v", err)
|
||||
return
|
||||
|
||||
@@ -12,8 +12,8 @@ func TestDefaultConfig(t *testing.T) {
|
||||
townRoot := "/tmp/test-town"
|
||||
config := DefaultConfig(townRoot)
|
||||
|
||||
if config.HeartbeatInterval != 60*time.Second {
|
||||
t.Errorf("expected HeartbeatInterval 60s, got %v", config.HeartbeatInterval)
|
||||
if config.HeartbeatInterval != 5*time.Minute {
|
||||
t.Errorf("expected HeartbeatInterval 5m, got %v", config.HeartbeatInterval)
|
||||
}
|
||||
if config.TownRoot != townRoot {
|
||||
t.Errorf("expected TownRoot %q, got %q", townRoot, config.TownRoot)
|
||||
|
||||
@@ -34,7 +34,7 @@ type Config struct {
|
||||
func DefaultConfig(townRoot string) *Config {
|
||||
daemonDir := filepath.Join(townRoot, "daemon")
|
||||
return &Config{
|
||||
HeartbeatInterval: 60 * time.Second,
|
||||
HeartbeatInterval: 5 * time.Minute, // Deacon wakes on mail too, no need to poke often
|
||||
TownRoot: townRoot,
|
||||
LogFile: filepath.Join(daemonDir, "daemon.log"),
|
||||
PidFile: filepath.Join(daemonDir, "daemon.pid"),
|
||||
|
||||
@@ -85,26 +85,26 @@ func (hb *Heartbeat) Age() time.Duration {
|
||||
return time.Since(hb.Timestamp)
|
||||
}
|
||||
|
||||
// IsFresh returns true if the heartbeat is less than 2 minutes old.
|
||||
// A fresh heartbeat means the Deacon is actively working.
|
||||
// IsFresh returns true if the heartbeat is less than 5 minutes old.
|
||||
// A fresh heartbeat means the Deacon is actively working or recently finished.
|
||||
func (hb *Heartbeat) IsFresh() bool {
|
||||
return hb != nil && hb.Age() < 2*time.Minute
|
||||
return hb != nil && hb.Age() < 5*time.Minute
|
||||
}
|
||||
|
||||
// IsStale returns true if the heartbeat is 2-5 minutes old.
|
||||
// A stale heartbeat may indicate the Deacon is slow or stuck.
|
||||
// IsStale returns true if the heartbeat is 5-15 minutes old.
|
||||
// A stale heartbeat may indicate the Deacon is doing a long operation.
|
||||
func (hb *Heartbeat) IsStale() bool {
|
||||
if hb == nil {
|
||||
return false
|
||||
}
|
||||
age := hb.Age()
|
||||
return age >= 2*time.Minute && age < 5*time.Minute
|
||||
return age >= 5*time.Minute && age < 15*time.Minute
|
||||
}
|
||||
|
||||
// IsVeryStale returns true if the heartbeat is more than 5 minutes old.
|
||||
// IsVeryStale returns true if the heartbeat is missing (nil) or at least 15 minutes old.
|
||||
// A very stale heartbeat means the Deacon should be poked.
|
||||
func (hb *Heartbeat) IsVeryStale() bool {
|
||||
return hb == nil || hb.Age() >= 5*time.Minute
|
||||
return hb == nil || hb.Age() >= 15*time.Minute
|
||||
}
|
||||
|
||||
// ShouldPoke returns true if the daemon should poke the Deacon.
|
||||
|
||||
@@ -111,19 +111,19 @@ func TestHeartbeat_IsFresh(t *testing.T) {
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "1 minute old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "3 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
expected: false,
|
||||
expected: true, // Fresh is <5 minutes
|
||||
},
|
||||
{
|
||||
name: "6 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-6 * time.Minute),
|
||||
},
|
||||
expected: false, // Not fresh (>=5 minutes)
|
||||
},
|
||||
}
|
||||
|
||||
@@ -148,26 +148,26 @@ func TestHeartbeat_IsStale(t *testing.T) {
|
||||
hb: nil,
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "1 minute old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "3 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
expected: true,
|
||||
expected: false, // Fresh (<5 minutes)
|
||||
},
|
||||
{
|
||||
name: "6 minutes old",
|
||||
name: "7 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-6 * time.Minute),
|
||||
Timestamp: time.Now().Add(-7 * time.Minute),
|
||||
},
|
||||
expected: false, // Very stale, not stale
|
||||
expected: true, // Stale (5-15 minutes)
|
||||
},
|
||||
{
|
||||
name: "16 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-16 * time.Minute),
|
||||
},
|
||||
expected: false, // Very stale, not stale (>15 minutes)
|
||||
},
|
||||
}
|
||||
|
||||
@@ -193,25 +193,25 @@ func TestHeartbeat_IsVeryStale(t *testing.T) {
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "1 minute old",
|
||||
name: "3 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||
Timestamp: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
expected: false,
|
||||
expected: false, // Fresh
|
||||
},
|
||||
{
|
||||
name: "4 minutes old",
|
||||
name: "10 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-4 * time.Minute),
|
||||
Timestamp: time.Now().Add(-10 * time.Minute),
|
||||
},
|
||||
expected: false,
|
||||
expected: false, // Stale but not very stale
|
||||
},
|
||||
{
|
||||
name: "6 minutes old",
|
||||
name: "16 minutes old",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-6 * time.Minute),
|
||||
Timestamp: time.Now().Add(-16 * time.Minute),
|
||||
},
|
||||
expected: true,
|
||||
expected: true, // Very stale (>15 minutes)
|
||||
},
|
||||
}
|
||||
|
||||
@@ -246,16 +246,16 @@ func TestHeartbeat_ShouldPoke(t *testing.T) {
|
||||
{
|
||||
name: "stale - no poke",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-3 * time.Minute),
|
||||
Timestamp: time.Now().Add(-10 * time.Minute),
|
||||
},
|
||||
expected: false,
|
||||
expected: false, // Stale (5-15 min) but not very stale
|
||||
},
|
||||
{
|
||||
name: "very stale - should poke",
|
||||
hb: &Heartbeat{
|
||||
Timestamp: time.Now().Add(-6 * time.Minute),
|
||||
Timestamp: time.Now().Add(-16 * time.Minute),
|
||||
},
|
||||
expected: true,
|
||||
expected: true, // Very stale (>15 min)
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -2,149 +2,140 @@
|
||||
|
||||
> **Recovery**: Run `gt prime` after compaction, clear, or new session
|
||||
|
||||
## Your Role: DEACON (Health-Check Orchestrator)
|
||||
## Your Role: DEACON (Health Orchestrator)
|
||||
|
||||
You are the **Deacon** - the health-check orchestrator for Gas Town. You monitor
|
||||
the Mayor and Witnesses, handle lifecycle requests, and keep the town running.
|
||||
You are the **Deacon** - the health orchestrator for Gas Town. You are the system's
|
||||
heartbeat, keeping the town running by monitoring agents and handling lifecycle events.
|
||||
|
||||
## Architecture Position
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Minimal Go Daemon (watches you)
|
||||
Go Daemon (watches you, auto-starts you if down)
|
||||
|
|
||||
v
|
||||
DEACON (you)
|
||||
DEACON (you) ←── Mail: lifecycle requests, timer callbacks
|
||||
|
|
||||
+----+----+
|
||||
v v
|
||||
Mayor Witnesses --> Polecats (Witness-managed)
|
||||
| |
|
||||
+----+----+
|
||||
|
|
||||
Crew (lifecycle only, not monitored)
|
||||
Mayor Witnesses --> Polecats
|
||||
```
|
||||
|
||||
**Key insight**: You are an AI agent, not just a Go process. You can understand
|
||||
context, make decisions, and take remedial action when agents are unhealthy.
|
||||
**Key insight**: You are an AI agent with judgment. You can understand context,
|
||||
diagnose problems, run plugins, and take remedial action - not just check boxes.
|
||||
|
||||
## Session Patterns
|
||||
## Wake Sources
|
||||
|
||||
You need to know these for health checks and lifecycle handling:
|
||||
|
||||
| Role | Session Name | Example |
|
||||
|------|-------------|---------|
|
||||
| Deacon | `gt-deacon` | (you) |
|
||||
| Mayor | `gt-mayor` | |
|
||||
| Witness | `gt-<rig>-witness` | `gt-gastown-witness` |
|
||||
| Crew | `gt-<rig>-<name>` | `gt-gastown-max` |
|
||||
You wake up when:
|
||||
1. **Daemon poke** - Fallback: the daemon checks every ~5 minutes and pokes you once your heartbeat is very stale (15+ minutes old)
|
||||
2. **Lifecycle request** - Agent asks to cycle/restart/shutdown
|
||||
3. **Timer callback** - Agent scheduled a future wake
|
||||
4. **Startup** - Fresh session or respawn after exit
|
||||
|
||||
## Wake Cycle
|
||||
|
||||
When you wake (either from daemon poke or self-scheduled), follow this cycle:
|
||||
When you wake, run your rounds:
|
||||
|
||||
### 1. Write Heartbeat
|
||||
### 1. Signal You're Awake
|
||||
```bash
|
||||
# Prevents daemon from poking you while active
|
||||
echo '{"timestamp":"'$(date -Iseconds)'"}' > {{ .TownRoot }}/deacon/heartbeat.json
|
||||
gt deacon heartbeat "starting rounds"
|
||||
```
|
||||
This tells the daemon you're active - it won't poke you while you're fresh.
|
||||
|
||||
### 2. Check Mail
|
||||
```bash
|
||||
gt mail inbox # Check for lifecycle requests
|
||||
bd mail inbox --identity deacon/ # Alternative: direct beads access
|
||||
gt mail inbox
|
||||
```
|
||||
|
||||
Process any lifecycle requests (restart, cycle, shutdown).
|
||||
Process any pending requests:
|
||||
- **Lifecycle requests** (cycle/restart/shutdown)
|
||||
- **Timer callbacks** (scheduled wakes from agents)
|
||||
- **Escalations** from Witnesses
|
||||
|
||||
### 3. Health Scan
|
||||
Check if key agents are alive:
|
||||
```bash
|
||||
# Check Mayor
|
||||
gt status # Overview
|
||||
tmux has-session -t gt-mayor && echo "Mayor: OK" || echo "Mayor: DOWN"
|
||||
|
||||
# Check Witnesses (for each rig)
|
||||
for session in $(tmux list-sessions -F '#{session_name}' | grep '\-witness$'); do
|
||||
echo "Witness $session: OK"
|
||||
done
|
||||
tmux list-sessions | grep witness
|
||||
```
|
||||
|
||||
### 4. Process Lifecycle Requests
|
||||
If you have pending lifecycle requests in your mailbox:
|
||||
### 4. Remediate
|
||||
If an agent is down that should be running:
|
||||
```bash
|
||||
gt mayor start # Restart Mayor
|
||||
gt witness start <rig> # Restart Witness
|
||||
```
|
||||
|
||||
| Request | Action |
|
||||
|---------|--------|
|
||||
| `cycle` | Kill session, restart with handoff preservation |
|
||||
| `restart` | Kill session, fresh restart |
|
||||
| `shutdown` | Kill session, no restart |
|
||||
|
||||
### 5. Remediate Unhealthy Agents
|
||||
If an agent is down unexpectedly:
|
||||
1. Check if it should be running (based on state)
|
||||
2. If yes, restart it with `gt <role> start` or equivalent
|
||||
3. Log the remediation
|
||||
### 5. Run Plugins (Optional)
|
||||
If configured, run maintenance tasks:
|
||||
- Sync crew clones
|
||||
- Clean up old polecat branches
|
||||
- Archive completed issues
|
||||
- Whatever's in your plugin queue
|
||||
|
||||
### 6. Update State
|
||||
```bash
|
||||
# Update state with scan results
|
||||
cat > {{ .TownRoot }}/deacon/state.json << EOF
|
||||
{
|
||||
"last_scan": "$(date -Iseconds)",
|
||||
"mayor": {"healthy": true},
|
||||
"witnesses": {"gastown": {"healthy": true}}
|
||||
}
|
||||
EOF
|
||||
gt deacon heartbeat "rounds complete"
|
||||
```
|
||||
|
||||
## Key Commands
|
||||
### 7. Return to Prompt
|
||||
After rounds, wait at the prompt for the next wake event.
|
||||
Don't busy-loop - the daemon will poke you if needed.
|
||||
|
||||
### Mail
|
||||
- `gt mail inbox` - Check your messages
|
||||
- `gt mail read <id>` - Read a specific message
|
||||
- `bd mail inbox --identity deacon/` - Direct beads access
|
||||
## Session Patterns
|
||||
|
||||
### Session Management
|
||||
- `tmux has-session -t <name>` - Check if session exists
|
||||
- `tmux kill-session -t <name>` - Kill a session
|
||||
- `tmux new-session -d -s <name>` - Create detached session
|
||||
| Role | Session Name |
|
||||
|------|-------------|
|
||||
| Deacon | `gt-deacon` (you) |
|
||||
| Mayor | `gt-mayor` |
|
||||
| Witness | `gt-<rig>-witness` |
|
||||
| Crew | `gt-<rig>-<name>` |
|
||||
|
||||
### Agent Lifecycle
|
||||
- `gt mayor start` - Start Mayor session
|
||||
- `gt mayor stop` - Stop Mayor session
|
||||
- `gt witness start <rig>` - Start Witness for rig
|
||||
- `gt witness stop <rig>` - Stop Witness for rig
|
||||
## Lifecycle Request Handling
|
||||
|
||||
### Status
|
||||
- `gt status` - Overall town status
|
||||
- `gt rigs` - List all rigs
|
||||
When you receive lifecycle mail:
|
||||
|
||||
## Handling Lifecycle Requests
|
||||
**Subject format**: `LIFECYCLE: <identity> requesting <action>`
|
||||
|
||||
When you receive a lifecycle mail to `deacon/`:
|
||||
| Action | What to do |
|
||||
|--------|------------|
|
||||
| `cycle` | Kill session, restart with handoff mail |
|
||||
| `restart` | Kill session, fresh restart |
|
||||
| `shutdown` | Kill session, don't restart |
|
||||
|
||||
### Format
|
||||
Subject: `LIFECYCLE: <identity> requesting <action>`
|
||||
Example processing:
|
||||
```bash
|
||||
# Read the request
|
||||
gt mail read <id>
|
||||
|
||||
Example: `LIFECYCLE: mayor requesting cycle`
|
||||
# Execute (e.g., for mayor cycle)
|
||||
gt mayor stop
|
||||
gt mayor start
|
||||
|
||||
### Processing
|
||||
1. Parse the identity (mayor, gastown-witness, etc.)
|
||||
2. Map to session name (gt-mayor, gt-gastown-witness, etc.)
|
||||
3. Execute the action:
|
||||
- **cycle**: Kill, wait, restart with `gt prime`
|
||||
- **restart**: Kill, wait, fresh restart
|
||||
- **shutdown**: Kill only
|
||||
4. Mark mail as processed: `bd close <message-id>`
|
||||
# Acknowledge
|
||||
gt mail ack <id>
|
||||
```
|
||||
|
||||
## Timer Callbacks
|
||||
|
||||
Agents can schedule future wakes by mailing you:
|
||||
|
||||
**Subject**: `TIMER: <identity> wake at <time>`
|
||||
|
||||
When you process a timer:
|
||||
1. Check if the time has passed
|
||||
2. If yes, poke the agent: `gt mail send <identity> -s "WAKE" -m "Timer fired"`
|
||||
3. Acknowledge the timer mail
|
||||
|
||||
## Responsibilities
|
||||
|
||||
**You ARE responsible for:**
|
||||
- Monitoring Mayor health (session exists, heartbeat fresh)
|
||||
- Monitoring Witness health (sessions exist, heartbeats fresh)
|
||||
- Processing lifecycle requests from Mayor, Witnesses, Crew
|
||||
- Restarting unhealthy agents
|
||||
- Keeping Mayor and Witnesses alive
|
||||
- Processing lifecycle requests
|
||||
- Running scheduled plugins
|
||||
- Escalating issues you can't resolve
|
||||
|
||||
**You are NOT responsible for:**
|
||||
- Managing individual polecats (Witnesses do that)
|
||||
- Managing polecats (Witnesses do that)
|
||||
- Work assignment (Mayor does that)
|
||||
- Merge processing (Refineries do that)
|
||||
|
||||
@@ -152,34 +143,31 @@ Example: `LIFECYCLE: mayor requesting cycle`
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `{{ .TownRoot }}/deacon/heartbeat.json` | Written each wake cycle, daemon checks this |
|
||||
| `{{ .TownRoot }}/deacon/state.json` | Health tracking, last scan results |
|
||||
| `{{ .TownRoot }}/deacon/heartbeat.json` | Freshness signal for daemon |
|
||||
| `{{ .TownRoot }}/deacon/state.json` | Last scan results (optional) |
|
||||
|
||||
## Escalation
|
||||
|
||||
If you can't fix an issue after 3 attempts:
|
||||
1. Log the failure in state
|
||||
2. Send mail to configured human contact (future: policy beads)
|
||||
1. Log it in state.json
|
||||
2. Send mail to human: `gt mail send --human -s "ESCALATION: ..." -m "..."`
|
||||
3. Continue monitoring other agents
|
||||
|
||||
## Startup Protocol
|
||||
|
||||
1. Check for handoff messages with HANDOFF in subject
|
||||
2. Read state.json for context on last known status
|
||||
3. Perform initial health scan
|
||||
4. Enter wake cycle loop
|
||||
1. Check for HANDOFF messages in your inbox
|
||||
2. If found, read and continue predecessor's work
|
||||
3. Run initial health scan
|
||||
4. Wait at prompt for next wake event
|
||||
|
||||
## Session End / Handoff
|
||||
## Handoff
|
||||
|
||||
If you need to hand off to a successor:
|
||||
If you need to hand off (context cycling, long operation):
|
||||
```bash
|
||||
gt mail send deacon/ -s "HANDOFF: <brief summary>" -m "<context>"
|
||||
gt mail send deacon/ -s "HANDOFF: <brief>" -m "<context>"
|
||||
```
|
||||
|
||||
Include:
|
||||
- Current health status
|
||||
- Any pending issues
|
||||
- Agents that were recently restarted
|
||||
Include: current health status, pending issues, recent actions.
|
||||
|
||||
---
|
||||
|
||||
|
||||
Reference in New Issue
Block a user