Implements per-agent backoff tracking to reduce noise for busy agents: - AgentBackoff type tracks interval, miss count, and last activity - BackoffManager manages state across all agents - Geometric backoff strategy (1.5x factor, 10min cap) - Integrates with keepalive to skip pokes when agents are fresh - Resets backoff immediately when activity detected Closes gt-8bx 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
188 lines
5.0 KiB
Go
188 lines
5.0 KiB
Go
package daemon
|
|
|
|
import (
|
|
"time"
|
|
)
|
|
|
|
// BackoffStrategy defines how intervals grow.
|
|
type BackoffStrategy string
|
|
|
|
const (
|
|
// StrategyFixed keeps the same interval (no backoff).
|
|
StrategyFixed BackoffStrategy = "fixed"
|
|
|
|
// StrategyGeometric multiplies by a factor each miss (1.5x).
|
|
StrategyGeometric BackoffStrategy = "geometric"
|
|
|
|
// StrategyExponential doubles interval each miss (2x).
|
|
StrategyExponential BackoffStrategy = "exponential"
|
|
)
|
|
|
|
// BackoffConfig holds backoff configuration.
|
|
type BackoffConfig struct {
|
|
// Strategy determines how intervals grow.
|
|
Strategy BackoffStrategy
|
|
|
|
// BaseInterval is the starting interval (default 60s).
|
|
BaseInterval time.Duration
|
|
|
|
// MaxInterval is the cap on how large intervals can grow (default 10m).
|
|
MaxInterval time.Duration
|
|
|
|
// Factor is the multiplier for geometric backoff (default 1.5).
|
|
Factor float64
|
|
}
|
|
|
|
// DefaultBackoffConfig returns sensible defaults.
|
|
func DefaultBackoffConfig() *BackoffConfig {
|
|
return &BackoffConfig{
|
|
Strategy: StrategyGeometric,
|
|
BaseInterval: 60 * time.Second,
|
|
MaxInterval: 10 * time.Minute,
|
|
Factor: 1.5,
|
|
}
|
|
}
|
|
|
|
// AgentBackoff tracks backoff state for a single agent.
|
|
type AgentBackoff struct {
|
|
// AgentID identifies the agent (e.g., "mayor", "gastown-witness").
|
|
AgentID string
|
|
|
|
// BaseInterval is the starting interval.
|
|
BaseInterval time.Duration
|
|
|
|
// CurrentInterval is the current (possibly backed-off) interval.
|
|
CurrentInterval time.Duration
|
|
|
|
// MaxInterval caps how large intervals can grow.
|
|
MaxInterval time.Duration
|
|
|
|
// ConsecutiveMiss counts pokes with no response.
|
|
ConsecutiveMiss int
|
|
|
|
// LastPoke is when we last poked this agent.
|
|
LastPoke time.Time
|
|
|
|
// LastActivity is when the agent last showed activity.
|
|
LastActivity time.Time
|
|
}
|
|
|
|
// NewAgentBackoff creates backoff state for an agent.
|
|
func NewAgentBackoff(agentID string, config *BackoffConfig) *AgentBackoff {
|
|
if config == nil {
|
|
config = DefaultBackoffConfig()
|
|
}
|
|
return &AgentBackoff{
|
|
AgentID: agentID,
|
|
BaseInterval: config.BaseInterval,
|
|
CurrentInterval: config.BaseInterval,
|
|
MaxInterval: config.MaxInterval,
|
|
}
|
|
}
|
|
|
|
// ShouldPoke returns true if enough time has passed since the last poke.
|
|
func (ab *AgentBackoff) ShouldPoke() bool {
|
|
if ab.LastPoke.IsZero() {
|
|
return true // Never poked
|
|
}
|
|
return time.Since(ab.LastPoke) >= ab.CurrentInterval
|
|
}
|
|
|
|
// RecordPoke records that we poked the agent.
|
|
func (ab *AgentBackoff) RecordPoke() {
|
|
ab.LastPoke = time.Now()
|
|
}
|
|
|
|
// RecordMiss records that the agent didn't respond since last poke.
|
|
// This increases the backoff interval.
|
|
func (ab *AgentBackoff) RecordMiss(config *BackoffConfig) {
|
|
ab.ConsecutiveMiss++
|
|
|
|
if config == nil {
|
|
config = DefaultBackoffConfig()
|
|
}
|
|
|
|
switch config.Strategy {
|
|
case StrategyFixed:
|
|
// No change
|
|
case StrategyGeometric:
|
|
ab.CurrentInterval = time.Duration(float64(ab.CurrentInterval) * config.Factor)
|
|
case StrategyExponential:
|
|
ab.CurrentInterval = ab.CurrentInterval * 2
|
|
}
|
|
|
|
// Cap at max interval
|
|
if ab.CurrentInterval > ab.MaxInterval {
|
|
ab.CurrentInterval = ab.MaxInterval
|
|
}
|
|
}
|
|
|
|
// RecordActivity records that the agent showed activity.
|
|
// This resets the backoff to the base interval.
|
|
func (ab *AgentBackoff) RecordActivity() {
|
|
ab.ConsecutiveMiss = 0
|
|
ab.CurrentInterval = ab.BaseInterval
|
|
ab.LastActivity = time.Now()
|
|
}
|
|
|
|
// BackoffManager tracks backoff state for all agents.
|
|
type BackoffManager struct {
|
|
config *BackoffConfig
|
|
agents map[string]*AgentBackoff
|
|
}
|
|
|
|
// NewBackoffManager creates a new backoff manager.
|
|
func NewBackoffManager(config *BackoffConfig) *BackoffManager {
|
|
if config == nil {
|
|
config = DefaultBackoffConfig()
|
|
}
|
|
return &BackoffManager{
|
|
config: config,
|
|
agents: make(map[string]*AgentBackoff),
|
|
}
|
|
}
|
|
|
|
// GetOrCreate returns backoff state for an agent, creating if needed.
|
|
func (bm *BackoffManager) GetOrCreate(agentID string) *AgentBackoff {
|
|
if ab, ok := bm.agents[agentID]; ok {
|
|
return ab
|
|
}
|
|
ab := NewAgentBackoff(agentID, bm.config)
|
|
bm.agents[agentID] = ab
|
|
return ab
|
|
}
|
|
|
|
// ShouldPoke returns true if we should poke the given agent.
|
|
func (bm *BackoffManager) ShouldPoke(agentID string) bool {
|
|
return bm.GetOrCreate(agentID).ShouldPoke()
|
|
}
|
|
|
|
// RecordPoke records that we poked an agent.
|
|
func (bm *BackoffManager) RecordPoke(agentID string) {
|
|
bm.GetOrCreate(agentID).RecordPoke()
|
|
}
|
|
|
|
// RecordMiss records that an agent didn't respond.
|
|
func (bm *BackoffManager) RecordMiss(agentID string) {
|
|
bm.GetOrCreate(agentID).RecordMiss(bm.config)
|
|
}
|
|
|
|
// RecordActivity records that an agent showed activity.
|
|
func (bm *BackoffManager) RecordActivity(agentID string) {
|
|
bm.GetOrCreate(agentID).RecordActivity()
|
|
}
|
|
|
|
// GetInterval returns the current interval for an agent.
|
|
func (bm *BackoffManager) GetInterval(agentID string) time.Duration {
|
|
return bm.GetOrCreate(agentID).CurrentInterval
|
|
}
|
|
|
|
// Stats returns a map of agent ID to current interval for logging.
|
|
func (bm *BackoffManager) Stats() map[string]time.Duration {
|
|
stats := make(map[string]time.Duration, len(bm.agents))
|
|
for id, ab := range bm.agents {
|
|
stats[id] = ab.CurrentInterval
|
|
}
|
|
return stats
|
|
}
|