fix(zfc): move stuck detection thresholds to agent-controlled config
Per ZFC principle: 'Let agents decide thresholds. Stuck is a judgment call.'

Changes:
- Add health check threshold fields to RoleConfig (ping_timeout, consecutive_failures, kill_cooldown, stuck_threshold)
- Add LoadStuckConfig() to read thresholds from the hq-deacon-role bead
- Update patrol_check.go to use the configurable stuck threshold
- Defaults remain as fallbacks when no role bead config exists

Agents can now configure their stuck detection by adding fields to their role bead, e.g.:
  ping_timeout: 45s
  consecutive_failures: 5
  kill_cooldown: 10m
  stuck_threshold: 2h

Fixes: hq-2355b
This commit is contained in:
committed by
Steve Yegge
parent
0f633be4b1
commit
e0858096f6
@@ -8,13 +8,17 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/steveyegge/gastown/internal/beads"
|
||||
)
|
||||
|
||||
// Default parameters for stuck-session detection.
|
||||
// These are fallbacks when no role bead config exists.
|
||||
// Per ZFC: "Let agents decide thresholds. 'Stuck' is a judgment call."
|
||||
const (
	// DefaultPingTimeout is how long to wait for a response to a health ping.
	DefaultPingTimeout = 30 * time.Second
	// DefaultConsecutiveFailures is the number of failures before a force-kill.
	DefaultConsecutiveFailures = 3
	// DefaultCooldown is the minimum time between force-kills.
	DefaultCooldown = 5 * time.Minute
)
|
||||
|
||||
// StuckConfig holds configurable parameters for stuck-session detection.
|
||||
@@ -33,6 +37,37 @@ func DefaultStuckConfig() *StuckConfig {
|
||||
}
|
||||
}
|
||||
|
||||
// LoadStuckConfig loads stuck detection config from the Deacon's role bead.
|
||||
// Returns defaults if no role bead exists or if fields aren't configured.
|
||||
// Per ZFC: agents control their own thresholds via their role beads.
|
||||
func LoadStuckConfig(townRoot string) *StuckConfig {
|
||||
config := DefaultStuckConfig()
|
||||
|
||||
// Load from hq-deacon-role bead
|
||||
bd := beads.NewWithBeadsDir(townRoot, beads.ResolveBeadsDir(townRoot))
|
||||
roleConfig, err := bd.GetRoleConfig(beads.RoleBeadIDTown("deacon"))
|
||||
if err != nil || roleConfig == nil {
|
||||
return config
|
||||
}
|
||||
|
||||
// Override defaults with role bead values
|
||||
if roleConfig.PingTimeout != "" {
|
||||
if d, err := time.ParseDuration(roleConfig.PingTimeout); err == nil {
|
||||
config.PingTimeout = d
|
||||
}
|
||||
}
|
||||
if roleConfig.ConsecutiveFailures > 0 {
|
||||
config.ConsecutiveFailures = roleConfig.ConsecutiveFailures
|
||||
}
|
||||
if roleConfig.KillCooldown != "" {
|
||||
if d, err := time.ParseDuration(roleConfig.KillCooldown); err == nil {
|
||||
config.Cooldown = d
|
||||
}
|
||||
}
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
// AgentHealthState tracks the health check state for a single agent.
|
||||
type AgentHealthState struct {
|
||||
// AgentID is the identifier (e.g., "gastown/polecats/max" or "deacon")
|
||||
|
||||
Reference in New Issue
Block a user