fix(zfc): move stuck detection thresholds to agent-controlled config
Per ZFC principle: 'Let agents decide thresholds. Stuck is a judgment call.' Changes: - Add health check threshold fields to RoleConfig (ping_timeout, consecutive_failures, kill_cooldown, stuck_threshold) - Add LoadStuckConfig() to read thresholds from hq-deacon-role bead - Update patrol_check.go to use configurable stuck threshold - Defaults remain as fallbacks when no role bead config exists Agents can now configure their stuck detection by adding fields to their role bead, e.g.: ping_timeout: 45s consecutive_failures: 5 kill_cooldown: 10m stuck_threshold: 2h Fixes: hq-2355b
This commit is contained in:
committed by
Steve Yegge
parent
0f633be4b1
commit
e0858096f6
@@ -219,6 +219,10 @@ type PatrolNotStuckCheck struct {
|
||||
stuckThreshold time.Duration
|
||||
}
|
||||
|
||||
// DefaultStuckThreshold is the fallback when no role bead config exists.
|
||||
// Per ZFC: "Let agents decide thresholds. 'Stuck' is a judgment call."
|
||||
const DefaultStuckThreshold = 1 * time.Hour
|
||||
|
||||
// NewPatrolNotStuckCheck creates a new patrol not stuck check.
|
||||
func NewPatrolNotStuckCheck() *PatrolNotStuckCheck {
|
||||
return &PatrolNotStuckCheck{
|
||||
@@ -226,12 +230,29 @@ func NewPatrolNotStuckCheck() *PatrolNotStuckCheck {
|
||||
CheckName: "patrol-not-stuck",
|
||||
CheckDescription: "Check for stuck patrol wisps (>1h in_progress)",
|
||||
},
|
||||
stuckThreshold: 1 * time.Hour,
|
||||
stuckThreshold: DefaultStuckThreshold,
|
||||
}
|
||||
}
|
||||
|
||||
// loadStuckThreshold loads the stuck threshold from the Deacon's role bead.
|
||||
// Returns the default if no config exists.
|
||||
func loadStuckThreshold(townRoot string) time.Duration {
|
||||
bd := beads.NewWithBeadsDir(townRoot, beads.ResolveBeadsDir(townRoot))
|
||||
roleConfig, err := bd.GetRoleConfig(beads.RoleBeadIDTown("deacon"))
|
||||
if err != nil || roleConfig == nil || roleConfig.StuckThreshold == "" {
|
||||
return DefaultStuckThreshold
|
||||
}
|
||||
if d, err := time.ParseDuration(roleConfig.StuckThreshold); err == nil {
|
||||
return d
|
||||
}
|
||||
return DefaultStuckThreshold
|
||||
}
|
||||
|
||||
// Run checks for stuck patrol wisps.
|
||||
func (c *PatrolNotStuckCheck) Run(ctx *CheckContext) *CheckResult {
|
||||
// Load threshold from role bead (ZFC: agent-controlled)
|
||||
c.stuckThreshold = loadStuckThreshold(ctx.TownRoot)
|
||||
|
||||
rigs, err := discoverRigs(ctx.TownRoot)
|
||||
if err != nil {
|
||||
return &CheckResult{
|
||||
@@ -261,11 +282,12 @@ func (c *PatrolNotStuckCheck) Run(ctx *CheckContext) *CheckResult {
|
||||
stuckWisps = append(stuckWisps, stuck...)
|
||||
}
|
||||
|
||||
thresholdStr := c.stuckThreshold.String()
|
||||
if len(stuckWisps) > 0 {
|
||||
return &CheckResult{
|
||||
Name: c.Name(),
|
||||
Status: StatusWarning,
|
||||
Message: fmt.Sprintf("%d stuck patrol wisp(s) found (>1h)", len(stuckWisps)),
|
||||
Message: fmt.Sprintf("%d stuck patrol wisp(s) found (>%s)", len(stuckWisps), thresholdStr),
|
||||
Details: stuckWisps,
|
||||
FixHint: "Manual review required - wisps may need to be burned or sessions restarted",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user