fix(zfc): move stuck detection thresholds to agent-controlled config

Per ZFC principle: 'Let agents decide thresholds. Stuck is a judgment call.'

Changes:
- Add health check threshold fields to RoleConfig (ping_timeout,
  consecutive_failures, kill_cooldown, stuck_threshold)
- Add LoadStuckConfig() to read thresholds from hq-deacon-role bead
- Update patrol_check.go to use configurable stuck threshold
- Defaults remain as fallbacks when no role bead config exists

Agents can now configure their stuck detection by adding fields to their
role bead, e.g.:
  ping_timeout: 45s
  consecutive_failures: 5
  kill_cooldown: 10m
  stuck_threshold: 2h

Fixes: hq-2355b
This commit is contained in:
gastown/crew/gus
2026-01-09 22:07:35 -08:00
committed by Steve Yegge
parent 0f633be4b1
commit e0858096f6
3 changed files with 103 additions and 5 deletions

View File

@@ -219,6 +219,10 @@ type PatrolNotStuckCheck struct {
stuckThreshold time.Duration
}
// DefaultStuckThreshold is the fallback when no role bead config exists.
// Per ZFC: "Let agents decide thresholds. 'Stuck' is a judgment call."
const DefaultStuckThreshold = 1 * time.Hour
// NewPatrolNotStuckCheck creates a new patrol not stuck check.
func NewPatrolNotStuckCheck() *PatrolNotStuckCheck {
return &PatrolNotStuckCheck{
@@ -226,12 +230,29 @@ func NewPatrolNotStuckCheck() *PatrolNotStuckCheck {
CheckName: "patrol-not-stuck",
CheckDescription: "Check for stuck patrol wisps (>1h in_progress)",
},
stuckThreshold: 1 * time.Hour,
stuckThreshold: DefaultStuckThreshold,
}
}
// loadStuckThreshold loads the stuck threshold from the Deacon's role bead.
// Returns the default if no config exists.
func loadStuckThreshold(townRoot string) time.Duration {
bd := beads.NewWithBeadsDir(townRoot, beads.ResolveBeadsDir(townRoot))
roleConfig, err := bd.GetRoleConfig(beads.RoleBeadIDTown("deacon"))
if err != nil || roleConfig == nil || roleConfig.StuckThreshold == "" {
return DefaultStuckThreshold
}
if d, err := time.ParseDuration(roleConfig.StuckThreshold); err == nil {
return d
}
return DefaultStuckThreshold
}
// Run checks for stuck patrol wisps.
func (c *PatrolNotStuckCheck) Run(ctx *CheckContext) *CheckResult {
// Load threshold from role bead (ZFC: agent-controlled)
c.stuckThreshold = loadStuckThreshold(ctx.TownRoot)
rigs, err := discoverRigs(ctx.TownRoot)
if err != nil {
return &CheckResult{
@@ -261,11 +282,12 @@ func (c *PatrolNotStuckCheck) Run(ctx *CheckContext) *CheckResult {
stuckWisps = append(stuckWisps, stuck...)
}
thresholdStr := c.stuckThreshold.String()
if len(stuckWisps) > 0 {
return &CheckResult{
Name: c.Name(),
Status: StatusWarning,
Message: fmt.Sprintf("%d stuck patrol wisp(s) found (>1h)", len(stuckWisps)),
Message: fmt.Sprintf("%d stuck patrol wisp(s) found (>%s)", len(stuckWisps), thresholdStr),
Details: stuckWisps,
FixHint: "Manual review required - wisps may need to be burned or sessions restarted",
}