fix: remove vestigial state.json files from agent directories

Agent directories (witness/, refinery/, mayor/) contained state.json files
with last_active timestamps that were never updated, making them stale and
misleading. This change removes:

- initAgentStates function that created vestigial state.json files
- AgentState type and related Load/Save functions from config package
- MayorStateValidCheck from doctor checks
- requesting_* lifecycle verification (dead code - flags were never set)
- FileStateJSON constant and MayorStatePath function

Kept intact:
- daemon/state.json (actively used for daemon runtime state)
- crew/<name>/state.json (operational CrewWorker metadata)
- Agent state tracking via beads (the ZFC-compliant approach)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
splendid
2026-01-03 21:20:11 -08:00
committed by Steve Yegge
parent 60ecf1ff76
commit acd2565a5b
13 changed files with 23 additions and 576 deletions

View File

@@ -3,23 +3,15 @@ package doctor
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/steveyegge/gastown/internal/session"
)
// LifecycleHygieneCheck detects and cleans up stale lifecycle state.
// This can happen when:
// - Lifecycle messages weren't properly deleted after processing
// - Agent state.json has stuck requesting_* flags
// - Session was manually killed without clearing state
// This can happen when lifecycle messages weren't properly deleted after processing.
type LifecycleHygieneCheck struct {
FixableCheck
staleMessages []staleMessage
stuckStateFiles []stuckState
staleMessages []staleMessage
}
type staleMessage struct {
@@ -28,19 +20,13 @@ type staleMessage struct {
From string
}
// stuckState records one requesting_* flag that was found set to true in an
// agent state file, so that Fix can later clear it.
type stuckState struct {
// stateFile is the path of the state.json file the flag was read from.
stateFile string
// identity is the agent identity associated with that state file.
identity string
// flag is the JSON key (prefixed with "requesting_") that was found set.
flag string
}
// NewLifecycleHygieneCheck creates a new lifecycle hygiene check.
func NewLifecycleHygieneCheck() *LifecycleHygieneCheck {
return &LifecycleHygieneCheck{
FixableCheck: FixableCheck{
BaseCheck: BaseCheck{
CheckName: "lifecycle-hygiene",
CheckDescription: "Check for stale lifecycle messages and stuck state flags",
CheckDescription: "Check for stale lifecycle messages",
},
},
}
@@ -49,36 +35,21 @@ func NewLifecycleHygieneCheck() *LifecycleHygieneCheck {
// Run checks for stale lifecycle state.
func (c *LifecycleHygieneCheck) Run(ctx *CheckContext) *CheckResult {
c.staleMessages = nil
c.stuckStateFiles = nil
var details []string
// Check for stale lifecycle messages in deacon inbox
staleCount := c.checkDeaconInbox(ctx)
if staleCount > 0 {
details = append(details, fmt.Sprintf("%d stale lifecycle message(s) in deacon inbox", staleCount))
}
// Check for stuck requesting_* flags in state files
stuckCount := c.checkStateFiles(ctx)
if stuckCount > 0 {
details = append(details, fmt.Sprintf("%d agent(s) with stuck requesting_* flags", stuckCount))
}
total := staleCount + stuckCount
if total == 0 {
if staleCount == 0 {
return &CheckResult{
Name: c.Name(),
Status: StatusOK,
Message: "No stale lifecycle state found",
Message: "No stale lifecycle messages found",
}
}
return &CheckResult{
Name: c.Name(),
Status: StatusWarning,
Message: fmt.Sprintf("Found %d lifecycle hygiene issue(s)", total),
Details: details,
Message: fmt.Sprintf("Found %d stale lifecycle message(s) in deacon inbox", staleCount),
FixHint: "Run 'gt doctor --fix' to clean up",
}
}
@@ -121,139 +92,7 @@ func (c *LifecycleHygieneCheck) checkDeaconInbox(ctx *CheckContext) int {
return len(c.staleMessages)
}
// checkStateFiles scans every agent state.json under the town root for
// requesting_* keys whose value is the boolean true, and records each one in
// c.stuckStateFiles when the owning agent's session is reported healthy.
//
// Files that cannot be read or parsed as a JSON object are skipped silently.
// The return value is the number of entries held in c.stuckStateFiles after
// the scan.
func (c *LifecycleHygieneCheck) checkStateFiles(ctx *CheckContext) int {
	for _, candidate := range c.findStateFiles(ctx.TownRoot) {
		raw, err := os.ReadFile(candidate.path)
		if err != nil {
			continue
		}

		var fields map[string]interface{}
		if json.Unmarshal(raw, &fields) != nil {
			continue
		}

		for name, value := range fields {
			// Only requesting_* keys are of interest.
			if !strings.HasPrefix(name, "requesting_") {
				continue
			}
			// The flag must be a JSON boolean and must be true.
			if set, isBool := value.(bool); !isBool || !set {
				continue
			}
			// A set flag only counts as stuck when the session is healthy.
			if !c.isSessionHealthy(candidate.identity, ctx.TownRoot) {
				continue
			}
			c.stuckStateFiles = append(c.stuckStateFiles, stuckState{
				stateFile: candidate.path,
				identity:  candidate.identity,
				flag:      name,
			})
		}
	}

	return len(c.stuckStateFiles)
}
// stateFileInfo pairs an agent state.json file with the identity of the agent
// it belongs to.
type stateFileInfo struct {
// path is the filesystem location of the state.json file.
path string
// identity names the owning agent, e.g. "mayor" or "<rig>-witness".
identity string
}
// findStateFiles returns the agent state.json files that exist under townRoot,
// each paired with the identity of the agent that owns it.
//
// Probe order is: <townRoot>/mayor/state.json, then, for every non-hidden
// directory directly under townRoot other than "mayor", that rig's
// witness/state.json, refinery/state.json and crew/<name>/state.json files.
// Paths for which os.Stat fails are omitted. If townRoot cannot be listed,
// whatever was collected so far is returned.
func (c *LifecycleHygieneCheck) findStateFiles(townRoot string) []stateFileInfo {
	var found []stateFileInfo

	// record appends candidate to found only when os.Stat succeeds on it.
	record := func(candidate, identity string) {
		if _, err := os.Stat(candidate); err != nil {
			return
		}
		found = append(found, stateFileInfo{path: candidate, identity: identity})
	}

	record(filepath.Join(townRoot, "mayor", "state.json"), "mayor")

	rigs, err := os.ReadDir(townRoot)
	if err != nil {
		return found
	}

	for _, rig := range rigs {
		rigName := rig.Name()
		if !rig.IsDir() || strings.HasPrefix(rigName, ".") || rigName == "mayor" {
			continue
		}
		rigDir := filepath.Join(townRoot, rigName)

		record(filepath.Join(rigDir, "witness", "state.json"), rigName+"-witness")
		record(filepath.Join(rigDir, "refinery", "state.json"), rigName+"-refinery")

		// A rig without a readable crew directory simply has no crew entries.
		crewDir := filepath.Join(rigDir, "crew")
		members, err := os.ReadDir(crewDir)
		if err != nil {
			continue
		}
		for _, member := range members {
			memberName := member.Name()
			if !member.IsDir() || strings.HasPrefix(memberName, ".") {
				continue
			}
			record(
				filepath.Join(crewDir, memberName, "state.json"),
				rigName+"-crew-"+memberName,
			)
		}
	}

	return found
}
// isSessionHealthy reports whether a tmux session for identity currently
// exists.
//
// The identity is translated to a session name with identityToSessionName; an
// identity that maps to no session name is reported as unhealthy. The second
// parameter is unused and kept only so the signature stays unchanged.
//
// Existence is probed by running `tmux has-session`; any error from running
// the command yields false.
func (c *LifecycleHygieneCheck) isSessionHealthy(identity, _ string) bool {
	sessionName := identityToSessionName(identity)
	if sessionName == "" {
		return false
	}

	// Prefix the target with "=" so tmux accepts only an exact session-name
	// match. A bare name is also tried as a name prefix and as an fnmatch
	// pattern, so a different session whose name merely starts with
	// sessionName would otherwise be reported as this agent's session.
	cmd := exec.Command("tmux", "has-session", "-t", "="+sessionName)
	return cmd.Run() == nil
}
// identityToSessionName maps an agent identity to the name of its tmux
// session.
//
// "mayor" resolves through session.MayorSessionName. Identities ending in
// "-witness" or "-refinery", or containing "-crew-", map to "gt-" followed by
// the identity. Any other identity yields the empty string.
func identityToSessionName(identity string) string {
	if identity == "mayor" {
		return session.MayorSessionName()
	}

	isRigAgent := strings.HasSuffix(identity, "-witness") ||
		strings.HasSuffix(identity, "-refinery") ||
		strings.Contains(identity, "-crew-")
	if !isRigAgent {
		return ""
	}
	return "gt-" + identity
}
// Fix cleans up stale lifecycle state.
// Fix cleans up stale lifecycle messages.
func (c *LifecycleHygieneCheck) Fix(ctx *CheckContext) error {
var errors []string
@@ -266,39 +105,8 @@ func (c *LifecycleHygieneCheck) Fix(ctx *CheckContext) error {
}
}
// Clear stuck requesting_* flags
for _, stuck := range c.stuckStateFiles {
if err := c.clearRequestingFlag(stuck); err != nil {
errors = append(errors, fmt.Sprintf("failed to clear %s in %s: %v", stuck.flag, stuck.identity, err))
}
}
if len(errors) > 0 {
return fmt.Errorf("%s", strings.Join(errors, "; "))
}
return nil
}
// clearRequestingFlag rewrites the state file named by stuck with the stuck
// flag key and the "requesting_time" key removed.
//
// The file is read, decoded as a JSON object, re-encoded with indentation and
// written back to the same path. The first error from reading, decoding,
// encoding or writing is returned unchanged.
func (c *LifecycleHygieneCheck) clearRequestingFlag(stuck stuckState) error {
	raw, err := os.ReadFile(stuck.stateFile)
	if err != nil {
		return err
	}

	var fields map[string]interface{}
	if err = json.Unmarshal(raw, &fields); err != nil {
		return err
	}

	// Drop the flag itself together with its companion timestamp key.
	for _, key := range []string{stuck.flag, "requesting_time"} {
		delete(fields, key)
	}

	encoded, err := json.MarshalIndent(fields, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(stuck.stateFile, encoded, 0644)
}