fix: remove vestigial state.json files from agent directories
Agent directories (witness/, refinery/, mayor/) contained state.json files with last_active timestamps that were never updated, making them stale and misleading.

This change removes:
- initAgentStates function that created vestigial state.json files
- AgentState type and related Load/Save functions from config package
- MayorStateValidCheck from doctor checks
- requesting_* lifecycle verification (dead code - flags were never set)
- FileStateJSON constant and MayorStatePath function

Kept intact:
- daemon/state.json (actively used for daemon runtime state)
- crew/<name>/state.json (operational CrewWorker metadata)
- Agent state tracking via beads (the ZFC-compliant approach)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -30,7 +30,6 @@ Workspace checks:
|
|||||||
- rigs-registry-exists Check mayor/rigs.json exists (fixable)
|
- rigs-registry-exists Check mayor/rigs.json exists (fixable)
|
||||||
- rigs-registry-valid Check registered rigs exist (fixable)
|
- rigs-registry-valid Check registered rigs exist (fixable)
|
||||||
- mayor-exists Check mayor/ directory structure
|
- mayor-exists Check mayor/ directory structure
|
||||||
- mayor-state-valid Check mayor/state.json is valid (fixable)
|
|
||||||
|
|
||||||
Infrastructure checks:
|
Infrastructure checks:
|
||||||
- daemon Check if daemon is running (fixable)
|
- daemon Check if daemon is running (fixable)
|
||||||
|
|||||||
@@ -171,17 +171,6 @@ func runInstall(cmd *cobra.Command, args []string) error {
|
|||||||
}
|
}
|
||||||
fmt.Printf(" ✓ Created mayor/rigs.json\n")
|
fmt.Printf(" ✓ Created mayor/rigs.json\n")
|
||||||
|
|
||||||
// Create mayor state.json
|
|
||||||
mayorState := &config.AgentState{
|
|
||||||
Role: "mayor",
|
|
||||||
LastActive: time.Now(),
|
|
||||||
}
|
|
||||||
statePath := filepath.Join(mayorDir, "state.json")
|
|
||||||
if err := config.SaveAgentState(statePath, mayorState); err != nil {
|
|
||||||
return fmt.Errorf("writing mayor state: %w", err)
|
|
||||||
}
|
|
||||||
fmt.Printf(" ✓ Created mayor/state.json\n")
|
|
||||||
|
|
||||||
// Create Mayor CLAUDE.md at HQ root (Mayor runs from there)
|
// Create Mayor CLAUDE.md at HQ root (Mayor runs from there)
|
||||||
if err := createMayorCLAUDEmd(absPath, absPath); err != nil {
|
if err := createMayorCLAUDEmd(absPath, absPath); err != nil {
|
||||||
fmt.Printf(" %s Could not create CLAUDE.md: %v\n", style.Dim.Render("⚠"), err)
|
fmt.Printf(" %s Could not create CLAUDE.md: %v\n", style.Dim.Render("⚠"), err)
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -61,22 +60,6 @@ func TestInstallCreatesCorrectStructure(t *testing.T) {
|
|||||||
t.Errorf("rigs.json should be empty, got %d rigs", len(rigsConfig.Rigs))
|
t.Errorf("rigs.json should be empty, got %d rigs", len(rigsConfig.Rigs))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify mayor/state.json
|
|
||||||
statePath := filepath.Join(hqPath, "mayor", "state.json")
|
|
||||||
assertFileExists(t, statePath, "mayor/state.json")
|
|
||||||
|
|
||||||
stateData, err := os.ReadFile(statePath)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to read state.json: %v", err)
|
|
||||||
}
|
|
||||||
var state map[string]interface{}
|
|
||||||
if err := json.Unmarshal(stateData, &state); err != nil {
|
|
||||||
t.Fatalf("failed to parse state.json: %v", err)
|
|
||||||
}
|
|
||||||
if state["role"] != "mayor" {
|
|
||||||
t.Errorf("state.json role = %q, want %q", state["role"], "mayor")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify CLAUDE.md exists
|
// Verify CLAUDE.md exists
|
||||||
claudePath := filepath.Join(hqPath, "CLAUDE.md")
|
claudePath := filepath.Join(hqPath, "CLAUDE.md")
|
||||||
assertFileExists(t, claudePath, "CLAUDE.md")
|
assertFileExists(t, claudePath, "CLAUDE.md")
|
||||||
|
|||||||
@@ -542,17 +542,20 @@ func TestRigAddCreatesAgentDirs(t *testing.T) {
|
|||||||
|
|
||||||
rigPath := filepath.Join(townRoot, "agenttest")
|
rigPath := filepath.Join(townRoot, "agenttest")
|
||||||
|
|
||||||
// Verify agent state files exist
|
// Verify agent directories exist (state.json files are no longer created)
|
||||||
expectedStateFiles := []string{
|
expectedDirs := []string{
|
||||||
"witness/state.json",
|
"witness",
|
||||||
"refinery/state.json",
|
"refinery",
|
||||||
"mayor/state.json",
|
"mayor",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, stateFile := range expectedStateFiles {
|
for _, dir := range expectedDirs {
|
||||||
path := filepath.Join(rigPath, stateFile)
|
path := filepath.Join(rigPath, dir)
|
||||||
if _, err := os.Stat(path); err != nil {
|
info, err := os.Stat(path)
|
||||||
t.Errorf("expected state file %s to exist: %v", stateFile, err)
|
if err != nil {
|
||||||
|
t.Errorf("expected directory %s to exist: %v", dir, err)
|
||||||
|
} else if !info.IsDir() {
|
||||||
|
t.Errorf("expected %s to be a directory", dir)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -113,50 +113,6 @@ func SaveRigsConfig(path string, config *RigsConfig) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadAgentState loads an agent state file.
|
|
||||||
func LoadAgentState(path string) (*AgentState, error) {
|
|
||||||
data, err := os.ReadFile(path) //nolint:gosec // G304: path is constructed internally, not from user input
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return nil, fmt.Errorf("%w: %s", ErrNotFound, path)
|
|
||||||
}
|
|
||||||
return nil, fmt.Errorf("reading state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var state AgentState
|
|
||||||
if err := json.Unmarshal(data, &state); err != nil {
|
|
||||||
return nil, fmt.Errorf("parsing state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := validateAgentState(&state); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &state, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SaveAgentState saves an agent state to a file.
|
|
||||||
func SaveAgentState(path string, state *AgentState) error {
|
|
||||||
if err := validateAgentState(state); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
|
||||||
return fmt.Errorf("creating directory: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := json.MarshalIndent(state, "", " ")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("encoding state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.WriteFile(path, data, 0644); err != nil { //nolint:gosec // G306: state files don't contain secrets
|
|
||||||
return fmt.Errorf("writing state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// validateTownConfig validates a TownConfig.
|
// validateTownConfig validates a TownConfig.
|
||||||
func validateTownConfig(c *TownConfig) error {
|
func validateTownConfig(c *TownConfig) error {
|
||||||
if c.Type != "town" && c.Type != "" {
|
if c.Type != "town" && c.Type != "" {
|
||||||
@@ -182,14 +138,6 @@ func validateRigsConfig(c *RigsConfig) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// validateAgentState validates an AgentState.
|
|
||||||
func validateAgentState(s *AgentState) error {
|
|
||||||
if s.Role == "" {
|
|
||||||
return fmt.Errorf("%w: role", ErrMissingField)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// LoadRigConfig loads and validates a rig configuration file.
|
// LoadRigConfig loads and validates a rig configuration file.
|
||||||
func LoadRigConfig(path string) (*RigConfig, error) {
|
func LoadRigConfig(path string) (*RigConfig, error) {
|
||||||
data, err := os.ReadFile(path) //nolint:gosec // G304: path is constructed internally, not from user input
|
data, err := os.ReadFile(path) //nolint:gosec // G304: path is constructed internally, not from user input
|
||||||
|
|||||||
@@ -80,36 +80,6 @@ func TestRigsConfigRoundTrip(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAgentStateRoundTrip(t *testing.T) {
|
|
||||||
dir := t.TempDir()
|
|
||||||
path := filepath.Join(dir, "state.json")
|
|
||||||
|
|
||||||
original := &AgentState{
|
|
||||||
Role: "mayor",
|
|
||||||
LastActive: time.Now().Truncate(time.Second),
|
|
||||||
Session: "abc123",
|
|
||||||
Extra: map[string]any{
|
|
||||||
"custom": "value",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := SaveAgentState(path, original); err != nil {
|
|
||||||
t.Fatalf("SaveAgentState: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
loaded, err := LoadAgentState(path)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("LoadAgentState: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if loaded.Role != original.Role {
|
|
||||||
t.Errorf("Role = %q, want %q", loaded.Role, original.Role)
|
|
||||||
}
|
|
||||||
if loaded.Session != original.Session {
|
|
||||||
t.Errorf("Session = %q, want %q", loaded.Session, original.Session)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLoadTownConfigNotFound(t *testing.T) {
|
func TestLoadTownConfigNotFound(t *testing.T) {
|
||||||
_, err := LoadTownConfig("/nonexistent/path.json")
|
_, err := LoadTownConfig("/nonexistent/path.json")
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -129,12 +99,6 @@ func TestValidationErrors(t *testing.T) {
|
|||||||
if err := validateTownConfig(tc); err == nil {
|
if err := validateTownConfig(tc); err == nil {
|
||||||
t.Error("expected error for wrong type")
|
t.Error("expected error for wrong type")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Missing role
|
|
||||||
as := &AgentState{}
|
|
||||||
if err := validateAgentState(as); err == nil {
|
|
||||||
t.Error("expected error for missing role")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRigConfigRoundTrip(t *testing.T) {
|
func TestRigConfigRoundTrip(t *testing.T) {
|
||||||
|
|||||||
@@ -65,14 +65,6 @@ type BeadsConfig struct {
|
|||||||
Prefix string `json:"prefix"` // issue prefix
|
Prefix string `json:"prefix"` // issue prefix
|
||||||
}
|
}
|
||||||
|
|
||||||
// AgentState represents an agent's current state (*/state.json).
|
|
||||||
type AgentState struct {
|
|
||||||
Role string `json:"role"` // "mayor", "witness", etc.
|
|
||||||
LastActive time.Time `json:"last_active"`
|
|
||||||
Session string `json:"session,omitempty"`
|
|
||||||
Extra map[string]any `json:"extra,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// CurrentTownVersion is the current schema version for TownConfig.
|
// CurrentTownVersion is the current schema version for TownConfig.
|
||||||
// Version 2: Added Owner and PublicName fields for federation identity.
|
// Version 2: Added Owner and PublicName fields for federation identity.
|
||||||
const CurrentTownVersion = 2
|
const CurrentTownVersion = 2
|
||||||
|
|||||||
@@ -64,9 +64,6 @@ const (
|
|||||||
// FileTownJSON is the town configuration file in mayor/.
|
// FileTownJSON is the town configuration file in mayor/.
|
||||||
FileTownJSON = "town.json"
|
FileTownJSON = "town.json"
|
||||||
|
|
||||||
// FileStateJSON is the agent state file.
|
|
||||||
FileStateJSON = "state.json"
|
|
||||||
|
|
||||||
// FileConfigJSON is the general config file.
|
// FileConfigJSON is the general config file.
|
||||||
FileConfigJSON = "config.json"
|
FileConfigJSON = "config.json"
|
||||||
|
|
||||||
@@ -176,11 +173,6 @@ func MayorTownPath(townRoot string) string {
|
|||||||
return townRoot + "/" + DirMayor + "/" + FileTownJSON
|
return townRoot + "/" + DirMayor + "/" + FileTownJSON
|
||||||
}
|
}
|
||||||
|
|
||||||
// MayorStatePath returns the path to mayor state.json within a town root.
|
|
||||||
func MayorStatePath(townRoot string) string {
|
|
||||||
return townRoot + "/" + DirMayor + "/" + FileStateJSON
|
|
||||||
}
|
|
||||||
|
|
||||||
// RigMayorPath returns the path to mayor/rig within a rig.
|
// RigMayorPath returns the path to mayor/rig within a rig.
|
||||||
func RigMayorPath(rigPath string) string {
|
func RigMayorPath(rigPath string) string {
|
||||||
return rigPath + "/" + DirMayor + "/" + DirRig
|
return rigPath + "/" + DirMayor + "/" + DirRig
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package daemon
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -160,11 +159,6 @@ func (d *Daemon) executeLifecycleAction(request *LifecycleRequest) error {
|
|||||||
|
|
||||||
d.logger.Printf("Executing %s for session %s", request.Action, sessionName)
|
d.logger.Printf("Executing %s for session %s", request.Action, sessionName)
|
||||||
|
|
||||||
// Verify agent state shows requesting_<action>=true before killing
|
|
||||||
if err := d.verifyAgentRequestingState(request.From, request.Action); err != nil {
|
|
||||||
return fmt.Errorf("state verification failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check agent bead state (ZFC: trust what agent reports) - gt-39ttg
|
// Check agent bead state (ZFC: trust what agent reports) - gt-39ttg
|
||||||
agentBeadID := d.identityToAgentBeadID(request.From)
|
agentBeadID := d.identityToAgentBeadID(request.From)
|
||||||
if agentBeadID != "" {
|
if agentBeadID != "" {
|
||||||
@@ -206,11 +200,6 @@ func (d *Daemon) executeLifecycleAction(request *LifecycleRequest) error {
|
|||||||
return fmt.Errorf("restarting session: %w", err)
|
return fmt.Errorf("restarting session: %w", err)
|
||||||
}
|
}
|
||||||
d.logger.Printf("Restarted session %s", sessionName)
|
d.logger.Printf("Restarted session %s", sessionName)
|
||||||
|
|
||||||
// Clear the requesting state so we don't cycle again
|
|
||||||
if err := d.clearAgentRequestingState(request.From, request.Action); err != nil {
|
|
||||||
d.logger.Printf("Warning: failed to clear agent state: %v", err)
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@@ -517,115 +506,6 @@ func (d *Daemon) closeMessage(id string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// verifyAgentRequestingState verifies that the agent has set requesting_<action>=true
|
|
||||||
// in its state.json before we kill its session. This ensures the agent is actually
|
|
||||||
// ready to be killed and has completed its pre-shutdown tasks (git clean, handoff mail, etc).
|
|
||||||
func (d *Daemon) verifyAgentRequestingState(identity string, action LifecycleAction) error {
|
|
||||||
stateFile := d.identityToStateFile(identity)
|
|
||||||
if stateFile == "" {
|
|
||||||
// If we can't determine state file, log warning but allow action
|
|
||||||
// This maintains backwards compatibility with agents that don't support state files yet
|
|
||||||
d.logger.Printf("Warning: cannot determine state file for %s, skipping verification", identity)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := os.ReadFile(stateFile)
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return fmt.Errorf("agent state file not found: %s (agent must set requesting_%s=true before lifecycle request)", stateFile, action)
|
|
||||||
}
|
|
||||||
return fmt.Errorf("reading agent state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var state map[string]interface{}
|
|
||||||
if err := json.Unmarshal(data, &state); err != nil {
|
|
||||||
return fmt.Errorf("parsing agent state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for requesting_<action>=true
|
|
||||||
key := "requesting_" + string(action)
|
|
||||||
val, ok := state[key]
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("agent state missing %s field (agent must set this before lifecycle request)", key)
|
|
||||||
}
|
|
||||||
|
|
||||||
requesting, ok := val.(bool)
|
|
||||||
if !ok || !requesting {
|
|
||||||
return fmt.Errorf("agent state %s is not true (got: %v)", key, val)
|
|
||||||
}
|
|
||||||
|
|
||||||
d.logger.Printf("Verified agent %s has %s=true", identity, key)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// clearAgentRequestingState clears the requesting_<action>=true flag after
|
|
||||||
// successfully completing a lifecycle action. This prevents the daemon from
|
|
||||||
// repeatedly cycling the same session.
|
|
||||||
func (d *Daemon) clearAgentRequestingState(identity string, action LifecycleAction) error {
|
|
||||||
stateFile := d.identityToStateFile(identity)
|
|
||||||
if stateFile == "" {
|
|
||||||
return fmt.Errorf("cannot determine state file for %s", identity)
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := os.ReadFile(stateFile)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("reading state file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var state map[string]interface{}
|
|
||||||
if err := json.Unmarshal(data, &state); err != nil {
|
|
||||||
return fmt.Errorf("parsing state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove the requesting_<action> key
|
|
||||||
key := "requesting_" + string(action)
|
|
||||||
delete(state, key)
|
|
||||||
delete(state, "requesting_time") // Also clean up the timestamp
|
|
||||||
|
|
||||||
// Write back
|
|
||||||
newData, err := json.MarshalIndent(state, "", " ")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("marshaling state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.WriteFile(stateFile, newData, 0644); err != nil {
|
|
||||||
return fmt.Errorf("writing state file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
d.logger.Printf("Cleared %s from agent %s state", key, identity)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// identityToStateFile maps an agent identity to its state.json file path.
|
|
||||||
// Uses parseIdentity to extract components, then derives state file location.
|
|
||||||
func (d *Daemon) identityToStateFile(identity string) string {
|
|
||||||
parsed, err := parseIdentity(identity)
|
|
||||||
if err != nil {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Derive state file path based on working directory
|
|
||||||
workDir := d.getWorkDir(nil, parsed) // Use defaults, not role bead config
|
|
||||||
if workDir == "" {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// For mayor and deacon, state file is in a subdirectory
|
|
||||||
switch parsed.RoleType {
|
|
||||||
case "mayor":
|
|
||||||
return filepath.Join(d.config.TownRoot, "mayor", "state.json")
|
|
||||||
case "deacon":
|
|
||||||
return filepath.Join(d.config.TownRoot, "deacon", "state.json")
|
|
||||||
case "witness":
|
|
||||||
return filepath.Join(d.config.TownRoot, parsed.RigName, "witness", "state.json")
|
|
||||||
case "refinery":
|
|
||||||
return filepath.Join(d.config.TownRoot, parsed.RigName, "refinery", "state.json")
|
|
||||||
default:
|
|
||||||
// For crew and polecat, state file is in their working directory
|
|
||||||
return filepath.Join(workDir, "state.json")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// AgentBeadInfo represents the parsed fields from an agent bead.
|
// AgentBeadInfo represents the parsed fields from an agent bead.
|
||||||
type AgentBeadInfo struct {
|
type AgentBeadInfo struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
|
|||||||
@@ -3,23 +3,15 @@ package doctor
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/steveyegge/gastown/internal/session"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// LifecycleHygieneCheck detects and cleans up stale lifecycle state.
|
// LifecycleHygieneCheck detects and cleans up stale lifecycle state.
|
||||||
// This can happen when:
|
// This can happen when lifecycle messages weren't properly deleted after processing.
|
||||||
// - Lifecycle messages weren't properly deleted after processing
|
|
||||||
// - Agent state.json has stuck requesting_* flags
|
|
||||||
// - Session was manually killed without clearing state
|
|
||||||
type LifecycleHygieneCheck struct {
|
type LifecycleHygieneCheck struct {
|
||||||
FixableCheck
|
FixableCheck
|
||||||
staleMessages []staleMessage
|
staleMessages []staleMessage
|
||||||
stuckStateFiles []stuckState
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type staleMessage struct {
|
type staleMessage struct {
|
||||||
@@ -28,19 +20,13 @@ type staleMessage struct {
|
|||||||
From string
|
From string
|
||||||
}
|
}
|
||||||
|
|
||||||
type stuckState struct {
|
|
||||||
stateFile string
|
|
||||||
identity string
|
|
||||||
flag string
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewLifecycleHygieneCheck creates a new lifecycle hygiene check.
|
// NewLifecycleHygieneCheck creates a new lifecycle hygiene check.
|
||||||
func NewLifecycleHygieneCheck() *LifecycleHygieneCheck {
|
func NewLifecycleHygieneCheck() *LifecycleHygieneCheck {
|
||||||
return &LifecycleHygieneCheck{
|
return &LifecycleHygieneCheck{
|
||||||
FixableCheck: FixableCheck{
|
FixableCheck: FixableCheck{
|
||||||
BaseCheck: BaseCheck{
|
BaseCheck: BaseCheck{
|
||||||
CheckName: "lifecycle-hygiene",
|
CheckName: "lifecycle-hygiene",
|
||||||
CheckDescription: "Check for stale lifecycle messages and stuck state flags",
|
CheckDescription: "Check for stale lifecycle messages",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -49,36 +35,21 @@ func NewLifecycleHygieneCheck() *LifecycleHygieneCheck {
|
|||||||
// Run checks for stale lifecycle state.
|
// Run checks for stale lifecycle state.
|
||||||
func (c *LifecycleHygieneCheck) Run(ctx *CheckContext) *CheckResult {
|
func (c *LifecycleHygieneCheck) Run(ctx *CheckContext) *CheckResult {
|
||||||
c.staleMessages = nil
|
c.staleMessages = nil
|
||||||
c.stuckStateFiles = nil
|
|
||||||
|
|
||||||
var details []string
|
|
||||||
|
|
||||||
// Check for stale lifecycle messages in deacon inbox
|
// Check for stale lifecycle messages in deacon inbox
|
||||||
staleCount := c.checkDeaconInbox(ctx)
|
staleCount := c.checkDeaconInbox(ctx)
|
||||||
if staleCount > 0 {
|
if staleCount == 0 {
|
||||||
details = append(details, fmt.Sprintf("%d stale lifecycle message(s) in deacon inbox", staleCount))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for stuck requesting_* flags in state files
|
|
||||||
stuckCount := c.checkStateFiles(ctx)
|
|
||||||
if stuckCount > 0 {
|
|
||||||
details = append(details, fmt.Sprintf("%d agent(s) with stuck requesting_* flags", stuckCount))
|
|
||||||
}
|
|
||||||
|
|
||||||
total := staleCount + stuckCount
|
|
||||||
if total == 0 {
|
|
||||||
return &CheckResult{
|
return &CheckResult{
|
||||||
Name: c.Name(),
|
Name: c.Name(),
|
||||||
Status: StatusOK,
|
Status: StatusOK,
|
||||||
Message: "No stale lifecycle state found",
|
Message: "No stale lifecycle messages found",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &CheckResult{
|
return &CheckResult{
|
||||||
Name: c.Name(),
|
Name: c.Name(),
|
||||||
Status: StatusWarning,
|
Status: StatusWarning,
|
||||||
Message: fmt.Sprintf("Found %d lifecycle hygiene issue(s)", total),
|
Message: fmt.Sprintf("Found %d stale lifecycle message(s) in deacon inbox", staleCount),
|
||||||
Details: details,
|
|
||||||
FixHint: "Run 'gt doctor --fix' to clean up",
|
FixHint: "Run 'gt doctor --fix' to clean up",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -121,139 +92,7 @@ func (c *LifecycleHygieneCheck) checkDeaconInbox(ctx *CheckContext) int {
|
|||||||
return len(c.staleMessages)
|
return len(c.staleMessages)
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkStateFiles looks for stuck requesting_* flags in state.json files.
|
// Fix cleans up stale lifecycle messages.
|
||||||
func (c *LifecycleHygieneCheck) checkStateFiles(ctx *CheckContext) int {
|
|
||||||
stateFiles := c.findStateFiles(ctx.TownRoot)
|
|
||||||
|
|
||||||
for _, sf := range stateFiles {
|
|
||||||
data, err := os.ReadFile(sf.path)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
var state map[string]interface{}
|
|
||||||
if err := json.Unmarshal(data, &state); err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for any requesting_* flags
|
|
||||||
for key, val := range state {
|
|
||||||
if strings.HasPrefix(key, "requesting_") {
|
|
||||||
if boolVal, ok := val.(bool); ok && boolVal {
|
|
||||||
// Found a stuck flag - verify session is actually healthy
|
|
||||||
if c.isSessionHealthy(sf.identity, ctx.TownRoot) {
|
|
||||||
c.stuckStateFiles = append(c.stuckStateFiles, stuckState{
|
|
||||||
stateFile: sf.path,
|
|
||||||
identity: sf.identity,
|
|
||||||
flag: key,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return len(c.stuckStateFiles)
|
|
||||||
}
|
|
||||||
|
|
||||||
type stateFileInfo struct {
|
|
||||||
path string
|
|
||||||
identity string
|
|
||||||
}
|
|
||||||
|
|
||||||
// findStateFiles locates all state.json files for agents.
|
|
||||||
func (c *LifecycleHygieneCheck) findStateFiles(townRoot string) []stateFileInfo {
|
|
||||||
var files []stateFileInfo
|
|
||||||
|
|
||||||
// Mayor state
|
|
||||||
mayorState := filepath.Join(townRoot, "mayor", "state.json")
|
|
||||||
if _, err := os.Stat(mayorState); err == nil {
|
|
||||||
files = append(files, stateFileInfo{path: mayorState, identity: "mayor"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scan rigs for witness, refinery, and crew state files
|
|
||||||
entries, err := os.ReadDir(townRoot)
|
|
||||||
if err != nil {
|
|
||||||
return files
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, entry := range entries {
|
|
||||||
if !entry.IsDir() || strings.HasPrefix(entry.Name(), ".") || entry.Name() == "mayor" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
rigName := entry.Name()
|
|
||||||
rigPath := filepath.Join(townRoot, rigName)
|
|
||||||
|
|
||||||
// Witness state
|
|
||||||
witnessState := filepath.Join(rigPath, "witness", "state.json")
|
|
||||||
if _, err := os.Stat(witnessState); err == nil {
|
|
||||||
files = append(files, stateFileInfo{
|
|
||||||
path: witnessState,
|
|
||||||
identity: rigName + "-witness",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Refinery state
|
|
||||||
refineryState := filepath.Join(rigPath, "refinery", "state.json")
|
|
||||||
if _, err := os.Stat(refineryState); err == nil {
|
|
||||||
files = append(files, stateFileInfo{
|
|
||||||
path: refineryState,
|
|
||||||
identity: rigName + "-refinery",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Crew state files
|
|
||||||
crewPath := filepath.Join(rigPath, "crew")
|
|
||||||
crewEntries, err := os.ReadDir(crewPath)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, crew := range crewEntries {
|
|
||||||
if !crew.IsDir() || strings.HasPrefix(crew.Name(), ".") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
crewState := filepath.Join(crewPath, crew.Name(), "state.json")
|
|
||||||
if _, err := os.Stat(crewState); err == nil {
|
|
||||||
files = append(files, stateFileInfo{
|
|
||||||
path: crewState,
|
|
||||||
identity: rigName + "-crew-" + crew.Name(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return files
|
|
||||||
}
|
|
||||||
|
|
||||||
// isSessionHealthy checks if the tmux session for this identity exists and is running.
|
|
||||||
func (c *LifecycleHygieneCheck) isSessionHealthy(identity, _ string) bool {
|
|
||||||
sessionName := identityToSessionName(identity)
|
|
||||||
if sessionName == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if session exists
|
|
||||||
cmd := exec.Command("tmux", "has-session", "-t", sessionName)
|
|
||||||
return cmd.Run() == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// identityToSessionName converts an identity to its tmux session name.
|
|
||||||
func identityToSessionName(identity string) string {
|
|
||||||
switch identity {
|
|
||||||
case "mayor":
|
|
||||||
return session.MayorSessionName()
|
|
||||||
default:
|
|
||||||
if strings.HasSuffix(identity, "-witness") ||
|
|
||||||
strings.HasSuffix(identity, "-refinery") ||
|
|
||||||
strings.Contains(identity, "-crew-") {
|
|
||||||
return "gt-" + identity
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fix cleans up stale lifecycle state.
|
|
||||||
func (c *LifecycleHygieneCheck) Fix(ctx *CheckContext) error {
|
func (c *LifecycleHygieneCheck) Fix(ctx *CheckContext) error {
|
||||||
var errors []string
|
var errors []string
|
||||||
|
|
||||||
@@ -266,39 +105,8 @@ func (c *LifecycleHygieneCheck) Fix(ctx *CheckContext) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear stuck requesting_* flags
|
|
||||||
for _, stuck := range c.stuckStateFiles {
|
|
||||||
if err := c.clearRequestingFlag(stuck); err != nil {
|
|
||||||
errors = append(errors, fmt.Sprintf("failed to clear %s in %s: %v", stuck.flag, stuck.identity, err))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(errors) > 0 {
|
if len(errors) > 0 {
|
||||||
return fmt.Errorf("%s", strings.Join(errors, "; "))
|
return fmt.Errorf("%s", strings.Join(errors, "; "))
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// clearRequestingFlag removes the stuck requesting_* flag from a state file.
|
|
||||||
func (c *LifecycleHygieneCheck) clearRequestingFlag(stuck stuckState) error {
|
|
||||||
data, err := os.ReadFile(stuck.stateFile)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
var state map[string]interface{}
|
|
||||||
if err := json.Unmarshal(data, &state); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove the requesting flag and any associated timestamp
|
|
||||||
delete(state, stuck.flag)
|
|
||||||
delete(state, "requesting_time")
|
|
||||||
|
|
||||||
newData, err := json.MarshalIndent(state, "", " ")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return os.WriteFile(stuck.stateFile, newData, 0644)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -373,78 +373,6 @@ func (c *MayorExistsCheck) Run(ctx *CheckContext) *CheckResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MayorStateValidCheck verifies mayor/state.json is valid JSON if it exists.
|
|
||||||
type MayorStateValidCheck struct {
|
|
||||||
FixableCheck
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewMayorStateValidCheck creates a new mayor state validation check.
|
|
||||||
func NewMayorStateValidCheck() *MayorStateValidCheck {
|
|
||||||
return &MayorStateValidCheck{
|
|
||||||
FixableCheck: FixableCheck{
|
|
||||||
BaseCheck: BaseCheck{
|
|
||||||
CheckName: "mayor-state-valid",
|
|
||||||
CheckDescription: "Check that mayor/state.json is valid if it exists",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run validates mayor/state.json if it exists.
|
|
||||||
func (c *MayorStateValidCheck) Run(ctx *CheckContext) *CheckResult {
|
|
||||||
statePath := filepath.Join(ctx.TownRoot, "mayor", "state.json")
|
|
||||||
|
|
||||||
data, err := os.ReadFile(statePath)
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return &CheckResult{
|
|
||||||
Name: c.Name(),
|
|
||||||
Status: StatusOK,
|
|
||||||
Message: "mayor/state.json not present (optional)",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return &CheckResult{
|
|
||||||
Name: c.Name(),
|
|
||||||
Status: StatusError,
|
|
||||||
Message: "Cannot read mayor/state.json",
|
|
||||||
Details: []string{err.Error()},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Just verify it's valid JSON
|
|
||||||
var state interface{}
|
|
||||||
if err := json.Unmarshal(data, &state); err != nil {
|
|
||||||
return &CheckResult{
|
|
||||||
Name: c.Name(),
|
|
||||||
Status: StatusError,
|
|
||||||
Message: "mayor/state.json is not valid JSON",
|
|
||||||
Details: []string{err.Error()},
|
|
||||||
FixHint: "Run 'gt doctor --fix' to reset to default state",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return &CheckResult{
|
|
||||||
Name: c.Name(),
|
|
||||||
Status: StatusOK,
|
|
||||||
Message: "mayor/state.json is valid JSON",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fix resets mayor/state.json to default empty state.
|
|
||||||
func (c *MayorStateValidCheck) Fix(ctx *CheckContext) error {
|
|
||||||
statePath := filepath.Join(ctx.TownRoot, "mayor", "state.json")
|
|
||||||
|
|
||||||
// Default empty state
|
|
||||||
defaultState := map[string]interface{}{}
|
|
||||||
|
|
||||||
data, err := json.MarshalIndent(defaultState, "", " ")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("marshaling default state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return os.WriteFile(statePath, data, 0644)
|
|
||||||
}
|
|
||||||
|
|
||||||
// WorkspaceChecks returns all workspace-level health checks.
|
// WorkspaceChecks returns all workspace-level health checks.
|
||||||
func WorkspaceChecks() []Check {
|
func WorkspaceChecks() []Check {
|
||||||
return []Check{
|
return []Check{
|
||||||
@@ -453,6 +381,5 @@ func WorkspaceChecks() []Check {
|
|||||||
NewRigsRegistryExistsCheck(),
|
NewRigsRegistryExistsCheck(),
|
||||||
NewRigsRegistryValidCheck(),
|
NewRigsRegistryValidCheck(),
|
||||||
NewMayorExistsCheck(),
|
NewMayorExistsCheck(),
|
||||||
NewMayorStateValidCheck(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+3
-35
@@ -133,9 +133,9 @@ func (m *Manager) loadRig(name string, entry config.RigEntry) (*Rig, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for witness (witnesses don't have clones, just state.json)
|
// Check for witness (witnesses don't have clones, just the witness directory)
|
||||||
witnessStatePath := filepath.Join(rigPath, "witness", "state.json")
|
witnessPath := filepath.Join(rigPath, "witness")
|
||||||
if _, err := os.Stat(witnessStatePath); err == nil {
|
if info, err := os.Stat(witnessPath); err == nil && info.IsDir() {
|
||||||
rig.HasWitness = true
|
rig.HasWitness = true
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -414,11 +414,6 @@ Use crew for your own workspace. Polecats are for batch work dispatch.
|
|||||||
return nil, fmt.Errorf("creating polecats dir: %w", err)
|
return nil, fmt.Errorf("creating polecats dir: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize agent state files
|
|
||||||
if err := m.initAgentStates(rigPath); err != nil {
|
|
||||||
return nil, fmt.Errorf("initializing agent states: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize beads at rig level
|
// Initialize beads at rig level
|
||||||
fmt.Printf(" Initializing beads database...\n")
|
fmt.Printf(" Initializing beads database...\n")
|
||||||
if err := m.initBeads(rigPath, opts.BeadsPrefix); err != nil {
|
if err := m.initBeads(rigPath, opts.BeadsPrefix); err != nil {
|
||||||
@@ -484,33 +479,6 @@ func LoadRigConfig(rigPath string) (*RigConfig, error) {
|
|||||||
return &cfg, nil
|
return &cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// initAgentStates creates initial state.json files for agents.
|
|
||||||
func (m *Manager) initAgentStates(rigPath string) error {
|
|
||||||
agents := []struct {
|
|
||||||
path string
|
|
||||||
role string
|
|
||||||
}{
|
|
||||||
{filepath.Join(rigPath, "refinery", "state.json"), "refinery"},
|
|
||||||
{filepath.Join(rigPath, "witness", "state.json"), "witness"},
|
|
||||||
{filepath.Join(rigPath, "mayor", "state.json"), "mayor"},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, agent := range agents {
|
|
||||||
state := &config.AgentState{
|
|
||||||
Role: agent.role,
|
|
||||||
LastActive: time.Now(),
|
|
||||||
}
|
|
||||||
data, err := json.MarshalIndent(state, "", " ")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := os.WriteFile(agent.path, data, 0644); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// initBeads initializes the beads database at rig level.
|
// initBeads initializes the beads database at rig level.
|
||||||
// The project's .beads/config.yaml determines sync-branch settings.
|
// The project's .beads/config.yaml determines sync-branch settings.
|
||||||
// Use `bd doctor --fix` in the project to configure sync-branch if needed.
|
// Use `bd doctor --fix` in the project to configure sync-branch if needed.
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func createTestRig(t *testing.T, root, name string) {
|
|||||||
t.Fatalf("mkdir rig: %v", err)
|
t.Fatalf("mkdir rig: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create agent dirs
|
// Create agent dirs (witness, refinery, mayor)
|
||||||
for _, dir := range AgentDirs {
|
for _, dir := range AgentDirs {
|
||||||
dirPath := filepath.Join(rigPath, dir)
|
dirPath := filepath.Join(rigPath, dir)
|
||||||
if err := os.MkdirAll(dirPath, 0755); err != nil {
|
if err := os.MkdirAll(dirPath, 0755); err != nil {
|
||||||
@@ -48,12 +48,6 @@ func createTestRig(t *testing.T, root, name string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create witness state.json (witnesses don't have clones, just state)
|
|
||||||
witnessState := filepath.Join(rigPath, "witness", "state.json")
|
|
||||||
if err := os.WriteFile(witnessState, []byte(`{"role":"witness"}`), 0644); err != nil {
|
|
||||||
t.Fatalf("write witness state: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create some polecats
|
// Create some polecats
|
||||||
polecatsDir := filepath.Join(rigPath, "polecats")
|
polecatsDir := filepath.Join(rigPath, "polecats")
|
||||||
for _, polecat := range []string{"Toast", "Cheedo"} {
|
for _, polecat := range []string{"Toast", "Cheedo"} {
|
||||||
|
|||||||
Reference in New Issue
Block a user