Add identity collision prevention, detection, and correction (gt-xp2s)
- internal/lock: New package with PID-based lockfiles for worker identity
- gt prime: Acquire identity lock for crew/polecat roles, fail on collision
- gt agents check: Detect stale locks and identity collisions
- gt agents fix: Clean up stale locks
- gt doctor: New identity-collision check with --fix support

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
136
internal/doctor/identity_check.go
Normal file
136
internal/doctor/identity_check.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package doctor
|
||||
|
||||
import (
	"fmt"
	"sort"
	"strings"

	"github.com/steveyegge/gastown/internal/lock"
	"github.com/steveyegge/gastown/internal/tmux"
)
|
||||
|
||||
// IdentityCollisionCheck is the doctor check that looks for agent identity
// collisions and stale worker locks. It carries no state of its own.
type IdentityCollisionCheck struct{}

// NewIdentityCollisionCheck returns a ready-to-use IdentityCollisionCheck.
func NewIdentityCollisionCheck() *IdentityCollisionCheck {
	return &IdentityCollisionCheck{}
}
|
||||
|
||||
func (c *IdentityCollisionCheck) Name() string {
|
||||
return "identity-collision"
|
||||
}
|
||||
|
||||
func (c *IdentityCollisionCheck) Description() string {
|
||||
return "Check for agent identity collisions and stale locks"
|
||||
}
|
||||
|
||||
func (c *IdentityCollisionCheck) CanFix() bool {
|
||||
return true // Can fix stale locks
|
||||
}
|
||||
|
||||
func (c *IdentityCollisionCheck) Run(ctx *CheckContext) *CheckResult {
|
||||
// Find all locks
|
||||
locks, err := lock.FindAllLocks(ctx.TownRoot)
|
||||
if err != nil {
|
||||
return &CheckResult{
|
||||
Name: c.Name(),
|
||||
Status: StatusWarning,
|
||||
Message: fmt.Sprintf("could not scan for locks: %v", err),
|
||||
}
|
||||
}
|
||||
|
||||
if len(locks) == 0 {
|
||||
return &CheckResult{
|
||||
Name: c.Name(),
|
||||
Status: StatusOK,
|
||||
Message: "no worker locks found",
|
||||
}
|
||||
}
|
||||
|
||||
// Get active tmux sessions for cross-reference
|
||||
t := tmux.NewTmux()
|
||||
sessions, _ := t.ListSessions() // Ignore errors - might not have tmux
|
||||
|
||||
sessionSet := make(map[string]bool)
|
||||
for _, s := range sessions {
|
||||
sessionSet[s] = true
|
||||
}
|
||||
|
||||
var staleLocks []string
|
||||
var orphanedLocks []string
|
||||
var healthyLocks int
|
||||
|
||||
for workerDir, info := range locks {
|
||||
if info.IsStale() {
|
||||
staleLocks = append(staleLocks,
|
||||
fmt.Sprintf("%s (dead PID %d)", workerDir, info.PID))
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if session exists
|
||||
if info.SessionID != "" && !sessionSet[info.SessionID] {
|
||||
// Lock has session ID but session doesn't exist
|
||||
// This could be a collision or orphan
|
||||
orphanedLocks = append(orphanedLocks,
|
||||
fmt.Sprintf("%s (PID %d, missing session %s)", workerDir, info.PID, info.SessionID))
|
||||
continue
|
||||
}
|
||||
|
||||
healthyLocks++
|
||||
}
|
||||
|
||||
// Build result
|
||||
if len(staleLocks) == 0 && len(orphanedLocks) == 0 {
|
||||
return &CheckResult{
|
||||
Name: c.Name(),
|
||||
Status: StatusOK,
|
||||
Message: fmt.Sprintf("%d worker lock(s), all healthy", healthyLocks),
|
||||
}
|
||||
}
|
||||
|
||||
result := &CheckResult{
|
||||
Name: c.Name(),
|
||||
}
|
||||
|
||||
if len(staleLocks) > 0 {
|
||||
result.Status = StatusWarning
|
||||
result.Message = fmt.Sprintf("%d stale lock(s) found", len(staleLocks))
|
||||
result.Details = append(result.Details, "Stale locks (dead PIDs):")
|
||||
for _, s := range staleLocks {
|
||||
result.Details = append(result.Details, " "+s)
|
||||
}
|
||||
result.FixHint = "Run 'gt doctor --fix' or 'gt agents fix' to clean up"
|
||||
}
|
||||
|
||||
if len(orphanedLocks) > 0 {
|
||||
if result.Status != StatusWarning {
|
||||
result.Status = StatusWarning
|
||||
}
|
||||
if result.Message != "" {
|
||||
result.Message += ", "
|
||||
}
|
||||
result.Message += fmt.Sprintf("%d orphaned lock(s)", len(orphanedLocks))
|
||||
result.Details = append(result.Details, "Orphaned locks (missing sessions):")
|
||||
for _, s := range orphanedLocks {
|
||||
result.Details = append(result.Details, " "+s)
|
||||
}
|
||||
if !strings.Contains(result.FixHint, "doctor") {
|
||||
result.FixHint = "Run 'gt doctor --fix' to clean up stale locks"
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (c *IdentityCollisionCheck) Fix(ctx *CheckContext) error {
|
||||
cleaned, err := lock.CleanStaleLocks(ctx.TownRoot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cleaning stale locks: %w", err)
|
||||
}
|
||||
|
||||
if cleaned > 0 {
|
||||
fmt.Printf(" Cleaned %d stale lock(s)\n", cleaned)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user