fix(channel): enforce RetentionHours in channel message retention

The RetentionHours field in ChannelFields was never enforced - only
RetentionCount was checked. Now both EnforceChannelRetention and
PruneAllChannels delete messages older than the configured hours.

Also fixes sling tests that were missing TMUX_PANE and GT_TEST_NO_NUDGE
guards, causing them to inject prompts into active tmux sessions during
test runs.

Fixes: gt-uvnfug

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
joe
2026-01-17 04:49:47 -08:00
committed by Steve Yegge
parent 8d41f817b9
commit 616ff01e2c
2 changed files with 76 additions and 24 deletions

View File

@@ -382,7 +382,7 @@ func (b *Beads) LookupChannelByName(name string) (*Issue, *ChannelFields, error)
// EnforceChannelRetention prunes old messages from a channel to enforce retention. // EnforceChannelRetention prunes old messages from a channel to enforce retention.
// Called after posting a new message to the channel (on-write cleanup). // Called after posting a new message to the channel (on-write cleanup).
// If channel has >= retainCount messages, deletes oldest until count < retainCount. // Enforces both count-based (RetentionCount) and time-based (RetentionHours) limits.
func (b *Beads) EnforceChannelRetention(name string) error { func (b *Beads) EnforceChannelRetention(name string) error {
// Get channel config // Get channel config
_, fields, err := b.GetChannelBead(name) _, fields, err := b.GetChannelBead(name)
@@ -393,8 +393,8 @@ func (b *Beads) EnforceChannelRetention(name string) error {
return fmt.Errorf("channel not found: %s", name) return fmt.Errorf("channel not found: %s", name)
} }
// Skip if no retention limit // Skip if no retention limits configured
if fields.RetentionCount <= 0 { if fields.RetentionCount <= 0 && fields.RetentionHours <= 0 {
return nil return nil
} }
@@ -411,23 +411,42 @@ func (b *Beads) EnforceChannelRetention(name string) error {
} }
var messages []struct { var messages []struct {
ID string `json:"id"` ID string `json:"id"`
CreatedAt string `json:"created_at"`
} }
if err := json.Unmarshal(out, &messages); err != nil { if err := json.Unmarshal(out, &messages); err != nil {
return fmt.Errorf("parsing channel messages: %w", err) return fmt.Errorf("parsing channel messages: %w", err)
} }
// Calculate how many to delete // Track which messages to delete (use map to avoid duplicates)
// We're being called after a new message is posted, so we want to end up with retainCount toDeleteIDs := make(map[string]bool)
toDelete := len(messages) - fields.RetentionCount
if toDelete <= 0 { // Time-based retention: delete messages older than RetentionHours
return nil // No pruning needed if fields.RetentionHours > 0 {
cutoff := time.Now().Add(-time.Duration(fields.RetentionHours) * time.Hour)
for _, msg := range messages {
createdAt, err := time.Parse(time.RFC3339, msg.CreatedAt)
if err != nil {
continue // Skip messages with unparseable timestamps
}
if createdAt.Before(cutoff) {
toDeleteIDs[msg.ID] = true
}
}
} }
// Delete oldest messages (best-effort) // Count-based retention: delete oldest messages beyond RetentionCount
for i := 0; i < toDelete && i < len(messages); i++ { if fields.RetentionCount > 0 {
toDeleteByCount := len(messages) - fields.RetentionCount
for i := 0; i < toDeleteByCount && i < len(messages); i++ {
toDeleteIDs[messages[i].ID] = true
}
}
// Delete marked messages (best-effort)
for id := range toDeleteIDs {
// Use close instead of delete for audit trail // Use close instead of delete for audit trail
_, _ = b.run("close", messages[i].ID, "--reason=channel retention pruning") _, _ = b.run("close", id, "--reason=channel retention pruning")
} }
return nil return nil
@@ -435,7 +454,8 @@ func (b *Beads) EnforceChannelRetention(name string) error {
// PruneAllChannels enforces retention on all channels. // PruneAllChannels enforces retention on all channels.
// Called by Deacon patrol as a backup cleanup mechanism. // Called by Deacon patrol as a backup cleanup mechanism.
// Uses a 10% buffer to avoid thrashing (only prunes if count > retainCount * 1.1). // Enforces both count-based (RetentionCount) and time-based (RetentionHours) limits.
// Uses a 10% buffer for count-based pruning to avoid thrashing.
func (b *Beads) PruneAllChannels() (int, error) { func (b *Beads) PruneAllChannels() (int, error) {
channels, err := b.ListChannelBeads() channels, err := b.ListChannelBeads()
if err != nil { if err != nil {
@@ -444,38 +464,62 @@ func (b *Beads) PruneAllChannels() (int, error) {
pruned := 0 pruned := 0
for name, fields := range channels { for name, fields := range channels {
if fields.RetentionCount <= 0 { // Skip if no retention limits configured
if fields.RetentionCount <= 0 && fields.RetentionHours <= 0 {
continue continue
} }
// Count messages // Get messages with timestamps
out, err := b.run("list", out, err := b.run("list",
"--type=message", "--type=message",
"--label=channel:"+name, "--label=channel:"+name,
"--json", "--json",
"--limit=0", "--limit=0",
"--sort=created",
) )
if err != nil { if err != nil {
continue // Skip on error continue // Skip on error
} }
var messages []struct { var messages []struct {
ID string `json:"id"` ID string `json:"id"`
CreatedAt string `json:"created_at"`
} }
if err := json.Unmarshal(out, &messages); err != nil { if err := json.Unmarshal(out, &messages); err != nil {
continue continue
} }
// 10% buffer - only prune if significantly over limit // Track which messages to delete (use map to avoid duplicates)
threshold := int(float64(fields.RetentionCount) * 1.1) toDeleteIDs := make(map[string]bool)
if len(messages) <= threshold {
continue // Time-based retention: delete messages older than RetentionHours
if fields.RetentionHours > 0 {
cutoff := time.Now().Add(-time.Duration(fields.RetentionHours) * time.Hour)
for _, msg := range messages {
createdAt, err := time.Parse(time.RFC3339, msg.CreatedAt)
if err != nil {
continue // Skip messages with unparseable timestamps
}
if createdAt.Before(cutoff) {
toDeleteIDs[msg.ID] = true
}
}
} }
// Prune down to exactly retainCount // Count-based retention with 10% buffer to avoid thrashing
toDelete := len(messages) - fields.RetentionCount if fields.RetentionCount > 0 {
for i := 0; i < toDelete && i < len(messages); i++ { threshold := int(float64(fields.RetentionCount) * 1.1)
if _, err := b.run("close", messages[i].ID, "--reason=patrol retention pruning"); err == nil { if len(messages) > threshold {
toDeleteByCount := len(messages) - fields.RetentionCount
for i := 0; i < toDeleteByCount && i < len(messages); i++ {
toDeleteIDs[messages[i].ID] = true
}
}
}
// Delete marked messages
for id := range toDeleteIDs {
if _, err := b.run("close", id, "--reason=patrol retention pruning"); err == nil {
pruned++ pruned++
} }
} }

View File

@@ -616,6 +616,7 @@ exit 0
t.Setenv(EnvGTRole, "crew") t.Setenv(EnvGTRole, "crew")
t.Setenv("GT_CREW", "jv") t.Setenv("GT_CREW", "jv")
t.Setenv("GT_POLECAT", "") t.Setenv("GT_POLECAT", "")
t.Setenv("TMUX_PANE", "") // Prevent inheriting real tmux pane from test runner
cwd, err := os.Getwd() cwd, err := os.Getwd()
if err != nil { if err != nil {
@@ -637,6 +638,9 @@ exit 0
slingDryRun = true slingDryRun = true
slingNoConvoy = true slingNoConvoy = true
// Prevent real tmux nudge from firing during tests (causes agent self-interruption)
t.Setenv("GT_TEST_NO_NUDGE", "1")
// EXPECTED: gt sling should use daemon mode and succeed // EXPECTED: gt sling should use daemon mode and succeed
// ACTUAL: verifyBeadExists uses --no-daemon and fails with sync error // ACTUAL: verifyBeadExists uses --no-daemon and fails with sync error
beadID := "jv-v599" beadID := "jv-v599"
@@ -792,6 +796,7 @@ exit 0
t.Setenv(EnvGTRole, "mayor") t.Setenv(EnvGTRole, "mayor")
t.Setenv("GT_POLECAT", "") t.Setenv("GT_POLECAT", "")
t.Setenv("GT_CREW", "") t.Setenv("GT_CREW", "")
t.Setenv("TMUX_PANE", "") // Prevent inheriting real tmux pane from test runner
cwd, err := os.Getwd() cwd, err := os.Getwd()
if err != nil { if err != nil {
@@ -819,6 +824,9 @@ exit 0
slingVars = nil slingVars = nil
slingOnTarget = "gt-abc123" // The bug bead we're applying formula to slingOnTarget = "gt-abc123" // The bug bead we're applying formula to
// Prevent real tmux nudge from firing during tests (causes agent self-interruption)
t.Setenv("GT_TEST_NO_NUDGE", "1")
if err := runSling(nil, []string{"mol-polecat-work"}); err != nil { if err := runSling(nil, []string{"mol-polecat-work"}); err != nil {
t.Fatalf("runSling: %v", err) t.Fatalf("runSling: %v", err)
} }