fix(channel): enforce RetentionHours in channel message retention

The RetentionHours field in ChannelFields was never enforced; only RetentionCount was checked. Now both EnforceChannelRetention and PruneAllChannels also prune messages older than the configured number of hours (messages are closed rather than deleted, preserving the audit trail).

Also fixes sling tests that were missing the TMUX_PANE and GT_TEST_NO_NUDGE guards, which caused them to inject prompts into active tmux sessions during test runs.

Fixes: gt-uvnfug
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 04:49:47 -08:00
parent 8d41f817b9
commit 616ff01e2c
2 changed files with 76 additions and 24 deletions
--- a/internal/beads/beads_channel.go
+++ b/internal/beads/beads_channel.go
@@ -382,7 +382,7 @@ func (b *Beads) LookupChannelByName(name string) (*Issue, *ChannelFields, error)
 // EnforceChannelRetention prunes old messages from a channel to enforce retention.
 // Called after posting a new message to the channel (on-write cleanup).
-// If channel has >= retainCount messages, deletes oldest until count < retainCount.
+// Enforces both count-based (RetentionCount) and time-based (RetentionHours) limits.
 func (b *Beads) EnforceChannelRetention(name string) error {
 	// Get channel config
 	_, fields, err := b.GetChannelBead(name)
@@ -393,8 +393,8 @@ func (b *Beads) EnforceChannelRetention(name string) error {
 		return fmt.Errorf("channel not found: %s", name)
 	}
-	// Skip if no retention limit
+	// Skip if no retention limits configured
-	if fields.RetentionCount <= 0 {
+	if fields.RetentionCount <= 0 && fields.RetentionHours <= 0 {
 		return nil
 	}
@@ -411,23 +411,42 @@ func (b *Beads) EnforceChannelRetention(name string) error {
 	}
 	var messages []struct {
-		ID string `json:"id"`
+		ID        string `json:"id"`
 		CreatedAt string `json:"created_at"`
 	}
 	if err := json.Unmarshal(out, &messages); err != nil {
 		return fmt.Errorf("parsing channel messages: %w", err)
 	}
-	// Calculate how many to delete
+	// Track which messages to delete (use map to avoid duplicates)
-	// We're being called after a new message is posted, so we want to end up with retainCount
+	toDeleteIDs := make(map[string]bool)
-	toDelete := len(messages) - fields.RetentionCount
+
-	if toDelete <= 0 {
+	// Time-based retention: delete messages older than RetentionHours
-		return nil // No pruning needed
+	if fields.RetentionHours > 0 {
 		cutoff := time.Now().Add(-time.Duration(fields.RetentionHours) * time.Hour)
 		for _, msg := range messages {
 			createdAt, err := time.Parse(time.RFC3339, msg.CreatedAt)
 			if err != nil {
 				continue // Skip messages with unparseable timestamps
 			}
 			if createdAt.Before(cutoff) {
 				toDeleteIDs[msg.ID] = true
 			}
 		}
 	}
-	// Delete oldest messages (best-effort)
+	// Count-based retention: delete oldest messages beyond RetentionCount
-	for i := 0; i < toDelete && i < len(messages); i++ {
+	if fields.RetentionCount > 0 {
 		toDeleteByCount := len(messages) - fields.RetentionCount
 		for i := 0; i < toDeleteByCount && i < len(messages); i++ {
 			toDeleteIDs[messages[i].ID] = true
 		}
 	}
 	// Delete marked messages (best-effort)
 	for id := range toDeleteIDs {
 		// Use close instead of delete for audit trail
-		_, _ = b.run("close", messages[i].ID, "--reason=channel retention pruning")
+		_, _ = b.run("close", id, "--reason=channel retention pruning")
 	}
 	return nil
@@ -435,7 +454,8 @@ func (b *Beads) EnforceChannelRetention(name string) error {
 // PruneAllChannels enforces retention on all channels.
 // Called by Deacon patrol as a backup cleanup mechanism.
-// Uses a 10% buffer to avoid thrashing (only prunes if count > retainCount * 1.1).
+// Enforces both count-based (RetentionCount) and time-based (RetentionHours) limits.
 // Uses a 10% buffer for count-based pruning to avoid thrashing.
 func (b *Beads) PruneAllChannels() (int, error) {
 	channels, err := b.ListChannelBeads()
 	if err != nil {
@@ -444,38 +464,62 @@ func (b *Beads) PruneAllChannels() (int, error) {
 	pruned := 0
 	for name, fields := range channels {
-		if fields.RetentionCount <= 0 {
+		// Skip if no retention limits configured
 		if fields.RetentionCount <= 0 && fields.RetentionHours <= 0 {
 			continue
 		}
-		// Count messages
+		// Get messages with timestamps
 		out, err := b.run("list",
 			"--type=message",
 			"--label=channel:"+name,
 			"--json",
 			"--limit=0",
 			"--sort=created",
 		)
 		if err != nil {
 			continue // Skip on error
 		}
 		var messages []struct {
-			ID string `json:"id"`
+			ID        string `json:"id"`
 			CreatedAt string `json:"created_at"`
 		}
 		if err := json.Unmarshal(out, &messages); err != nil {
 			continue
 		}
-		// 10% buffer - only prune if significantly over limit
+		// Track which messages to delete (use map to avoid duplicates)
-		threshold := int(float64(fields.RetentionCount) * 1.1)
+		toDeleteIDs := make(map[string]bool)
-		if len(messages) <= threshold {
+
-			continue
+		// Time-based retention: delete messages older than RetentionHours
 		if fields.RetentionHours > 0 {
 			cutoff := time.Now().Add(-time.Duration(fields.RetentionHours) * time.Hour)
 			for _, msg := range messages {
 				createdAt, err := time.Parse(time.RFC3339, msg.CreatedAt)
 				if err != nil {
 					continue // Skip messages with unparseable timestamps
 				}
 				if createdAt.Before(cutoff) {
 					toDeleteIDs[msg.ID] = true
 				}
 			}
 		}
-		// Prune down to exactly retainCount
+		// Count-based retention with 10% buffer to avoid thrashing
-		toDelete := len(messages) - fields.RetentionCount
+		if fields.RetentionCount > 0 {
-		for i := 0; i < toDelete && i < len(messages); i++ {
+			threshold := int(float64(fields.RetentionCount) * 1.1)
-			if _, err := b.run("close", messages[i].ID, "--reason=patrol retention pruning"); err == nil {
+			if len(messages) > threshold {
 				toDeleteByCount := len(messages) - fields.RetentionCount
 				for i := 0; i < toDeleteByCount && i < len(messages); i++ {
 					toDeleteIDs[messages[i].ID] = true
 				}
 			}
 		}
 		// Delete marked messages
 		for id := range toDeleteIDs {
 			if _, err := b.run("close", id, "--reason=patrol retention pruning"); err == nil {
 				pruned++
 			}
 		}
--- a/internal/cmd/sling_test.go
+++ b/internal/cmd/sling_test.go
@@ -616,6 +616,7 @@ exit 0
 	t.Setenv(EnvGTRole, "crew")
 	t.Setenv("GT_CREW", "jv")
 	t.Setenv("GT_POLECAT", "")
 	t.Setenv("TMUX_PANE", "") // Prevent inheriting real tmux pane from test runner
 	cwd, err := os.Getwd()
 	if err != nil {
@@ -637,6 +638,9 @@ exit 0
 	slingDryRun = true
 	slingNoConvoy = true
 	// Prevent real tmux nudge from firing during tests (causes agent self-interruption)
 	t.Setenv("GT_TEST_NO_NUDGE", "1")
 	// EXPECTED: gt sling should use daemon mode and succeed
 	// ACTUAL: verifyBeadExists uses --no-daemon and fails with sync error
 	beadID := "jv-v599"
@@ -792,6 +796,7 @@ exit 0
 	t.Setenv(EnvGTRole, "mayor")
 	t.Setenv("GT_POLECAT", "")
 	t.Setenv("GT_CREW", "")
 	t.Setenv("TMUX_PANE", "") // Prevent inheriting real tmux pane from test runner
 	cwd, err := os.Getwd()
 	if err != nil {
@@ -819,6 +824,9 @@ exit 0
 	slingVars = nil
 	slingOnTarget = "gt-abc123" // The bug bead we're applying formula to
 	// Prevent real tmux nudge from firing during tests (causes agent self-interruption)
 	t.Setenv("GT_TEST_NO_NUDGE", "1")
 	if err := runSling(nil, []string{"mol-polecat-work"}); err != nil {
 		t.Fatalf("runSling: %v", err)
 	}