fix(channel): enforce RetentionHours in channel message retention

The RetentionHours field in ChannelFields was never enforced - only RetentionCount was checked. Now both EnforceChannelRetention and PruneAllChannels delete messages older than the configured hours. Also fixes sling tests that were missing TMUX_PANE and GT_TEST_NO_NUDGE guards, causing them to inject prompts into active tmux sessions during test runs. Fixes: gt-uvnfug Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 04:49:47 -08:00
parent 8d41f817b9
commit 616ff01e2c
2 changed files with 76 additions and 24 deletions
@@ -382,7 +382,7 @@ func (b *Beads) LookupChannelByName(name string) (*Issue, *ChannelFields, error)

 // EnforceChannelRetention prunes old messages from a channel to enforce retention.
 // Called after posting a new message to the channel (on-write cleanup).
-// If channel has >= retainCount messages, deletes oldest until count < retainCount.
+// Enforces both count-based (RetentionCount) and time-based (RetentionHours) limits.
 func (b *Beads) EnforceChannelRetention(name string) error {
 	// Get channel config
 	_, fields, err := b.GetChannelBead(name)
@@ -393,8 +393,8 @@ func (b *Beads) EnforceChannelRetention(name string) error {
 		return fmt.Errorf("channel not found: %s", name)
 	}

-	// Skip if no retention limit
-	if fields.RetentionCount <= 0 {
+	// Skip if no retention limits configured
+	if fields.RetentionCount <= 0 && fields.RetentionHours <= 0 {
 		return nil
 	}

@@ -411,23 +411,42 @@ func (b *Beads) EnforceChannelRetention(name string) error {
 	}

 	var messages []struct {
-		ID string `json:"id"`
+		ID        string `json:"id"`
+		CreatedAt string `json:"created_at"`
 	}
 	if err := json.Unmarshal(out, &messages); err != nil {
 		return fmt.Errorf("parsing channel messages: %w", err)
 	}

-	// Calculate how many to delete
-	// We're being called after a new message is posted, so we want to end up with retainCount
-	toDelete := len(messages) - fields.RetentionCount
-	if toDelete <= 0 {
-		return nil // No pruning needed
+	// Track which messages to delete (use map to avoid duplicates)
+	toDeleteIDs := make(map[string]bool)
+
+	// Time-based retention: delete messages older than RetentionHours
+	if fields.RetentionHours > 0 {
+		cutoff := time.Now().Add(-time.Duration(fields.RetentionHours) * time.Hour)
+		for _, msg := range messages {
+			createdAt, err := time.Parse(time.RFC3339, msg.CreatedAt)
+			if err != nil {
+				continue // Skip messages with unparseable timestamps
+			}
+			if createdAt.Before(cutoff) {
+				toDeleteIDs[msg.ID] = true
+			}
+		}
 	}

-	// Delete oldest messages (best-effort)
-	for i := 0; i < toDelete && i < len(messages); i++ {
+	// Count-based retention: delete oldest messages beyond RetentionCount
+	if fields.RetentionCount > 0 {
+		toDeleteByCount := len(messages) - fields.RetentionCount
+		for i := 0; i < toDeleteByCount && i < len(messages); i++ {
+			toDeleteIDs[messages[i].ID] = true
+		}
+	}
+
+	// Delete marked messages (best-effort)
+	for id := range toDeleteIDs {
 		// Use close instead of delete for audit trail
-		_, _ = b.run("close", messages[i].ID, "--reason=channel retention pruning")
+		_, _ = b.run("close", id, "--reason=channel retention pruning")
 	}

 	return nil
@@ -435,7 +454,8 @@ func (b *Beads) EnforceChannelRetention(name string) error {

 // PruneAllChannels enforces retention on all channels.
 // Called by Deacon patrol as a backup cleanup mechanism.
-// Uses a 10% buffer to avoid thrashing (only prunes if count > retainCount * 1.1).
+// Enforces both count-based (RetentionCount) and time-based (RetentionHours) limits.
+// Uses a 10% buffer for count-based pruning to avoid thrashing.
 func (b *Beads) PruneAllChannels() (int, error) {
 	channels, err := b.ListChannelBeads()
 	if err != nil {
@@ -444,38 +464,62 @@ func (b *Beads) PruneAllChannels() (int, error) {

 	pruned := 0
 	for name, fields := range channels {
-		if fields.RetentionCount <= 0 {
+		// Skip if no retention limits configured
+		if fields.RetentionCount <= 0 && fields.RetentionHours <= 0 {
 			continue
 		}

-		// Count messages
+		// Get messages with timestamps
 		out, err := b.run("list",
 			"--type=message",
 			"--label=channel:"+name,
 			"--json",
 			"--limit=0",
+			"--sort=created",
 		)
 		if err != nil {
 			continue // Skip on error
 		}

 		var messages []struct {
-			ID string `json:"id"`
+			ID        string `json:"id"`
+			CreatedAt string `json:"created_at"`
 		}
 		if err := json.Unmarshal(out, &messages); err != nil {
 			continue
 		}

-		// 10% buffer - only prune if significantly over limit
-		threshold := int(float64(fields.RetentionCount) * 1.1)
-		if len(messages) <= threshold {
-			continue
+		// Track which messages to delete (use map to avoid duplicates)
+		toDeleteIDs := make(map[string]bool)
+
+		// Time-based retention: delete messages older than RetentionHours
+		if fields.RetentionHours > 0 {
+			cutoff := time.Now().Add(-time.Duration(fields.RetentionHours) * time.Hour)
+			for _, msg := range messages {
+				createdAt, err := time.Parse(time.RFC3339, msg.CreatedAt)
+				if err != nil {
+					continue // Skip messages with unparseable timestamps
+				}
+				if createdAt.Before(cutoff) {
+					toDeleteIDs[msg.ID] = true
+				}
+			}
 		}

-		// Prune down to exactly retainCount
-		toDelete := len(messages) - fields.RetentionCount
-		for i := 0; i < toDelete && i < len(messages); i++ {
-			if _, err := b.run("close", messages[i].ID, "--reason=patrol retention pruning"); err == nil {
+		// Count-based retention with 10% buffer to avoid thrashing
+		if fields.RetentionCount > 0 {
+			threshold := int(float64(fields.RetentionCount) * 1.1)
+			if len(messages) > threshold {
+				toDeleteByCount := len(messages) - fields.RetentionCount
+				for i := 0; i < toDeleteByCount && i < len(messages); i++ {
+					toDeleteIDs[messages[i].ID] = true
+				}
+			}
+		}
+
+		// Delete marked messages
+		for id := range toDeleteIDs {
+			if _, err := b.run("close", id, "--reason=patrol retention pruning"); err == nil {
 				pruned++
 			}
 		}
@@ -616,6 +616,7 @@ exit 0
 	t.Setenv(EnvGTRole, "crew")
 	t.Setenv("GT_CREW", "jv")
 	t.Setenv("GT_POLECAT", "")
+	t.Setenv("TMUX_PANE", "") // Prevent inheriting real tmux pane from test runner

 	cwd, err := os.Getwd()
 	if err != nil {
@@ -637,6 +638,9 @@ exit 0
 	slingDryRun = true
 	slingNoConvoy = true

+	// Prevent real tmux nudge from firing during tests (causes agent self-interruption)
+	t.Setenv("GT_TEST_NO_NUDGE", "1")
+
 	// EXPECTED: gt sling should use daemon mode and succeed
 	// ACTUAL: verifyBeadExists uses --no-daemon and fails with sync error
 	beadID := "jv-v599"
@@ -792,6 +796,7 @@ exit 0
 	t.Setenv(EnvGTRole, "mayor")
 	t.Setenv("GT_POLECAT", "")
 	t.Setenv("GT_CREW", "")
+	t.Setenv("TMUX_PANE", "") // Prevent inheriting real tmux pane from test runner

 	cwd, err := os.Getwd()
 	if err != nil {
@@ -819,6 +824,9 @@ exit 0
 	slingVars = nil
 	slingOnTarget = "gt-abc123" // The bug bead we're applying formula to

+	// Prevent real tmux nudge from firing during tests (causes agent self-interruption)
+	t.Setenv("GT_TEST_NO_NUDGE", "1")
+
 	if err := runSling(nil, []string{"mol-polecat-work"}); err != nil {
 		t.Fatalf("runSling: %v", err)
 	}