Add configurable export error handling policies (bd-exug)

Implements flexible error handling for export operations with four policies:

- strict: Fail-fast on any error (default for user exports)
- best-effort: Skip errors with warnings (default for auto-exports)
- partial: Retry then skip with manifest tracking
- required-core: Fail on core data, skip enrichments

Key features:

- Per-project configuration via `bd config set export.error_policy`
- Separate policy for auto-exports: `auto_export.error_policy`
- Retry with exponential backoff (configurable attempts/delay)
- Optional export manifests documenting completeness
- Per-issue encoding error handling

This allows users to choose the right trade-off between data integrity and system availability for their specific project needs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-23 20:21:51 -08:00
parent 21c331baee
commit e3e0a04496
8 changed files with 797 additions and 28 deletions
--- a/internal/export/config.go
+++ b/internal/export/config.go
@@ -0,0 +1,117 @@
+package export
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+
+	"github.com/steveyegge/beads/internal/storage"
+)
+
+// ConfigStore defines the minimal storage interface needed for config.
+// It is declared here, at the consumer, so LoadConfig can be driven by any
+// key/value backend (including a test double) without the full storage layer.
+type ConfigStore interface {
+	// GetConfig looks up key. LoadConfig treats a non-nil error or an empty
+	// string as "not configured" and keeps the default.
+	GetConfig(ctx context.Context, key string) (string, error)
+	// SetConfig stores value under key.
+	SetConfig(ctx context.Context, key, value string) error
+}
+
+// LoadConfig reads the export error-handling configuration from store.
+//
+// Every setting starts at its package default and is overridden only by a
+// stored value that is readable, non-empty and valid. Unreadable or malformed
+// values are ignored so that a bad config entry never blocks an export.
+//
+// The error policy is resolved in this order, first valid value wins:
+//  1. ConfigKeyAutoExportPolicy (consulted only when isAutoExport is true)
+//  2. ConfigKeyErrorPolicy
+//  3. DefaultAutoExportPolicy for auto-exports, DefaultErrorPolicy otherwise
+//
+// The returned error is currently always nil.
+func LoadConfig(ctx context.Context, store ConfigStore, isAutoExport bool) (*Config, error) {
+	cfg := &Config{
+		Policy:             DefaultErrorPolicy,
+		RetryAttempts:      DefaultRetryAttempts,
+		RetryBackoffMS:     DefaultRetryBackoffMS,
+		SkipEncodingErrors: DefaultSkipEncodingErrors,
+		WriteManifest:      DefaultWriteManifest,
+		IsAutoExport:       isAutoExport,
+	}
+
+	// lookup reports a stored value only when it is readable and non-empty.
+	lookup := func(key string) (string, bool) {
+		val, err := store.GetConfig(ctx, key)
+		return val, err == nil && val != ""
+	}
+
+	// Walk the candidate keys in priority order and stop at the first valid
+	// policy. Stopping explicitly (rather than comparing the result against
+	// the default) keeps an auto-export policy that was deliberately set to
+	// "strict" from being overridden by the general export policy.
+	policyKeys := []string{ConfigKeyErrorPolicy}
+	if isAutoExport {
+		cfg.Policy = DefaultAutoExportPolicy
+		policyKeys = []string{ConfigKeyAutoExportPolicy, ConfigKeyErrorPolicy}
+	}
+	for _, key := range policyKeys {
+		val, ok := lookup(key)
+		if !ok {
+			continue
+		}
+		if policy := ErrorPolicy(val); policy.IsValid() {
+			cfg.Policy = policy
+			break
+		}
+	}
+
+	// Retry attempts: any non-negative integer.
+	if val, ok := lookup(ConfigKeyRetryAttempts); ok {
+		if attempts, err := strconv.Atoi(val); err == nil && attempts >= 0 {
+			cfg.RetryAttempts = attempts
+		}
+	}
+
+	// Initial retry backoff in milliseconds: strictly positive integer.
+	if val, ok := lookup(ConfigKeyRetryBackoffMS); ok {
+		if backoff, err := strconv.Atoi(val); err == nil && backoff > 0 {
+			cfg.RetryBackoffMS = backoff
+		}
+	}
+
+	// Boolean flags accept anything strconv.ParseBool understands.
+	if val, ok := lookup(ConfigKeySkipEncodingErrors); ok {
+		if skip, err := strconv.ParseBool(val); err == nil {
+			cfg.SkipEncodingErrors = skip
+		}
+	}
+	if val, ok := lookup(ConfigKeyWriteManifest); ok {
+		if write, err := strconv.ParseBool(val); err == nil {
+			cfg.WriteManifest = write
+		}
+	}
+
+	return cfg, nil
+}
+
+// SetPolicy sets the error policy for exports
+func SetPolicy(ctx context.Context, store storage.Storage, policy ErrorPolicy, autoExport bool) error {
+	if !policy.IsValid() {
+		return fmt.Errorf("invalid error policy: %s (valid: strict, best-effort, partial, required-core)", policy)
+	}
+
+	key := ConfigKeyErrorPolicy
+	if autoExport {
+		key = ConfigKeyAutoExportPolicy
+	}
+
+	return store.SetConfig(ctx, key, string(policy))
+}
+
+// SetRetryAttempts persists the retry-attempt count under
+// ConfigKeyRetryAttempts. Negative counts are rejected.
+func SetRetryAttempts(ctx context.Context, store storage.Storage, attempts int) error {
+	if attempts >= 0 {
+		encoded := strconv.Itoa(attempts)
+		return store.SetConfig(ctx, ConfigKeyRetryAttempts, encoded)
+	}
+	return fmt.Errorf("retry attempts must be non-negative")
+}
+
+// SetRetryBackoff persists the initial retry backoff, in milliseconds, under
+// ConfigKeyRetryBackoffMS. Zero and negative values are rejected.
+func SetRetryBackoff(ctx context.Context, store storage.Storage, backoffMS int) error {
+	if backoffMS > 0 {
+		encoded := strconv.Itoa(backoffMS)
+		return store.SetConfig(ctx, ConfigKeyRetryBackoffMS, encoded)
+	}
+	return fmt.Errorf("retry backoff must be positive")
+}
+
+// SetSkipEncodingErrors persists the skip-encoding-errors flag under
+// ConfigKeySkipEncodingErrors, encoded as "true" or "false".
+func SetSkipEncodingErrors(ctx context.Context, store storage.Storage, skip bool) error {
+	encoded := strconv.FormatBool(skip)
+	return store.SetConfig(ctx, ConfigKeySkipEncodingErrors, encoded)
+}
+
+// SetWriteManifest persists the write-manifest flag under
+// ConfigKeyWriteManifest, encoded as "true" or "false".
+func SetWriteManifest(ctx context.Context, store storage.Storage, write bool) error {
+	encoded := strconv.FormatBool(write)
+	return store.SetConfig(ctx, ConfigKeyWriteManifest, encoded)
+}
--- a/internal/export/executor.go
+++ b/internal/export/executor.go
@@ -0,0 +1,96 @@
+package export
+
+import (
+	"context"
+	"fmt"
+	"os"
+)
+
+// DataType names the category of data a fetch operation retrieves.
+// FetchWithPolicy uses it to tell core data (DataTypeCore) apart from
+// everything else, which the required-core policy treats as optional.
+type DataType string
+
+// Known data categories.
+const (
+	DataTypeCore     DataType = "core"       // Issues and dependencies
+	DataTypeLabels   DataType = "labels"     // Issue labels
+	DataTypeComments DataType = "comments"   // Issue comments
+)
+
+// FetchResult holds the result of a data fetch operation as reported by
+// FetchWithPolicy:
+//   - Success is true only when the fetch function eventually returned nil.
+//   - Err is set when the failure is fatal under the active policy, or when
+//     the policy itself is unknown.
+//   - Warnings carries the message emitted when a failure was tolerated and
+//     the data skipped; in that case Success is false and Err is nil.
+type FetchResult struct {
+	Success  bool
+	Err      error
+	Warnings []string
+}
+
+// FetchWithPolicy runs fn through RetryWithBackoff (using the attempt count
+// and backoff from cfg) and interprets a final failure according to
+// cfg.Policy:
+//
+//   - PolicyStrict: the failure is returned in FetchResult.Err.
+//   - PolicyBestEffort, PolicyPartial: the failure is printed to stderr and
+//     recorded in FetchResult.Warnings; Err stays nil.
+//   - PolicyRequiredCore: behaves like strict for DataTypeCore and like
+//     best-effort for every other data type.
+//   - any other policy: an "unknown error policy" error is returned and fn
+//     is never called.
+//
+// FetchResult.Success is true only when fn succeeded.
+func FetchWithPolicy(ctx context.Context, cfg *Config, dataType DataType, desc string, fn func() error) FetchResult {
+	// skipFormat is the warning template used when a failure may be skipped.
+	// An empty template means a failure is fatal for this policy/data type.
+	var skipFormat string
+
+	switch cfg.Policy {
+	case PolicyStrict:
+		// Fatal on failure; nothing to configure.
+	case PolicyBestEffort:
+		skipFormat = "Warning: %s failed, skipping: %v"
+	case PolicyPartial:
+		skipFormat = "Warning: %s failed after retries, skipping: %v"
+	case PolicyRequiredCore:
+		if dataType != DataTypeCore {
+			skipFormat = "Warning: %s (enrichment) failed, skipping: %v"
+		}
+	default:
+		// Unknown policy: refuse to run rather than guess how to handle errors.
+		return FetchResult{Err: fmt.Errorf("unknown error policy: %s", cfg.Policy)}
+	}
+
+	fetchErr := RetryWithBackoff(ctx, cfg.RetryAttempts, cfg.RetryBackoffMS, desc, fn)
+	if fetchErr == nil {
+		return FetchResult{Success: true}
+	}
+	if skipFormat == "" {
+		return FetchResult{Err: fetchErr}
+	}
+
+	// Tolerated failure: surface it on stderr and hand it back as a warning.
+	msg := fmt.Sprintf(skipFormat, desc, fetchErr)
+	fmt.Fprintf(os.Stderr, "%s\n", msg)
+	return FetchResult{Warnings: []string{msg}}
+}
--- a/internal/export/manifest.go
+++ b/internal/export/manifest.go
@@ -0,0 +1,65 @@
+package export
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// WriteManifest writes manifest as indented JSON next to the JSONL export.
+//
+// The manifest path is jsonlPath with a trailing ".jsonl" (if present)
+// replaced by ".manifest.json". The data is first written to a temporary
+// file in the same directory and then renamed into place, so readers never
+// observe a partially written manifest. On any failure the temporary file is
+// removed and an error is returned; a failure to set permissions is only
+// reported on stderr.
+func WriteManifest(jsonlPath string, manifest *Manifest) error {
+	manifestPath := strings.TrimSuffix(jsonlPath, ".jsonl") + ".manifest.json"
+
+	data, err := json.MarshalIndent(manifest, "", "  ")
+	if err != nil {
+		return fmt.Errorf("failed to marshal manifest: %w", err)
+	}
+
+	// The temp file must live in the target directory so the final rename
+	// stays on one filesystem.
+	dir := filepath.Dir(manifestPath)
+	base := filepath.Base(manifestPath)
+	tempFile, err := os.CreateTemp(dir, base+".tmp.*")
+	if err != nil {
+		return fmt.Errorf("failed to create temp manifest file: %w", err)
+	}
+	tempPath := tempFile.Name()
+
+	// Clean up the temp file on every failure path, but not after a
+	// successful rename (the path no longer exists then).
+	committed := false
+	defer func() {
+		if !committed {
+			_ = os.Remove(tempPath)
+		}
+	}()
+
+	if _, err := tempFile.Write(data); err != nil {
+		_ = tempFile.Close() // best effort; the write error is the one to report
+		return fmt.Errorf("failed to write manifest: %w", err)
+	}
+
+	// Close can report a deferred write failure; never publish the file if
+	// it does.
+	if err := tempFile.Close(); err != nil {
+		return fmt.Errorf("failed to close temp manifest file: %w", err)
+	}
+
+	// Set permissions (0600: rw-------) before publishing so the manifest is
+	// never visible with looser permissions. Non-fatal, just log.
+	if err := os.Chmod(tempPath, 0600); err != nil {
+		fmt.Fprintf(os.Stderr, "Warning: failed to set manifest permissions: %v\n", err)
+	}
+
+	// Atomic replace
+	if err := os.Rename(tempPath, manifestPath); err != nil {
+		return fmt.Errorf("failed to replace manifest file: %w", err)
+	}
+	committed = true
+
+	return nil
+}
+
+// NewManifest returns a manifest stamped with the current time and the given
+// policy. It starts out marked complete; callers are expected to clear
+// Complete when any data turns out to be missing.
+func NewManifest(policy ErrorPolicy) *Manifest {
+	m := new(Manifest)
+	m.ExportedAt = time.Now()
+	m.ErrorPolicy = string(policy)
+	m.Complete = true
+	return m
+}
--- a/internal/export/policy.go
+++ b/internal/export/policy.go
@@ -0,0 +1,127 @@
+package export
+
+import (
+	"context"
+	"fmt"
+	"time"
+)
+
+// ErrorPolicy defines how export operations handle errors. The string value
+// of each policy is what is stored in, and read back from, configuration.
+type ErrorPolicy string
+
+// Supported error policies. Any other value is rejected by IsValid.
+const (
+	// PolicyStrict fails fast on any error (default for user-initiated exports)
+	PolicyStrict ErrorPolicy = "strict"
+
+	// PolicyBestEffort skips failed operations with warnings (good for auto-export)
+	PolicyBestEffort ErrorPolicy = "best-effort"
+
+	// PolicyPartial retries transient failures, skips persistent ones with manifest
+	PolicyPartial ErrorPolicy = "partial"
+
+	// PolicyRequiredCore fails on core data (issues/deps), skips enrichments (labels/comments)
+	PolicyRequiredCore ErrorPolicy = "required-core"
+)
+
+// Config keys for export error handling, as read by LoadConfig and written
+// by the Set* helpers:
+//   - ConfigKeyErrorPolicy: error policy for exports in general
+//   - ConfigKeyRetryAttempts: number of attempts, a non-negative integer
+//   - ConfigKeyRetryBackoffMS: initial backoff in milliseconds, a positive integer
+//   - ConfigKeySkipEncodingErrors: boolean flag
+//   - ConfigKeyWriteManifest: boolean flag
+//   - ConfigKeyAutoExportPolicy: error policy consulted only for auto-exports
+const (
+	ConfigKeyErrorPolicy        = "export.error_policy"
+	ConfigKeyRetryAttempts      = "export.retry_attempts"
+	ConfigKeyRetryBackoffMS     = "export.retry_backoff_ms"
+	ConfigKeySkipEncodingErrors = "export.skip_encoding_errors"
+	ConfigKeyWriteManifest      = "export.write_manifest"
+	ConfigKeyAutoExportPolicy   = "auto_export.error_policy"
+)
+
+// Default values used when a setting is absent from configuration or its
+// stored value cannot be parsed. DefaultAutoExportPolicy is the policy
+// intended for auto-exports; DefaultErrorPolicy applies to all other exports.
+// DefaultRetryBackoffMS is expressed in milliseconds.
+const (
+	DefaultErrorPolicy        = PolicyStrict
+	DefaultRetryAttempts      = 3
+	DefaultRetryBackoffMS     = 100
+	DefaultSkipEncodingErrors = false
+	DefaultWriteManifest      = false
+	DefaultAutoExportPolicy   = PolicyBestEffort
+)
+
+// Config holds export error handling configuration, normally produced by
+// LoadConfig.
+//
+// Policy selects how failures are treated. RetryAttempts and RetryBackoffMS
+// are passed to RetryWithBackoff as the total number of attempts and the
+// initial delay in milliseconds. SkipEncodingErrors and WriteManifest are
+// loaded from configuration here but consumed outside this file
+// (presumably by the export writer — verify against callers).
+type Config struct {
+	Policy              ErrorPolicy
+	RetryAttempts       int
+	RetryBackoffMS      int
+	SkipEncodingErrors  bool
+	WriteManifest       bool
+	IsAutoExport        bool // If true, may use different policy
+}
+
+// Manifest tracks export completeness and failures. It is serialized to JSON
+// by WriteManifest; slice fields are omitted from the output when empty.
+// NewManifest initializes ExportedAt, ErrorPolicy and Complete; the remaining
+// fields are filled in by the caller during the export.
+type Manifest struct {
+	ExportedCount  int           `json:"exported_count"`
+	FailedIssues   []FailedIssue `json:"failed_issues,omitempty"`
+	PartialData    []string      `json:"partial_data,omitempty"` // e.g., ["labels", "comments"]
+	Warnings       []string      `json:"warnings,omitempty"`
+	Complete       bool          `json:"complete"`
+	ExportedAt     time.Time     `json:"exported_at"`
+	ErrorPolicy    string        `json:"error_policy"`
+}
+
+// FailedIssue tracks a single issue that failed to export. It appears in
+// Manifest.FailedIssues; MissingData is omitted from the JSON when empty.
+type FailedIssue struct {
+	IssueID     string   `json:"issue_id"`
+	Reason      string   `json:"reason"`
+	MissingData []string `json:"missing_data,omitempty"` // e.g., ["labels", "comments"]
+}
+
+// RetryWithBackoff calls fn until it succeeds, the attempts are exhausted, or
+// ctx is done.
+//
+// attempts is the total number of calls to fn (not the number of retries);
+// values below 1 are treated as 1. The delay before the second call is
+// initialBackoffMS milliseconds and doubles after every wait. desc is used
+// only to label the returned error.
+//
+// Returns nil as soon as fn succeeds. If ctx is cancelled or times out after
+// a failed call or during a wait, ctx.Err() is returned unwrapped. Otherwise
+// the last error from fn is returned: wrapped with desc and the attempt count
+// when more than one attempt was allowed, unchanged when only one was.
+func RetryWithBackoff(ctx context.Context, attempts int, initialBackoffMS int, desc string, fn func() error) error {
+	if attempts < 1 {
+		attempts = 1
+	}
+
+	var lastErr error
+	backoff := time.Duration(initialBackoffMS) * time.Millisecond
+
+	for attempt := 1; attempt <= attempts; attempt++ {
+		lastErr = fn()
+		if lastErr == nil {
+			return nil
+		}
+
+		// Don't retry on context cancellation
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		// Don't wait after last attempt
+		if attempt == attempts {
+			break
+		}
+
+		// Use an explicit timer so it can be released immediately when the
+		// context ends first, instead of lingering until it fires.
+		timer := time.NewTimer(backoff)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			return ctx.Err()
+		case <-timer.C:
+		}
+
+		// Exponential backoff; keep the current delay if doubling would
+		// overflow time.Duration.
+		if doubled := backoff * 2; doubled > backoff {
+			backoff = doubled
+		}
+	}
+
+	if attempts > 1 {
+		return fmt.Errorf("%s failed after %d attempts: %w", desc, attempts, lastErr)
+	}
+	return lastErr
+}
+
+// IsValid reports whether p is one of the four supported policies. The empty
+// string and any unrecognized value are invalid.
+func (p ErrorPolicy) IsValid() bool {
+	return p == PolicyStrict ||
+		p == PolicyBestEffort ||
+		p == PolicyPartial ||
+		p == PolicyRequiredCore
+}
+
+// String returns the policy's underlying string value, satisfying
+// fmt.Stringer.
+func (p ErrorPolicy) String() string {
+	text := string(p)
+	return text
+}
--- a/internal/export/policy_test.go
+++ b/internal/export/policy_test.go
@@ -0,0 +1,176 @@
+package export
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+)
+
+// TestRetryWithBackoff covers the four outcomes of RetryWithBackoff:
+// immediate success, success after transient failures, exhaustion of all
+// attempts, and early exit when the context expires during a backoff wait.
+func TestRetryWithBackoff(t *testing.T) {
+	ctx := context.Background()
+
+	t.Run("succeeds first try", func(t *testing.T) {
+		calls := 0
+		err := RetryWithBackoff(ctx, 3, 100, "test", func() error {
+			calls++
+			return nil
+		})
+		if err != nil {
+			t.Errorf("expected no error, got %v", err)
+		}
+		if calls != 1 {
+			t.Errorf("expected 1 attempt, got %d", calls)
+		}
+	})
+
+	t.Run("succeeds after retries", func(t *testing.T) {
+		calls := 0
+		err := RetryWithBackoff(ctx, 3, 10, "test", func() error {
+			calls++
+			if calls < 3 {
+				return errors.New("transient error")
+			}
+			return nil
+		})
+		if err != nil {
+			t.Errorf("expected no error, got %v", err)
+		}
+		if calls != 3 {
+			t.Errorf("expected 3 attempts, got %d", calls)
+		}
+	})
+
+	t.Run("fails after max retries", func(t *testing.T) {
+		persistent := errors.New("persistent error")
+		calls := 0
+		err := RetryWithBackoff(ctx, 3, 10, "test", func() error {
+			calls++
+			return persistent
+		})
+		// The final error must still expose the underlying failure to
+		// callers that inspect it with errors.Is.
+		if !errors.Is(err, persistent) {
+			t.Errorf("expected error wrapping %v, got %v", persistent, err)
+		}
+		if calls != 3 {
+			t.Errorf("expected 3 attempts, got %d", calls)
+		}
+	})
+
+	t.Run("respects context cancellation", func(t *testing.T) {
+		// The 50ms deadline expires during the first 100ms backoff wait.
+		ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
+		defer cancel()
+
+		calls := 0
+		err := RetryWithBackoff(ctx, 10, 100, "test", func() error {
+			calls++
+			return errors.New("error")
+		})
+		if !errors.Is(err, context.DeadlineExceeded) {
+			t.Errorf("expected DeadlineExceeded, got %v", err)
+		}
+		// Should stop before reaching max retries due to timeout
+		if calls >= 10 {
+			t.Errorf("expected fewer than 10 attempts due to timeout, got %d", calls)
+		}
+	})
+}
+
+func TestErrorPolicy(t *testing.T) {
+	tests := []struct {
+		name  string
+		policy ErrorPolicy
+		valid bool
+	}{
+		{"strict", PolicyStrict, true},
+		{"best-effort", PolicyBestEffort, true},
+		{"partial", PolicyPartial, true},
+		{"required-core", PolicyRequiredCore, true},
+		{"invalid", ErrorPolicy("invalid"), false},
+		{"empty", ErrorPolicy(""), false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := tt.policy.IsValid(); got != tt.valid {
+				t.Errorf("IsValid() = %v, want %v", got, tt.valid)
+			}
+		})
+	}
+}
+
+// TestFetchWithPolicy verifies how FetchWithPolicy maps the outcome of the
+// fetch function onto FetchResult for every policy, including the success
+// path and an unknown policy (which must fail without running the fetch).
+func TestFetchWithPolicy(t *testing.T) {
+	ctx := context.Background()
+
+	tests := []struct {
+		name        string
+		policy      ErrorPolicy
+		dataType    DataType
+		fail        bool // whether the fetch function returns an error
+		wantErr     bool
+		wantSuccess bool
+		wantWarning bool
+		wantCalls   int
+	}{
+		{name: "strict policy fails fast", policy: PolicyStrict, dataType: DataTypeCore, fail: true, wantErr: true, wantCalls: 1},
+		{name: "strict policy reports success", policy: PolicyStrict, dataType: DataTypeCore, wantSuccess: true, wantCalls: 1},
+		{name: "best-effort policy skips errors", policy: PolicyBestEffort, dataType: DataTypeLabels, fail: true, wantWarning: true, wantCalls: 1},
+		{name: "partial policy skips errors", policy: PolicyPartial, dataType: DataTypeComments, fail: true, wantWarning: true, wantCalls: 1},
+		{name: "required-core fails on core data", policy: PolicyRequiredCore, dataType: DataTypeCore, fail: true, wantErr: true, wantCalls: 1},
+		{name: "required-core skips enrichment errors", policy: PolicyRequiredCore, dataType: DataTypeLabels, fail: true, wantWarning: true, wantCalls: 1},
+		{name: "unknown policy fails without fetching", policy: ErrorPolicy("bogus"), dataType: DataTypeCore, wantErr: true, wantCalls: 0},
+	}
+
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := &Config{
+				Policy:         tt.policy,
+				RetryAttempts:  1, // single attempt keeps the test fast
+				RetryBackoffMS: 10,
+			}
+
+			calls := 0
+			result := FetchWithPolicy(ctx, cfg, tt.dataType, "test", func() error {
+				calls++
+				if tt.fail {
+					return errors.New("test error")
+				}
+				return nil
+			})
+
+			if gotErr := result.Err != nil; gotErr != tt.wantErr {
+				t.Errorf("Err = %v, want error: %v", result.Err, tt.wantErr)
+			}
+			if result.Success != tt.wantSuccess {
+				t.Errorf("Success = %v, want %v", result.Success, tt.wantSuccess)
+			}
+			if gotWarning := len(result.Warnings) > 0; gotWarning != tt.wantWarning {
+				t.Errorf("Warnings = %v, want warning: %v", result.Warnings, tt.wantWarning)
+			}
+			if calls != tt.wantCalls {
+				t.Errorf("fetch called %d times, want %d", calls, tt.wantCalls)
+			}
+		})
+	}
+}