beads/internal/testutil/fixtures/fixtures.go
Ryan 690c73fc31 Performance Improvements (#319)
* feat: add performance testing framework foundation

Implements foundation for comprehensive performance testing and user
diagnostics for beads databases at 10K-20K scale.

Components added:
- Fixture generator (internal/testutil/fixtures/) for realistic test data
  * LargeSQLite/XLargeSQLite: 10K/20K issues with epic hierarchies
  * LargeFromJSONL/XLargeFromJSONL: test JSONL import path
  * Realistic cross-linked dependencies, labels, assignees
  * Reproducible with seeded RNG

- User diagnostics (bd doctor --perf) for field performance data
  * Collects platform info (OS, arch, Go/SQLite versions)
  * Measures key operation timings (ready, list, show, search)
  * Generates CPU profiles for bug reports
  * Clean separation in cmd/bd/doctor/perf.go

Test data characteristics:
- 10% epics, 30% features, 60% tasks
- 4-level hierarchies (Epic → Feature → Task → Subtask)
- 20% cross-epic blocking dependencies
- Realistic status/priority/label distributions

Supports bd-l954 (Performance Testing Framework epic)
Closes bd-6ed8, bd-q59i

* perf: optimize GetReadyWork with compound index (20x speedup)

Add compound index on dependencies(depends_on_id, type, issue_id) to
eliminate performance bottleneck in GetReadyWork recursive CTE query.
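In SQLite DDL, an index over that column list would look roughly like this (a sketch based on the columns named above; the actual statement lives in internal/storage/sqlite/schema.go):

```sql
-- Covers the recursive CTE's lookup pattern: filter by depends_on_id and
-- type, then read issue_id straight from the index entry (a covering index,
-- so the query never touches the base table).
CREATE INDEX IF NOT EXISTS idx_dependencies_depends_on_type_issue
    ON dependencies (depends_on_id, type, issue_id);
```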

Performance improvements (10K issue database):
- GetReadyWork: 752ms → 36.6ms (20.5x faster)
- Target: <50ms ✓ ACHIEVED
- 20K database: ~1500ms → 79.4ms (19x faster)

Benchmark infrastructure enhancements:
- Add dataset caching in /tmp/beads-bench-cache/ to avoid regenerating
  10K-20K issues on every benchmark run (first run: ~2min, subsequent: <5s)
- Add progress logging during fixture generation (shows 10%, 20%... completion)
- Add database size logging (17.5 MB for 10K, 35.1 MB for 20K)
- Document rationale for only benchmarking large datasets (>10K issues)
- Add CPU/trace profiling with --profile flag for performance debugging

Schema changes:
- internal/storage/sqlite/schema.go: Add idx_dependencies_depends_on_type_issue

New files:
- internal/storage/sqlite/bench_helpers_test.go: Reusable benchmark setup with caching
- internal/storage/sqlite/sqlite_bench_test.go: Comprehensive benchmarks for critical operations
- Makefile: Convenient benchmark execution (make bench-quick, make bench)

Related:
- Resolves bd-5qim (optimize GetReadyWork performance)
- Builds on bd-6ed8 (fixture generator), bd-q59i (bd doctor --perf)

* perf: add WASM compilation cache to eliminate cold-start overhead

Configure wazero compilation cache for ncruces/go-sqlite3 to avoid
~220ms JIT compilation on every process start.

Cache configuration:
- Location: ~/.cache/beads/wasm/ (platform-specific via os.UserCacheDir)
- Automatic version management: wazero keys entries by its version
- Fallback: in-memory cache if directory creation fails
- No cleanup needed: old versions are harmless (~5-10MB each)

Performance impact:
- First run: ~220ms (populate cache)
- Subsequent runs: ~20ms (load from cache)
- Savings: ~200ms per cold start

Cache invalidation:
- Automatic when wazero version changes (upgrades use new cache dir)
- Manual cleanup: rm -rf ~/.cache/beads/wasm/ (safe to delete anytime)

This complements daemon mode:
- Daemon mode: eliminates startup cost by keeping process alive
- WASM cache: reduces startup cost for one-off commands or daemon restarts

Changes:
- internal/storage/sqlite/sqlite.go: Add init() with cache setup

* refactor: improve maintainability of performance testing code

Extract common patterns and eliminate duplication across benchmarks, fixture generation, and performance diagnostics. Replace magic numbers with explicit configuration to improve readability and make it easier to tune test parameters.

* docs: clarify profiling behavior and add missing documentation

Add explanatory comments for profiling setup to clarify why --profile
forces direct mode (captures actual database operations instead of RPC
overhead) and document the stopCPUProfile function's role in flushing
profile data to disk. Also fix gosec G104 linter warning by explicitly
ignoring Close() error during cleanup.

* fix: prevent bench-quick from running indefinitely

Added //go:build bench tags and skipped timeout-prone benchmarks to
prevent make bench-quick from running for hours.

Changes:
- Add //go:build bench tag to cycle_bench_test.go and compact_bench_test.go
- Skip Dense graph benchmarks (documented to timeout >120s)
- Fix compact benchmark prefix: bd- → bd (validation expects prefix without trailing dash)

Before: make bench-quick ran for 3.5+ hours (12,699s) before manual interrupt
After: make bench-quick completes in ~25 seconds

The Dense graph benchmarks are known to timeout and represent rare edge
cases that don't need optimization for typical workflows.
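The gating works via a file-level build constraint (illustrative file header using Go 1.17+ syntax; the tagged file only compiles into the test binary when `-tags bench` is passed):

```go
//go:build bench

package sqlite_test

// Benchmarks in this file are excluded from normal `go test` runs and only
// build with: go test -tags bench -bench=. ./...
```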
2025-11-15 12:46:13 -08:00


// Package fixtures provides realistic test data generation for benchmarks and tests.
package fixtures

import (
	"context"
	"encoding/json"
	"fmt"
	"math/rand"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/steveyegge/beads/internal/storage"
	"github.com/steveyegge/beads/internal/types"
)
// labels used across all fixtures
var commonLabels = []string{
	"backend",
	"frontend",
	"urgent",
	"tech-debt",
	"documentation",
	"performance",
	"security",
	"ux",
	"api",
	"database",
}

// assignees used across all fixtures
var commonAssignees = []string{
	"alice",
	"bob",
	"charlie",
	"diana",
	"eve",
	"frank",
}

// epic titles for realistic data
var epicTitles = []string{
	"User Authentication System",
	"Payment Processing Integration",
	"Mobile App Redesign",
	"Performance Optimization",
	"API v2 Migration",
	"Search Functionality Enhancement",
	"Analytics Dashboard",
	"Multi-tenant Support",
	"Notification System",
	"Data Export Feature",
}

// feature titles (under epics)
var featureTitles = []string{
	"OAuth2 Integration",
	"Password Reset Flow",
	"Two-Factor Authentication",
	"Session Management",
	"API Endpoints",
	"Database Schema",
	"UI Components",
	"Background Jobs",
	"Error Handling",
	"Testing Infrastructure",
}

// task titles (under features)
var taskTitles = []string{
	"Implement login endpoint",
	"Add validation logic",
	"Write unit tests",
	"Update documentation",
	"Fix memory leak",
	"Optimize query performance",
	"Add error logging",
	"Refactor helper functions",
	"Update database migrations",
	"Configure deployment",
}
// Fixture size rationale:
// We only provide Large (10K) and XLarge (20K) fixtures because:
//   - Performance characteristics only emerge at scale (10K+ issues)
//   - Smaller fixtures don't provide meaningful optimization insights
//   - Code weight matters; we avoid unused complexity
//   - Target use case: repositories with thousands of issues

// DataConfig controls the distribution and characteristics of generated test data.
type DataConfig struct {
	TotalIssues       int     // total number of issues to generate
	EpicRatio         float64 // percentage of issues that are epics (e.g., 0.1 for 10%)
	FeatureRatio      float64 // percentage of issues that are features (e.g., 0.3 for 30%)
	OpenRatio         float64 // percentage of issues that are open (e.g., 0.5 for 50%)
	CrossLinkRatio    float64 // percentage of tasks with cross-epic blocking dependencies (e.g., 0.2 for 20%)
	MaxEpicAgeDays    int     // maximum age in days for epics (e.g., 180)
	MaxFeatureAgeDays int     // maximum age in days for features (e.g., 150)
	MaxTaskAgeDays    int     // maximum age in days for tasks (e.g., 120)
	MaxClosedAgeDays  int     // maximum days since closure (e.g., 30)
	RandSeed          int64   // random seed for reproducibility
}

// DefaultLargeConfig returns the configuration for the 10K issue dataset.
func DefaultLargeConfig() DataConfig {
	return DataConfig{
		TotalIssues:       10000,
		EpicRatio:         0.1,
		FeatureRatio:      0.3,
		OpenRatio:         0.5,
		CrossLinkRatio:    0.2,
		MaxEpicAgeDays:    180,
		MaxFeatureAgeDays: 150,
		MaxTaskAgeDays:    120,
		MaxClosedAgeDays:  30,
		RandSeed:          42,
	}
}

// DefaultXLargeConfig returns the configuration for the 20K issue dataset.
func DefaultXLargeConfig() DataConfig {
	return DataConfig{
		TotalIssues:       20000,
		EpicRatio:         0.1,
		FeatureRatio:      0.3,
		OpenRatio:         0.5,
		CrossLinkRatio:    0.2,
		MaxEpicAgeDays:    180,
		MaxFeatureAgeDays: 150,
		MaxTaskAgeDays:    120,
		MaxClosedAgeDays:  30,
		RandSeed:          43,
	}
}
// LargeSQLite creates a 10K issue database with realistic patterns.
func LargeSQLite(ctx context.Context, store storage.Storage) error {
	cfg := DefaultLargeConfig()
	return generateIssuesWithConfig(ctx, store, cfg)
}

// XLargeSQLite creates a 20K issue database with realistic patterns.
func XLargeSQLite(ctx context.Context, store storage.Storage) error {
	cfg := DefaultXLargeConfig()
	return generateIssuesWithConfig(ctx, store, cfg)
}

// LargeFromJSONL creates a 10K issue database by exporting to JSONL and reimporting.
func LargeFromJSONL(ctx context.Context, store storage.Storage, tempDir string) error {
	cfg := DefaultLargeConfig()
	cfg.RandSeed = 44 // different seed for JSONL path
	return generateFromJSONL(ctx, store, tempDir, cfg)
}

// XLargeFromJSONL creates a 20K issue database by exporting to JSONL and reimporting.
func XLargeFromJSONL(ctx context.Context, store storage.Storage, tempDir string) error {
	cfg := DefaultXLargeConfig()
	cfg.RandSeed = 45 // different seed for JSONL path
	return generateFromJSONL(ctx, store, tempDir, cfg)
}
// generateIssuesWithConfig creates issues with realistic epic hierarchies and cross-links using the provided configuration.
func generateIssuesWithConfig(ctx context.Context, store storage.Storage, cfg DataConfig) error {
	rng := rand.New(rand.NewSource(cfg.RandSeed))

	// Calculate breakdown using configuration ratios
	numEpics := int(float64(cfg.TotalIssues) * cfg.EpicRatio)
	numFeatures := int(float64(cfg.TotalIssues) * cfg.FeatureRatio)
	numTasks := cfg.TotalIssues - numEpics - numFeatures

	// Track created issues for cross-linking
	var allIssues []*types.Issue
	epicIssues := make([]*types.Issue, 0, numEpics)
	featureIssues := make([]*types.Issue, 0, numFeatures)
	taskIssues := make([]*types.Issue, 0, numTasks)

	// Progress tracking: log roughly every 10%
	createdIssues := 0
	lastPctLogged := -1
	logProgress := func() {
		pct := (createdIssues * 100) / cfg.TotalIssues
		if pct >= lastPctLogged+10 {
			fmt.Printf(" Progress: %d%% (%d/%d issues created)\n", pct, createdIssues, cfg.TotalIssues)
			lastPctLogged = pct
		}
	}

	// Create epics
	for i := 0; i < numEpics; i++ {
		issue := &types.Issue{
			Title:       fmt.Sprintf("%s (Epic %d)", epicTitles[i%len(epicTitles)], i),
			Description: fmt.Sprintf("Epic for %s", epicTitles[i%len(epicTitles)]),
			Status:      randomStatus(rng, cfg.OpenRatio),
			Priority:    randomPriority(rng),
			IssueType:   types.TypeEpic,
			Assignee:    commonAssignees[rng.Intn(len(commonAssignees))],
			CreatedAt:   randomTime(rng, cfg.MaxEpicAgeDays),
			UpdatedAt:   time.Now(),
		}
		if issue.Status == types.StatusClosed {
			closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
			issue.ClosedAt = &closedAt
		}
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			return fmt.Errorf("failed to create epic: %w", err)
		}
		// Add labels to epics
		for j := 0; j < rng.Intn(3)+1; j++ {
			label := commonLabels[rng.Intn(len(commonLabels))]
			_ = store.AddLabel(ctx, issue.ID, label, "fixture")
		}
		epicIssues = append(epicIssues, issue)
		allIssues = append(allIssues, issue)
		createdIssues++
		logProgress()
	}

	// Create features under epics
	for i := 0; i < numFeatures; i++ {
		parentEpic := epicIssues[i%len(epicIssues)]
		issue := &types.Issue{
			Title:       fmt.Sprintf("%s (Feature %d)", featureTitles[i%len(featureTitles)], i),
			Description: fmt.Sprintf("Feature under %s", parentEpic.Title),
			Status:      randomStatus(rng, cfg.OpenRatio),
			Priority:    randomPriority(rng),
			IssueType:   types.TypeFeature,
			Assignee:    commonAssignees[rng.Intn(len(commonAssignees))],
			CreatedAt:   randomTime(rng, cfg.MaxFeatureAgeDays),
			UpdatedAt:   time.Now(),
		}
		if issue.Status == types.StatusClosed {
			closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
			issue.ClosedAt = &closedAt
		}
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			return fmt.Errorf("failed to create feature: %w", err)
		}
		// Add parent-child dependency to epic
		dep := &types.Dependency{
			IssueID:     issue.ID,
			DependsOnID: parentEpic.ID,
			Type:        types.DepParentChild,
			CreatedAt:   time.Now(),
			CreatedBy:   "fixture",
		}
		if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
			return fmt.Errorf("failed to add feature-epic dependency: %w", err)
		}
		// Add labels
		for j := 0; j < rng.Intn(3)+1; j++ {
			label := commonLabels[rng.Intn(len(commonLabels))]
			_ = store.AddLabel(ctx, issue.ID, label, "fixture")
		}
		featureIssues = append(featureIssues, issue)
		allIssues = append(allIssues, issue)
		createdIssues++
		logProgress()
	}

	// Create tasks under features
	for i := 0; i < numTasks; i++ {
		parentFeature := featureIssues[i%len(featureIssues)]
		issue := &types.Issue{
			Title:       fmt.Sprintf("%s (Task %d)", taskTitles[i%len(taskTitles)], i),
			Description: fmt.Sprintf("Task under %s", parentFeature.Title),
			Status:      randomStatus(rng, cfg.OpenRatio),
			Priority:    randomPriority(rng),
			IssueType:   types.TypeTask,
			Assignee:    commonAssignees[rng.Intn(len(commonAssignees))],
			CreatedAt:   randomTime(rng, cfg.MaxTaskAgeDays),
			UpdatedAt:   time.Now(),
		}
		if issue.Status == types.StatusClosed {
			closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
			issue.ClosedAt = &closedAt
		}
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			return fmt.Errorf("failed to create task: %w", err)
		}
		// Add parent-child dependency to feature
		dep := &types.Dependency{
			IssueID:     issue.ID,
			DependsOnID: parentFeature.ID,
			Type:        types.DepParentChild,
			CreatedAt:   time.Now(),
			CreatedBy:   "fixture",
		}
		if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
			return fmt.Errorf("failed to add task-feature dependency: %w", err)
		}
		// Add labels
		for j := 0; j < rng.Intn(2)+1; j++ {
			label := commonLabels[rng.Intn(len(commonLabels))]
			_ = store.AddLabel(ctx, issue.ID, label, "fixture")
		}
		taskIssues = append(taskIssues, issue)
		allIssues = append(allIssues, issue)
		createdIssues++
		logProgress()
	}
	fmt.Printf(" Progress: 100%% (%d/%d issues created) - Complete!\n", cfg.TotalIssues, cfg.TotalIssues)

	// Add cross-links between tasks across epics using the configured ratio
	numCrossLinks := int(float64(numTasks) * cfg.CrossLinkRatio)
	for i := 0; i < numCrossLinks; i++ {
		fromTask := taskIssues[rng.Intn(len(taskIssues))]
		toTask := taskIssues[rng.Intn(len(taskIssues))]
		// Avoid self-dependencies
		if fromTask.ID == toTask.ID {
			continue
		}
		dep := &types.Dependency{
			IssueID:     fromTask.ID,
			DependsOnID: toTask.ID,
			Type:        types.DepBlocks,
			CreatedAt:   time.Now(),
			CreatedBy:   "fixture",
		}
		// Ignore cycle errors for cross-links (they're expected)
		_ = store.AddDependency(ctx, dep, "fixture")
	}
	return nil
}
// generateFromJSONL creates issues, exports them to JSONL, clears the database, and reimports them.
func generateFromJSONL(ctx context.Context, store storage.Storage, tempDir string, cfg DataConfig) error {
	// First generate issues normally
	if err := generateIssuesWithConfig(ctx, store, cfg); err != nil {
		return fmt.Errorf("failed to generate issues: %w", err)
	}

	// Export to JSONL
	jsonlPath := filepath.Join(tempDir, "issues.jsonl")
	if err := exportToJSONL(ctx, store, jsonlPath); err != nil {
		return fmt.Errorf("failed to export to JSONL: %w", err)
	}

	// Clear all issues (we'll reimport them)
	allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to get all issues: %w", err)
	}
	for _, issue := range allIssues {
		if err := store.DeleteIssue(ctx, issue.ID); err != nil {
			return fmt.Errorf("failed to delete issue %s: %w", issue.ID, err)
		}
	}

	// Import from JSONL
	if err := importFromJSONL(ctx, store, jsonlPath); err != nil {
		return fmt.Errorf("failed to import from JSONL: %w", err)
	}
	return nil
}
// exportToJSONL exports all issues to a JSONL file.
func exportToJSONL(ctx context.Context, store storage.Storage, path string) error {
	// Get all issues
	allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to query issues: %w", err)
	}

	// Populate dependencies and labels for each issue
	allDeps, err := store.GetAllDependencyRecords(ctx)
	if err != nil {
		return fmt.Errorf("failed to get dependencies: %w", err)
	}
	for _, issue := range allIssues {
		issue.Dependencies = allDeps[issue.ID]
		labels, err := store.GetLabels(ctx, issue.ID)
		if err != nil {
			return fmt.Errorf("failed to get labels for %s: %w", issue.ID, err)
		}
		issue.Labels = labels
	}

	// Write to JSONL file
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create JSONL file: %w", err)
	}
	defer f.Close()
	encoder := json.NewEncoder(f)
	for _, issue := range allIssues {
		if err := encoder.Encode(issue); err != nil {
			return fmt.Errorf("failed to encode issue: %w", err)
		}
	}
	return nil
}
// importFromJSONL imports issues from a JSONL file.
func importFromJSONL(ctx context.Context, store storage.Storage, path string) error {
	// Read JSONL file
	data, err := os.ReadFile(path)
	if err != nil {
		return fmt.Errorf("failed to read JSONL file: %w", err)
	}

	// Parse issues
	var issues []*types.Issue
	for i, line := range splitLines(string(data)) {
		if len(line) == 0 {
			continue
		}
		var issue types.Issue
		if err := json.Unmarshal([]byte(line), &issue); err != nil {
			return fmt.Errorf("failed to parse issue at line %d: %w", i+1, err)
		}
		issues = append(issues, &issue)
	}

	// Import issues directly using the storage interface.
	// Step 1: Create all issues first (without dependencies/labels)
	type savedMetadata struct {
		deps   []*types.Dependency
		labels []string
	}
	metadata := make(map[string]savedMetadata)
	for _, issue := range issues {
		// Save dependencies and labels for later
		metadata[issue.ID] = savedMetadata{
			deps:   issue.Dependencies,
			labels: issue.Labels,
		}
		issue.Dependencies = nil
		issue.Labels = nil
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			// Ignore duplicate errors
			if !strings.Contains(err.Error(), "UNIQUE constraint failed") {
				return fmt.Errorf("failed to create issue %s: %w", issue.ID, err)
			}
		}
	}

	// Step 2: Add all dependencies (now that all issues exist)
	for issueID, meta := range metadata {
		for _, dep := range meta.deps {
			if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
				// Ignore duplicate and cycle errors
				if !strings.Contains(err.Error(), "already exists") &&
					!strings.Contains(err.Error(), "cycle") {
					return fmt.Errorf("failed to add dependency for %s: %w", issueID, err)
				}
			}
		}
		// Add labels
		for _, label := range meta.labels {
			_ = store.AddLabel(ctx, issueID, label, "fixture")
		}
	}
	return nil
}
// splitLines splits a string on newline characters; unlike strings.Split, it
// does not emit a trailing empty segment for input ending in a newline.
func splitLines(s string) []string {
	var lines []string
	start := 0
	for i := 0; i < len(s); i++ {
		if s[i] == '\n' {
			lines = append(lines, s[start:i])
			start = i + 1
		}
	}
	if start < len(s) {
		lines = append(lines, s[start:])
	}
	return lines
}

// randomStatus returns a random status with the given open ratio.
func randomStatus(rng *rand.Rand, openRatio float64) types.Status {
	if rng.Float64() < openRatio {
		// Open statuses: open, in_progress, blocked
		statuses := []types.Status{types.StatusOpen, types.StatusInProgress, types.StatusBlocked}
		return statuses[rng.Intn(len(statuses))]
	}
	return types.StatusClosed
}

// randomPriority returns a random priority with a realistic distribution:
// P0: 5%, P1: 15%, P2: 50%, P3: 25%, P4: 5%
func randomPriority(rng *rand.Rand) int {
	r := rng.Intn(100)
	switch {
	case r < 5:
		return 0
	case r < 20:
		return 1
	case r < 70:
		return 2
	case r < 95:
		return 3
	default:
		return 4
	}
}

// randomTime returns a random time up to maxDaysAgo days in the past.
// Guards against rand.Intn's panic on non-positive arguments.
func randomTime(rng *rand.Rand, maxDaysAgo int) time.Time {
	if maxDaysAgo <= 0 {
		return time.Now()
	}
	daysAgo := rng.Intn(maxDaysAgo)
	return time.Now().Add(-time.Duration(daysAgo) * 24 * time.Hour)
}