Files
beads/internal/testutil/fixtures/fixtures.go
2025-11-17 10:12:46 -07:00

544 lines
16 KiB
Go

// Package fixtures provides realistic test data generation for benchmarks and tests.
package fixtures
import (
"context"
"encoding/json"
"fmt"
"math/rand"
"os"
"path/filepath"
"strings"
"time"
"github.com/steveyegge/beads/internal/storage"
"github.com/steveyegge/beads/internal/types"
)
// Shared vocabularies that the fixture generator draws from. They are grouped
// in one declaration because they are only ever used together when building
// issues.
var (
	// commonLabels is the pool of labels attached to generated issues.
	commonLabels = []string{
		"backend",
		"frontend",
		"urgent",
		"tech-debt",
		"documentation",
		"performance",
		"security",
		"ux",
		"api",
		"database",
	}

	// commonAssignees is the pool of assignee names for generated issues.
	commonAssignees = []string{
		"alice",
		"bob",
		"charlie",
		"diana",
		"eve",
		"frank",
	}

	// epicTitles supplies base titles for epics.
	epicTitles = []string{
		"User Authentication System",
		"Payment Processing Integration",
		"Mobile App Redesign",
		"Performance Optimization",
		"API v2 Migration",
		"Search Functionality Enhancement",
		"Analytics Dashboard",
		"Multi-tenant Support",
		"Notification System",
		"Data Export Feature",
	}

	// featureTitles supplies base titles for features (children of epics).
	featureTitles = []string{
		"OAuth2 Integration",
		"Password Reset Flow",
		"Two-Factor Authentication",
		"Session Management",
		"API Endpoints",
		"Database Schema",
		"UI Components",
		"Background Jobs",
		"Error Handling",
		"Testing Infrastructure",
	}

	// taskTitles supplies base titles for tasks (children of features).
	taskTitles = []string{
		"Implement login endpoint",
		"Add validation logic",
		"Write unit tests",
		"Update documentation",
		"Fix memory leak",
		"Optimize query performance",
		"Add error logging",
		"Refactor helper functions",
		"Update database migrations",
		"Configure deployment",
	}
)
// Fixture size rationale:
// We only provide Large (10K) and XLarge (20K) fixtures because:
// - Performance characteristics only emerge at scale (10K+ issues)
// - Smaller fixtures don't provide meaningful optimization insights
// - Code weight matters; we avoid unused complexity
// - Target use case: repositories with thousands of issues
// DataConfig controls the distribution and characteristics of generated test data.
//
// The *Ratio fields are fractions, not percentages (0.1 means 10%). Epic and
// feature counts are TotalIssues multiplied by the matching ratio and truncated
// to an integer; every remaining issue becomes a task. The Max*AgeDays fields
// are passed to randomTime, which picks a whole number of days ago that is
// strictly less than the given value.
type DataConfig struct {
TotalIssues int // total number of issues to generate (epics + features + tasks)
EpicRatio float64 // fraction of TotalIssues created as epics (e.g., 0.1 for 10%)
FeatureRatio float64 // fraction of TotalIssues created as features (e.g., 0.3 for 30%)
OpenRatio float64 // probability that an issue gets a non-closed status; otherwise it is closed (e.g., 0.5 for 50%)
CrossLinkRatio float64 // number of blocking-dependency attempts between random task pairs, as a fraction of the task count (e.g., 0.2 for 20%)
MaxEpicAgeDays int // exclusive upper bound, in days, on how long ago an epic was created (e.g., 180)
MaxFeatureAgeDays int // exclusive upper bound, in days, on how long ago a feature was created (e.g., 150)
MaxTaskAgeDays int // exclusive upper bound, in days, on how long ago a task was created (e.g., 120)
MaxClosedAgeDays int // exclusive upper bound, in days, on how long ago a closed issue was closed (e.g., 30)
RandSeed int64 // seed for the math/rand source, so random choices are repeatable
}
// DefaultLargeConfig returns the settings for the 10,000-issue dataset:
// 10% epics, 30% features (the rest become tasks), half of the issues open,
// and cross-link attempts equal to 20% of the task count.
func DefaultLargeConfig() DataConfig {
	var cfg DataConfig

	// Size and seed.
	cfg.TotalIssues = 10000
	cfg.RandSeed = 42

	// Composition of the dataset.
	cfg.EpicRatio = 0.1
	cfg.FeatureRatio = 0.3
	cfg.OpenRatio = 0.5
	cfg.CrossLinkRatio = 0.2

	// Age limits, in days.
	cfg.MaxEpicAgeDays = 180
	cfg.MaxFeatureAgeDays = 150
	cfg.MaxTaskAgeDays = 120
	cfg.MaxClosedAgeDays = 30

	return cfg
}
// DefaultXLargeConfig returns the settings for the 20,000-issue dataset. It
// uses the same ratios and age limits as the 10K configuration but its own
// seed (43).
func DefaultXLargeConfig() DataConfig {
	var cfg DataConfig

	// Size and seed.
	cfg.TotalIssues = 20000
	cfg.RandSeed = 43

	// Composition of the dataset.
	cfg.EpicRatio = 0.1
	cfg.FeatureRatio = 0.3
	cfg.OpenRatio = 0.5
	cfg.CrossLinkRatio = 0.2

	// Age limits, in days.
	cfg.MaxEpicAgeDays = 180
	cfg.MaxFeatureAgeDays = 150
	cfg.MaxTaskAgeDays = 120
	cfg.MaxClosedAgeDays = 30

	return cfg
}
// LargeSQLite fills store with the 10K-issue fixture described by
// DefaultLargeConfig, generating the issues directly through the storage
// interface.
func LargeSQLite(ctx context.Context, store storage.Storage) error {
	return generateIssuesWithConfig(ctx, store, DefaultLargeConfig())
}
// XLargeSQLite fills store with the 20K-issue fixture described by
// DefaultXLargeConfig, generating the issues directly through the storage
// interface.
func XLargeSQLite(ctx context.Context, store storage.Storage) error {
	return generateIssuesWithConfig(ctx, store, DefaultXLargeConfig())
}
// LargeFromJSONL fills store with a 10K-issue fixture that has been
// round-tripped through a JSONL file under tempDir. It starts from
// DefaultLargeConfig but overrides the seed so the data differs from the
// direct (LargeSQLite) path.
func LargeFromJSONL(ctx context.Context, store storage.Storage, tempDir string) error {
	jsonlCfg := DefaultLargeConfig()
	jsonlCfg.RandSeed = 44 // different seed for JSONL path

	return generateFromJSONL(ctx, store, tempDir, jsonlCfg)
}
// XLargeFromJSONL fills store with a 20K-issue fixture that has been
// round-tripped through a JSONL file under tempDir. It starts from
// DefaultXLargeConfig but overrides the seed so the data differs from the
// direct (XLargeSQLite) path.
func XLargeFromJSONL(ctx context.Context, store storage.Storage, tempDir string) error {
	jsonlCfg := DefaultXLargeConfig()
	jsonlCfg.RandSeed = 45 // different seed for JSONL path

	return generateFromJSONL(ctx, store, tempDir, jsonlCfg)
}
// generateIssuesWithConfig populates store with a three-level hierarchy of
// fixture issues (epics -> features -> tasks) and then adds random "blocks"
// dependencies between pairs of tasks.
//
// The epic and feature counts are cfg.TotalIssues multiplied by EpicRatio and
// FeatureRatio, truncated to integers; tasks are whatever remains. Features are
// assigned to epics, and tasks to features, round-robin. Random choices come
// from a math/rand generator seeded with cfg.RandSeed. Timestamps are offsets
// from time.Now(), so they differ between runs.
//
// Progress is printed to stdout. An error is returned when the configuration
// cannot produce a valid hierarchy (a negative count, or children without any
// possible parent), or when creating an issue or a parent-child dependency
// fails. Failures to add labels or cross-link dependencies are ignored.
func generateIssuesWithConfig(ctx context.Context, store storage.Storage, cfg DataConfig) error {
	// Calculate breakdown using configuration ratios; tasks absorb the remainder.
	numEpics := int(float64(cfg.TotalIssues) * cfg.EpicRatio)
	numFeatures := int(float64(cfg.TotalIssues) * cfg.FeatureRatio)
	numTasks := cfg.TotalIssues - numEpics - numFeatures

	// Reject configurations that would otherwise panic further down: negative
	// counts break make(), and children without parents cause a modulo by zero
	// when picking the parent round-robin.
	switch {
	case numEpics < 0 || numFeatures < 0 || numTasks < 0:
		return fmt.Errorf("invalid fixture config: negative issue count (epics=%d, features=%d, tasks=%d)",
			numEpics, numFeatures, numTasks)
	case numFeatures > 0 && numEpics == 0:
		return fmt.Errorf("invalid fixture config: %d features but no epics to attach them to", numFeatures)
	case numTasks > 0 && numFeatures == 0:
		return fmt.Errorf("invalid fixture config: %d tasks but no features to attach them to", numTasks)
	}

	rng := rand.New(rand.NewSource(cfg.RandSeed)) // #nosec G404 -- deterministic math/rand used for repeatable fixture data

	// Track created issues so children can find parents and tasks can be cross-linked.
	epicIssues := make([]*types.Issue, 0, numEpics)
	featureIssues := make([]*types.Issue, 0, numFeatures)
	taskIssues := make([]*types.Issue, 0, numTasks)

	// Progress tracking. logProgress is only called after an issue has been
	// created, which implies cfg.TotalIssues > 0, so the division is safe.
	createdIssues := 0
	lastPctLogged := -1
	logProgress := func() {
		pct := (createdIssues * 100) / cfg.TotalIssues
		if pct >= lastPctLogged+10 {
			fmt.Printf(" Progress: %d%% (%d/%d issues created)\n", pct, createdIssues, cfg.TotalIssues)
			lastPctLogged = pct
		}
	}

	// stampClosed gives a closed issue its ClosedAt. The closure time is drawn
	// independently of CreatedAt, so it is clamped to never precede creation.
	stampClosed := func(issue *types.Issue) {
		if issue.Status != types.StatusClosed {
			return
		}
		closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
		if closedAt.Before(issue.CreatedAt) {
			closedAt = issue.CreatedAt
		}
		issue.ClosedAt = &closedAt
	}

	// addLabels attaches random labels to an issue. The loop bound is re-rolled
	// on every iteration (rng.Intn is part of the loop condition); this is kept
	// as is so that the sequence of random draws, and therefore the seeded
	// data, stays stable. Label failures are ignored: labels are best-effort.
	addLabels := func(issueID string, bound int) {
		for j := 0; j < rng.Intn(bound)+1; j++ {
			label := commonLabels[rng.Intn(len(commonLabels))]
			_ = store.AddLabel(ctx, issueID, label, "fixture")
		}
	}

	// linkToParent records a parent-child dependency from child to parent.
	linkToParent := func(child, parent *types.Issue) error {
		dep := &types.Dependency{
			IssueID:     child.ID,
			DependsOnID: parent.ID,
			Type:        types.DepParentChild,
			CreatedAt:   time.Now(),
			CreatedBy:   "fixture",
		}
		return store.AddDependency(ctx, dep, "fixture")
	}

	// Create epics
	for i := 0; i < numEpics; i++ {
		issue := &types.Issue{
			Title:       fmt.Sprintf("%s (Epic %d)", epicTitles[i%len(epicTitles)], i),
			Description: fmt.Sprintf("Epic for %s", epicTitles[i%len(epicTitles)]),
			Status:      randomStatus(rng, cfg.OpenRatio),
			Priority:    randomPriority(rng),
			IssueType:   types.TypeEpic,
			Assignee:    commonAssignees[rng.Intn(len(commonAssignees))],
			CreatedAt:   randomTime(rng, cfg.MaxEpicAgeDays),
			UpdatedAt:   time.Now(),
		}
		stampClosed(issue)
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			return fmt.Errorf("failed to create epic: %w", err)
		}
		addLabels(issue.ID, 3)
		epicIssues = append(epicIssues, issue)
		createdIssues++
		logProgress()
	}

	// Create features under epics
	for i := 0; i < numFeatures; i++ {
		parentEpic := epicIssues[i%len(epicIssues)]
		issue := &types.Issue{
			Title:       fmt.Sprintf("%s (Feature %d)", featureTitles[i%len(featureTitles)], i),
			Description: fmt.Sprintf("Feature under %s", parentEpic.Title),
			Status:      randomStatus(rng, cfg.OpenRatio),
			Priority:    randomPriority(rng),
			IssueType:   types.TypeFeature,
			Assignee:    commonAssignees[rng.Intn(len(commonAssignees))],
			CreatedAt:   randomTime(rng, cfg.MaxFeatureAgeDays),
			UpdatedAt:   time.Now(),
		}
		stampClosed(issue)
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			return fmt.Errorf("failed to create feature: %w", err)
		}
		if err := linkToParent(issue, parentEpic); err != nil {
			return fmt.Errorf("failed to add feature-epic dependency: %w", err)
		}
		addLabels(issue.ID, 3)
		featureIssues = append(featureIssues, issue)
		createdIssues++
		logProgress()
	}

	// Create tasks under features
	for i := 0; i < numTasks; i++ {
		parentFeature := featureIssues[i%len(featureIssues)]
		issue := &types.Issue{
			Title:       fmt.Sprintf("%s (Task %d)", taskTitles[i%len(taskTitles)], i),
			Description: fmt.Sprintf("Task under %s", parentFeature.Title),
			Status:      randomStatus(rng, cfg.OpenRatio),
			Priority:    randomPriority(rng),
			IssueType:   types.TypeTask,
			Assignee:    commonAssignees[rng.Intn(len(commonAssignees))],
			CreatedAt:   randomTime(rng, cfg.MaxTaskAgeDays),
			UpdatedAt:   time.Now(),
		}
		stampClosed(issue)
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			return fmt.Errorf("failed to create task: %w", err)
		}
		if err := linkToParent(issue, parentFeature); err != nil {
			return fmt.Errorf("failed to add task-feature dependency: %w", err)
		}
		addLabels(issue.ID, 2)
		taskIssues = append(taskIssues, issue)
		createdIssues++
		logProgress()
	}
	fmt.Printf(" Progress: 100%% (%d/%d issues created) - Complete!\n", cfg.TotalIssues, cfg.TotalIssues)

	// Add blocking cross-links between randomly chosen task pairs. The count is
	// an upper bound: self-pairs are skipped and rejected links are dropped.
	numCrossLinks := int(float64(numTasks) * cfg.CrossLinkRatio)
	for i := 0; i < numCrossLinks; i++ {
		fromTask := taskIssues[rng.Intn(len(taskIssues))]
		toTask := taskIssues[rng.Intn(len(taskIssues))]
		// Avoid self-dependencies
		if fromTask.ID == toTask.ID {
			continue
		}
		dep := &types.Dependency{
			IssueID:     fromTask.ID,
			DependsOnID: toTask.ID,
			Type:        types.DepBlocks,
			CreatedAt:   time.Now(),
			CreatedBy:   "fixture",
		}
		// Ignore cycle errors for cross-links (they're expected)
		_ = store.AddDependency(ctx, dep, "fixture")
	}
	return nil
}
// generateFromJSONL builds a fixture by round-tripping it through JSONL:
// it generates the issues into store, writes them to "issues.jsonl" inside
// tempDir, deletes every issue from store, and finally loads the file back.
// The first failing step aborts the sequence and its error is returned wrapped.
func generateFromJSONL(ctx context.Context, store storage.Storage, tempDir string, cfg DataConfig) error {
	// Phase 1: populate the store the normal way.
	err := generateIssuesWithConfig(ctx, store, cfg)
	if err != nil {
		return fmt.Errorf("failed to generate issues: %w", err)
	}

	// Phase 2: dump everything to disk.
	dumpPath := filepath.Join(tempDir, "issues.jsonl")
	if err = exportToJSONL(ctx, store, dumpPath); err != nil {
		return fmt.Errorf("failed to export to JSONL: %w", err)
	}

	// Phase 3: empty the store so the import starts from a clean slate.
	existing, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to get all issues: %w", err)
	}
	for idx := range existing {
		id := existing[idx].ID
		if err = store.DeleteIssue(ctx, id); err != nil {
			return fmt.Errorf("failed to delete issue %s: %w", id, err)
		}
	}

	// Phase 4: load the dump back in.
	if err = importFromJSONL(ctx, store, dumpPath); err != nil {
		return fmt.Errorf("failed to import from JSONL: %w", err)
	}
	return nil
}
// exportToJSONL writes every issue in store to the file at path, one JSON
// object per line. Before encoding, each issue is populated with its
// dependency records and labels so the file carries them too.
//
// The file is created (or truncated) only after all data has been read from
// the store. The returned error covers query, create, encode, and close
// failures. The close error is checked because a failed close on a freshly
// written file can mean the data did not reach disk.
func exportToJSONL(ctx context.Context, store storage.Storage, path string) (retErr error) {
	// Get all issues
	allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
	if err != nil {
		return fmt.Errorf("failed to query issues: %w", err)
	}

	// Populate dependencies and labels for each issue. Dependencies are
	// fetched once for the whole store; labels are fetched per issue.
	allDeps, err := store.GetAllDependencyRecords(ctx)
	if err != nil {
		return fmt.Errorf("failed to get dependencies: %w", err)
	}
	for _, issue := range allIssues {
		issue.Dependencies = allDeps[issue.ID]
		labels, err := store.GetLabels(ctx, issue.ID)
		if err != nil {
			return fmt.Errorf("failed to get labels for %s: %w", issue.ID, err)
		}
		issue.Labels = labels
	}

	// Write to JSONL file
	// #nosec G304 -- fixture exports to deterministic file controlled by tests
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create JSONL file: %w", err)
	}
	defer func() {
		// Report a close failure unless an earlier error is already being returned.
		if closeErr := f.Close(); closeErr != nil && retErr == nil {
			retErr = fmt.Errorf("failed to close JSONL file: %w", closeErr)
		}
	}()

	// Encoder.Encode appends a newline after each value, which yields JSONL.
	encoder := json.NewEncoder(f)
	for _, issue := range allIssues {
		if err := encoder.Encode(issue); err != nil {
			return fmt.Errorf("failed to encode issue: %w", err)
		}
	}
	return nil
}
// importFromJSONL loads issues from the JSONL file at path into store.
//
// The import runs in two phases so that dependencies can reference any issue
// in the file:
//  1. every issue is created with its dependencies and labels stripped;
//  2. the saved dependencies and labels are added, issue by issue, in the
//     order the issues appear in the file.
//
// Phase 2 follows file order rather than ranging over the lookup map. Cycle
// errors are ignored, so the order in which dependencies are added decides
// which edge of a would-be cycle gets dropped; Go's randomized map iteration
// would make the imported graph differ from run to run.
//
// Empty lines are skipped. Errors whose text contains "UNIQUE constraint
// failed" (on create), or "already exists" / "cycle" (on dependencies), are
// ignored, as are all label errors. Any other error aborts the import.
func importFromJSONL(ctx context.Context, store storage.Storage, path string) error {
	// Read JSONL file
	// #nosec G304 -- fixture imports from deterministic file created earlier in test
	data, err := os.ReadFile(path)
	if err != nil {
		return fmt.Errorf("failed to read JSONL file: %w", err)
	}

	// Parse issues
	var issues []*types.Issue
	lines := string(data)
	for i, line := range splitLines(lines) {
		if len(line) == 0 {
			continue
		}
		var issue types.Issue
		if err := json.Unmarshal([]byte(line), &issue); err != nil {
			return fmt.Errorf("failed to parse issue at line %d: %w", i+1, err)
		}
		issues = append(issues, &issue)
	}

	// Import issues directly using storage interface
	// Step 1: Create all issues first (without dependencies/labels)
	type savedMetadata struct {
		deps   []*types.Dependency
		labels []string
	}
	metadata := make(map[string]savedMetadata, len(issues))
	order := make([]string, 0, len(issues)) // issue IDs in first-seen file order
	for _, issue := range issues {
		// Save dependencies and labels for later. If an ID repeats, the last
		// occurrence's metadata wins but the ID keeps its first position.
		if _, seen := metadata[issue.ID]; !seen {
			order = append(order, issue.ID)
		}
		metadata[issue.ID] = savedMetadata{
			deps:   issue.Dependencies,
			labels: issue.Labels,
		}
		issue.Dependencies = nil
		issue.Labels = nil
		if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
			// Ignore duplicate errors
			if !strings.Contains(err.Error(), "UNIQUE constraint failed") {
				return fmt.Errorf("failed to create issue %s: %w", issue.ID, err)
			}
		}
	}

	// Step 2: Add all dependencies (now that all issues exist), in file order.
	for _, issueID := range order {
		meta := metadata[issueID]
		for _, dep := range meta.deps {
			if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
				// Ignore duplicate and cycle errors
				if !strings.Contains(err.Error(), "already exists") &&
					!strings.Contains(err.Error(), "cycle") {
					return fmt.Errorf("failed to add dependency for %s: %w", issueID, err)
				}
			}
		}
		// Add labels (best-effort: failures are ignored)
		for _, label := range meta.labels {
			_ = store.AddLabel(ctx, issueID, label, "fixture")
		}
	}
	return nil
}
// splitLines breaks s into the pieces separated by '\n'. The separators are
// not included in the pieces. A trailing newline does not produce a final
// empty piece, and an empty input yields a nil slice. Carriage returns are
// left untouched.
func splitLines(s string) []string {
	var pieces []string
	for rest := s; len(rest) > 0; {
		// Scan forward to the next newline (or the end of the input).
		end := 0
		for end < len(rest) && rest[end] != '\n' {
			end++
		}
		pieces = append(pieces, rest[:end])
		if end == len(rest) {
			break // last piece had no terminating newline
		}
		rest = rest[end+1:] // step over the newline
	}
	return pieces
}
// randomStatus picks an issue status. One draw from rng decides whether the
// issue is closed: if the draw is below openRatio, a second draw chooses
// uniformly among open, in_progress, and blocked; otherwise the result is
// closed and no second draw is made.
func randomStatus(rng *rand.Rand, openRatio float64) types.Status {
	if roll := rng.Float64(); roll < openRatio {
		// Open statuses: open, in_progress, blocked
		switch rng.Intn(3) {
		case 0:
			return types.StatusOpen
		case 1:
			return types.StatusInProgress
		default:
			return types.StatusBlocked
		}
	}
	return types.StatusClosed
}
// randomPriority draws a priority from 0 to 4 using a single rng.Intn(100)
// roll, weighted as follows:
// P0: 5%, P1: 15%, P2: 50%, P3: 25%, P4: 5%
func randomPriority(rng *rand.Rand) int {
	// Cumulative upper bounds (exclusive) of the roll for priorities 0..3;
	// anything at or above the last bound is priority 4.
	upperBounds := [...]int{5, 20, 70, 95}

	roll := rng.Intn(100)
	for priority, bound := range upperBounds {
		if roll < bound {
			return priority
		}
	}
	return len(upperBounds)
}
// randomTime returns the current time shifted back by a random whole number
// of days in the range [0, maxDaysAgo), i.e. at most maxDaysAgo-1 days ago.
//
// When maxDaysAgo is zero or negative there is no valid range to draw from
// (rand.Intn would panic), so the current time is returned and rng is left
// untouched.
func randomTime(rng *rand.Rand, maxDaysAgo int) time.Time {
	now := time.Now()
	if maxDaysAgo <= 0 {
		return now
	}
	daysAgo := rng.Intn(maxDaysAgo)
	return now.Add(-time.Duration(daysAgo) * 24 * time.Hour)
}