544 lines
16 KiB
Go
544 lines
16 KiB
Go
// Package fixtures provides realistic test data generation for benchmarks and tests.
|
|
package fixtures
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math/rand"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/steveyegge/beads/internal/storage"
|
|
"github.com/steveyegge/beads/internal/types"
|
|
)
|
|
|
|
// commonLabels is the shared pool of label names attached (at random) to
// generated fixture issues. Order matters for reproducibility: labels are
// selected by seeded-RNG index into this slice.
var commonLabels = []string{
	"backend",
	"frontend",
	"urgent",
	"tech-debt",
	"documentation",
	"performance",
	"security",
	"ux",
	"api",
	"database",
}
|
|
|
|
// commonAssignees is the shared pool of assignee names used across all
// fixtures. Order matters for reproducibility: assignees are selected by
// seeded-RNG index into this slice.
var commonAssignees = []string{
	"alice",
	"bob",
	"charlie",
	"diana",
	"eve",
	"frank",
}
|
|
|
|
// epicTitles holds realistic epic titles; generated epics cycle through this
// slice (index modulo length), so the list repeats for large datasets.
var epicTitles = []string{
	"User Authentication System",
	"Payment Processing Integration",
	"Mobile App Redesign",
	"Performance Optimization",
	"API v2 Migration",
	"Search Functionality Enhancement",
	"Analytics Dashboard",
	"Multi-tenant Support",
	"Notification System",
	"Data Export Feature",
}
|
|
|
|
// featureTitles holds realistic feature titles (features nest under epics);
// generated features cycle through this slice (index modulo length).
var featureTitles = []string{
	"OAuth2 Integration",
	"Password Reset Flow",
	"Two-Factor Authentication",
	"Session Management",
	"API Endpoints",
	"Database Schema",
	"UI Components",
	"Background Jobs",
	"Error Handling",
	"Testing Infrastructure",
}
|
|
|
|
// taskTitles holds realistic task titles (tasks nest under features);
// generated tasks cycle through this slice (index modulo length).
var taskTitles = []string{
	"Implement login endpoint",
	"Add validation logic",
	"Write unit tests",
	"Update documentation",
	"Fix memory leak",
	"Optimize query performance",
	"Add error logging",
	"Refactor helper functions",
	"Update database migrations",
	"Configure deployment",
}
|
|
|
|
// Fixture size rationale:
|
|
// We only provide Large (10K) and XLarge (20K) fixtures because:
|
|
// - Performance characteristics only emerge at scale (10K+ issues)
|
|
// - Smaller fixtures don't provide meaningful optimization insights
|
|
// - Code weight matters; we avoid unused complexity
|
|
// - Target use case: repositories with thousands of issues
|
|
|
|
// DataConfig controls the distribution and characteristics of generated test
// data. The three ratio fields are fractions of TotalIssues; any remainder
// after epics and features becomes tasks. Age fields bound how far in the
// past randomly generated timestamps may fall.
type DataConfig struct {
	TotalIssues       int     // total number of issues to generate
	EpicRatio         float64 // percentage of issues that are epics (e.g., 0.1 for 10%)
	FeatureRatio      float64 // percentage of issues that are features (e.g., 0.3 for 30%)
	OpenRatio         float64 // percentage of issues that are open (e.g., 0.5 for 50%)
	CrossLinkRatio    float64 // percentage of tasks with cross-epic blocking dependencies (e.g., 0.2 for 20%)
	MaxEpicAgeDays    int     // maximum age in days for epics (e.g., 180)
	MaxFeatureAgeDays int     // maximum age in days for features (e.g., 150)
	MaxTaskAgeDays    int     // maximum age in days for tasks (e.g., 120)
	MaxClosedAgeDays  int     // maximum days since closure (e.g., 30)
	RandSeed          int64   // random seed for reproducibility
}
|
|
|
|
// DefaultLargeConfig returns configuration for 10K issue dataset
|
|
func DefaultLargeConfig() DataConfig {
|
|
return DataConfig{
|
|
TotalIssues: 10000,
|
|
EpicRatio: 0.1,
|
|
FeatureRatio: 0.3,
|
|
OpenRatio: 0.5,
|
|
CrossLinkRatio: 0.2,
|
|
MaxEpicAgeDays: 180,
|
|
MaxFeatureAgeDays: 150,
|
|
MaxTaskAgeDays: 120,
|
|
MaxClosedAgeDays: 30,
|
|
RandSeed: 42,
|
|
}
|
|
}
|
|
|
|
// DefaultXLargeConfig returns configuration for 20K issue dataset
|
|
func DefaultXLargeConfig() DataConfig {
|
|
return DataConfig{
|
|
TotalIssues: 20000,
|
|
EpicRatio: 0.1,
|
|
FeatureRatio: 0.3,
|
|
OpenRatio: 0.5,
|
|
CrossLinkRatio: 0.2,
|
|
MaxEpicAgeDays: 180,
|
|
MaxFeatureAgeDays: 150,
|
|
MaxTaskAgeDays: 120,
|
|
MaxClosedAgeDays: 30,
|
|
RandSeed: 43,
|
|
}
|
|
}
|
|
|
|
// LargeSQLite creates a 10K issue database with realistic patterns
|
|
func LargeSQLite(ctx context.Context, store storage.Storage) error {
|
|
cfg := DefaultLargeConfig()
|
|
return generateIssuesWithConfig(ctx, store, cfg)
|
|
}
|
|
|
|
// XLargeSQLite creates a 20K issue database with realistic patterns
|
|
func XLargeSQLite(ctx context.Context, store storage.Storage) error {
|
|
cfg := DefaultXLargeConfig()
|
|
return generateIssuesWithConfig(ctx, store, cfg)
|
|
}
|
|
|
|
// LargeFromJSONL creates a 10K issue database by exporting to JSONL and reimporting
|
|
func LargeFromJSONL(ctx context.Context, store storage.Storage, tempDir string) error {
|
|
cfg := DefaultLargeConfig()
|
|
cfg.RandSeed = 44 // different seed for JSONL path
|
|
return generateFromJSONL(ctx, store, tempDir, cfg)
|
|
}
|
|
|
|
// XLargeFromJSONL creates a 20K issue database by exporting to JSONL and reimporting
|
|
func XLargeFromJSONL(ctx context.Context, store storage.Storage, tempDir string) error {
|
|
cfg := DefaultXLargeConfig()
|
|
cfg.RandSeed = 45 // different seed for JSONL path
|
|
return generateFromJSONL(ctx, store, tempDir, cfg)
|
|
}
|
|
|
|
// generateIssuesWithConfig creates issues with realistic epic hierarchies and cross-links using provided configuration
|
|
func generateIssuesWithConfig(ctx context.Context, store storage.Storage, cfg DataConfig) error {
|
|
rng := rand.New(rand.NewSource(cfg.RandSeed)) // #nosec G404 -- deterministic math/rand used for repeatable fixture data
|
|
|
|
// Calculate breakdown using configuration ratios
|
|
numEpics := int(float64(cfg.TotalIssues) * cfg.EpicRatio)
|
|
numFeatures := int(float64(cfg.TotalIssues) * cfg.FeatureRatio)
|
|
numTasks := cfg.TotalIssues - numEpics - numFeatures
|
|
|
|
// Track created issues for cross-linking
|
|
var allIssues []*types.Issue
|
|
epicIssues := make([]*types.Issue, 0, numEpics)
|
|
featureIssues := make([]*types.Issue, 0, numFeatures)
|
|
taskIssues := make([]*types.Issue, 0, numTasks)
|
|
|
|
// Progress tracking
|
|
createdIssues := 0
|
|
lastPctLogged := -1
|
|
|
|
logProgress := func() {
|
|
pct := (createdIssues * 100) / cfg.TotalIssues
|
|
if pct >= lastPctLogged+10 {
|
|
fmt.Printf(" Progress: %d%% (%d/%d issues created)\n", pct, createdIssues, cfg.TotalIssues)
|
|
lastPctLogged = pct
|
|
}
|
|
}
|
|
|
|
// Create epics
|
|
for i := 0; i < numEpics; i++ {
|
|
issue := &types.Issue{
|
|
Title: fmt.Sprintf("%s (Epic %d)", epicTitles[i%len(epicTitles)], i),
|
|
Description: fmt.Sprintf("Epic for %s", epicTitles[i%len(epicTitles)]),
|
|
Status: randomStatus(rng, cfg.OpenRatio),
|
|
Priority: randomPriority(rng),
|
|
IssueType: types.TypeEpic,
|
|
Assignee: commonAssignees[rng.Intn(len(commonAssignees))],
|
|
CreatedAt: randomTime(rng, cfg.MaxEpicAgeDays),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
|
|
if issue.Status == types.StatusClosed {
|
|
closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
|
|
issue.ClosedAt = &closedAt
|
|
}
|
|
|
|
if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
|
|
return fmt.Errorf("failed to create epic: %w", err)
|
|
}
|
|
|
|
// Add labels to epics
|
|
for j := 0; j < rng.Intn(3)+1; j++ {
|
|
label := commonLabels[rng.Intn(len(commonLabels))]
|
|
_ = store.AddLabel(ctx, issue.ID, label, "fixture")
|
|
}
|
|
|
|
epicIssues = append(epicIssues, issue)
|
|
allIssues = append(allIssues, issue)
|
|
createdIssues++
|
|
logProgress()
|
|
}
|
|
|
|
// Create features under epics
|
|
for i := 0; i < numFeatures; i++ {
|
|
parentEpic := epicIssues[i%len(epicIssues)]
|
|
|
|
issue := &types.Issue{
|
|
Title: fmt.Sprintf("%s (Feature %d)", featureTitles[i%len(featureTitles)], i),
|
|
Description: fmt.Sprintf("Feature under %s", parentEpic.Title),
|
|
Status: randomStatus(rng, cfg.OpenRatio),
|
|
Priority: randomPriority(rng),
|
|
IssueType: types.TypeFeature,
|
|
Assignee: commonAssignees[rng.Intn(len(commonAssignees))],
|
|
CreatedAt: randomTime(rng, cfg.MaxFeatureAgeDays),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
|
|
if issue.Status == types.StatusClosed {
|
|
closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
|
|
issue.ClosedAt = &closedAt
|
|
}
|
|
|
|
if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
|
|
return fmt.Errorf("failed to create feature: %w", err)
|
|
}
|
|
|
|
// Add parent-child dependency to epic
|
|
dep := &types.Dependency{
|
|
IssueID: issue.ID,
|
|
DependsOnID: parentEpic.ID,
|
|
Type: types.DepParentChild,
|
|
CreatedAt: time.Now(),
|
|
CreatedBy: "fixture",
|
|
}
|
|
if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
|
|
return fmt.Errorf("failed to add feature-epic dependency: %w", err)
|
|
}
|
|
|
|
// Add labels
|
|
for j := 0; j < rng.Intn(3)+1; j++ {
|
|
label := commonLabels[rng.Intn(len(commonLabels))]
|
|
_ = store.AddLabel(ctx, issue.ID, label, "fixture")
|
|
}
|
|
|
|
featureIssues = append(featureIssues, issue)
|
|
allIssues = append(allIssues, issue)
|
|
createdIssues++
|
|
logProgress()
|
|
}
|
|
|
|
// Create tasks under features
|
|
for i := 0; i < numTasks; i++ {
|
|
parentFeature := featureIssues[i%len(featureIssues)]
|
|
|
|
issue := &types.Issue{
|
|
Title: fmt.Sprintf("%s (Task %d)", taskTitles[i%len(taskTitles)], i),
|
|
Description: fmt.Sprintf("Task under %s", parentFeature.Title),
|
|
Status: randomStatus(rng, cfg.OpenRatio),
|
|
Priority: randomPriority(rng),
|
|
IssueType: types.TypeTask,
|
|
Assignee: commonAssignees[rng.Intn(len(commonAssignees))],
|
|
CreatedAt: randomTime(rng, cfg.MaxTaskAgeDays),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
|
|
if issue.Status == types.StatusClosed {
|
|
closedAt := randomTime(rng, cfg.MaxClosedAgeDays)
|
|
issue.ClosedAt = &closedAt
|
|
}
|
|
|
|
if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
|
|
return fmt.Errorf("failed to create task: %w", err)
|
|
}
|
|
|
|
// Add parent-child dependency to feature
|
|
dep := &types.Dependency{
|
|
IssueID: issue.ID,
|
|
DependsOnID: parentFeature.ID,
|
|
Type: types.DepParentChild,
|
|
CreatedAt: time.Now(),
|
|
CreatedBy: "fixture",
|
|
}
|
|
if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
|
|
return fmt.Errorf("failed to add task-feature dependency: %w", err)
|
|
}
|
|
|
|
// Add labels
|
|
for j := 0; j < rng.Intn(2)+1; j++ {
|
|
label := commonLabels[rng.Intn(len(commonLabels))]
|
|
_ = store.AddLabel(ctx, issue.ID, label, "fixture")
|
|
}
|
|
|
|
taskIssues = append(taskIssues, issue)
|
|
allIssues = append(allIssues, issue)
|
|
createdIssues++
|
|
logProgress()
|
|
}
|
|
|
|
fmt.Printf(" Progress: 100%% (%d/%d issues created) - Complete!\n", cfg.TotalIssues, cfg.TotalIssues)
|
|
|
|
// Add cross-links between tasks across epics using configured ratio
|
|
numCrossLinks := int(float64(numTasks) * cfg.CrossLinkRatio)
|
|
for i := 0; i < numCrossLinks; i++ {
|
|
fromTask := taskIssues[rng.Intn(len(taskIssues))]
|
|
toTask := taskIssues[rng.Intn(len(taskIssues))]
|
|
|
|
// Avoid self-dependencies
|
|
if fromTask.ID == toTask.ID {
|
|
continue
|
|
}
|
|
|
|
dep := &types.Dependency{
|
|
IssueID: fromTask.ID,
|
|
DependsOnID: toTask.ID,
|
|
Type: types.DepBlocks,
|
|
CreatedAt: time.Now(),
|
|
CreatedBy: "fixture",
|
|
}
|
|
|
|
// Ignore cycle errors for cross-links (they're expected)
|
|
_ = store.AddDependency(ctx, dep, "fixture")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// generateFromJSONL creates issues, exports to JSONL, clears DB, and reimports
|
|
func generateFromJSONL(ctx context.Context, store storage.Storage, tempDir string, cfg DataConfig) error {
|
|
// First generate issues normally
|
|
if err := generateIssuesWithConfig(ctx, store, cfg); err != nil {
|
|
return fmt.Errorf("failed to generate issues: %w", err)
|
|
}
|
|
|
|
// Export to JSONL
|
|
jsonlPath := filepath.Join(tempDir, "issues.jsonl")
|
|
if err := exportToJSONL(ctx, store, jsonlPath); err != nil {
|
|
return fmt.Errorf("failed to export to JSONL: %w", err)
|
|
}
|
|
|
|
// Clear all issues (we'll reimport them)
|
|
allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get all issues: %w", err)
|
|
}
|
|
|
|
for _, issue := range allIssues {
|
|
if err := store.DeleteIssue(ctx, issue.ID); err != nil {
|
|
return fmt.Errorf("failed to delete issue %s: %w", issue.ID, err)
|
|
}
|
|
}
|
|
|
|
// Import from JSONL
|
|
if err := importFromJSONL(ctx, store, jsonlPath); err != nil {
|
|
return fmt.Errorf("failed to import from JSONL: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// exportToJSONL exports all issues to a JSONL file
|
|
func exportToJSONL(ctx context.Context, store storage.Storage, path string) error {
|
|
// Get all issues
|
|
allIssues, err := store.SearchIssues(ctx, "", types.IssueFilter{})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to query issues: %w", err)
|
|
}
|
|
|
|
// Populate dependencies and labels for each issue
|
|
allDeps, err := store.GetAllDependencyRecords(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get dependencies: %w", err)
|
|
}
|
|
|
|
for _, issue := range allIssues {
|
|
issue.Dependencies = allDeps[issue.ID]
|
|
|
|
labels, err := store.GetLabels(ctx, issue.ID)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get labels for %s: %w", issue.ID, err)
|
|
}
|
|
issue.Labels = labels
|
|
}
|
|
|
|
// Write to JSONL file
|
|
// #nosec G304 -- fixture exports to deterministic file controlled by tests
|
|
f, err := os.Create(path)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create JSONL file: %w", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
encoder := json.NewEncoder(f)
|
|
for _, issue := range allIssues {
|
|
if err := encoder.Encode(issue); err != nil {
|
|
return fmt.Errorf("failed to encode issue: %w", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// importFromJSONL imports issues from a JSONL file
|
|
func importFromJSONL(ctx context.Context, store storage.Storage, path string) error {
|
|
// Read JSONL file
|
|
// #nosec G304 -- fixture imports from deterministic file created earlier in test
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read JSONL file: %w", err)
|
|
}
|
|
|
|
// Parse issues
|
|
var issues []*types.Issue
|
|
lines := string(data)
|
|
for i, line := range splitLines(lines) {
|
|
if len(line) == 0 {
|
|
continue
|
|
}
|
|
|
|
var issue types.Issue
|
|
if err := json.Unmarshal([]byte(line), &issue); err != nil {
|
|
return fmt.Errorf("failed to parse issue at line %d: %w", i+1, err)
|
|
}
|
|
|
|
issues = append(issues, &issue)
|
|
}
|
|
|
|
// Import issues directly using storage interface
|
|
// Step 1: Create all issues first (without dependencies/labels)
|
|
type savedMetadata struct {
|
|
deps []*types.Dependency
|
|
labels []string
|
|
}
|
|
metadata := make(map[string]savedMetadata)
|
|
|
|
for _, issue := range issues {
|
|
// Save dependencies and labels for later
|
|
metadata[issue.ID] = savedMetadata{
|
|
deps: issue.Dependencies,
|
|
labels: issue.Labels,
|
|
}
|
|
issue.Dependencies = nil
|
|
issue.Labels = nil
|
|
|
|
if err := store.CreateIssue(ctx, issue, "fixture"); err != nil {
|
|
// Ignore duplicate errors
|
|
if !strings.Contains(err.Error(), "UNIQUE constraint failed") {
|
|
return fmt.Errorf("failed to create issue %s: %w", issue.ID, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 2: Add all dependencies (now that all issues exist)
|
|
for issueID, meta := range metadata {
|
|
for _, dep := range meta.deps {
|
|
if err := store.AddDependency(ctx, dep, "fixture"); err != nil {
|
|
// Ignore duplicate and cycle errors
|
|
if !strings.Contains(err.Error(), "already exists") &&
|
|
!strings.Contains(err.Error(), "cycle") {
|
|
return fmt.Errorf("failed to add dependency for %s: %w", issueID, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Add labels
|
|
for _, label := range meta.labels {
|
|
_ = store.AddLabel(ctx, issueID, label, "fixture")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// splitLines splits s on '\n' bytes. Unlike strings.Split, a trailing
// newline does not produce a final empty element, and an empty input yields
// a nil slice.
func splitLines(s string) []string {
	var lines []string
	for len(s) > 0 {
		nl := -1
		for i := 0; i < len(s); i++ {
			if s[i] == '\n' {
				nl = i
				break
			}
		}
		if nl < 0 {
			// No newline left: the remainder is the final line.
			lines = append(lines, s)
			break
		}
		lines = append(lines, s[:nl])
		s = s[nl+1:]
	}
	return lines
}
|
|
|
|
// randomStatus returns a random status with given open ratio
|
|
func randomStatus(rng *rand.Rand, openRatio float64) types.Status {
|
|
r := rng.Float64()
|
|
if r < openRatio {
|
|
// Open statuses: open, in_progress, blocked
|
|
statuses := []types.Status{types.StatusOpen, types.StatusInProgress, types.StatusBlocked}
|
|
return statuses[rng.Intn(len(statuses))]
|
|
}
|
|
return types.StatusClosed
|
|
}
|
|
|
|
// randomPriority returns a random priority with realistic distribution
|
|
// P0: 5%, P1: 15%, P2: 50%, P3: 25%, P4: 5%
|
|
func randomPriority(rng *rand.Rand) int {
|
|
r := rng.Intn(100)
|
|
switch {
|
|
case r < 5:
|
|
return 0
|
|
case r < 20:
|
|
return 1
|
|
case r < 70:
|
|
return 2
|
|
case r < 95:
|
|
return 3
|
|
default:
|
|
return 4
|
|
}
|
|
}
|
|
|
|
// randomTime returns a random time up to maxDaysAgo days in the past
|
|
func randomTime(rng *rand.Rand, maxDaysAgo int) time.Time {
|
|
daysAgo := rng.Intn(maxDaysAgo)
|
|
return time.Now().Add(-time.Duration(daysAgo) * 24 * time.Hour)
|
|
}
|