Complete bd-95: Add content-addressable identity (ContentHash field)

This commit is contained in:
Steve Yegge
2025-10-28 18:57:16 -07:00
parent ad267b5de6
commit d9eb273e15
10 changed files with 287 additions and 45 deletions

View File

@@ -375,6 +375,14 @@ func RemapCollisions(ctx context.Context, s *SQLiteStorage, collisions []*Collis
// Process each collision based on which version should be remapped
for _, collision := range collisions {
// Reject collisions with nil issues (shouldn't happen but be defensive)
if collision.IncomingIssue == nil {
return nil, fmt.Errorf("collision %s has nil IncomingIssue", collision.ID)
}
if collision.ExistingIssue == nil {
return nil, fmt.Errorf("collision %s has nil ExistingIssue", collision.ID)
}
oldID := collision.ID
// Allocate new ID using atomic counter

View File

@@ -640,34 +640,18 @@ func TestScoreCollisions(t *testing.T) {
t.Fatalf("ScoreCollisions failed: %v", err)
}
// Verify scores were calculated
// bd-4: 0 references (no mentions, no deps)
// bd-1: 1 reference (bd-1 → bd-2 dependency)
// bd-3: 1 reference (bd-3 → bd-2 dependency)
// bd-2: high references (mentioned in bd-1, bd-3 multiple times + 2 deps as target)
// bd-1 desc (1) + bd-3 desc (3: "bd-2 multiple", "bd-2 and", "bd-2") + bd-3 notes (1) + 2 deps = 7
if collisions[0].ID != "bd-4" {
t.Errorf("expected first collision to be bd-4 (lowest score), got %s", collisions[0].ID)
}
if collisions[0].ReferenceScore != 0 {
t.Errorf("expected bd-4 to have score 0, got %d", collisions[0].ReferenceScore)
}
// bd-2 should be last (highest score)
lastIdx := len(collisions) - 1
if collisions[lastIdx].ID != "bd-2" {
t.Errorf("expected last collision to be bd-2 (highest score), got %s", collisions[lastIdx].ID)
}
if collisions[lastIdx].ReferenceScore != 7 {
t.Errorf("expected bd-2 to have score 7, got %d", collisions[lastIdx].ReferenceScore)
}
// Verify sorting (ascending order)
for i := 1; i < len(collisions); i++ {
if collisions[i].ReferenceScore < collisions[i-1].ReferenceScore {
t.Errorf("collisions not sorted: collision[%d] score %d < collision[%d] score %d",
i, collisions[i].ReferenceScore, i-1, collisions[i-1].ReferenceScore)
// Verify RemapIncoming was set based on content hashes (bd-95)
// ScoreCollisions now uses content-based hashing instead of reference counting
// Each collision should have RemapIncoming set based on hash comparison
for _, collision := range collisions {
existingHash := hashIssueContent(collision.ExistingIssue)
incomingHash := hashIssueContent(collision.IncomingIssue)
expectedRemapIncoming := existingHash < incomingHash
if collision.RemapIncoming != expectedRemapIncoming {
t.Errorf("collision %s: RemapIncoming=%v but expected %v (existingHash=%s, incomingHash=%s)",
collision.ID, collision.RemapIncoming, expectedRemapIncoming,
existingHash[:8], incomingHash[:8])
}
}
}
@@ -832,9 +816,35 @@ func TestRemapCollisions(t *testing.T) {
t.Fatalf("failed to create existing issue: %v", err)
}
// Create existing issues in DB that will collide with incoming issues
dbIssue2 := &types.Issue{
ID: "bd-2",
Title: "Existing issue bd-2",
Description: "Original content for bd-2",
Status: types.StatusOpen,
Priority: 2,
IssueType: types.TypeTask,
}
if err := store.CreateIssue(ctx, dbIssue2, "test"); err != nil {
t.Fatalf("failed to create dbIssue2: %v", err)
}
dbIssue3 := &types.Issue{
ID: "bd-3",
Title: "Existing issue bd-3",
Description: "Original content for bd-3",
Status: types.StatusOpen,
Priority: 2,
IssueType: types.TypeTask,
}
if err := store.CreateIssue(ctx, dbIssue3, "test"); err != nil {
t.Fatalf("failed to create dbIssue3: %v", err)
}
// Create collisions (incoming issues with same IDs as DB but different content)
collision1 := &CollisionDetail{
ID: "bd-2",
ExistingIssue: dbIssue2,
IncomingIssue: &types.Issue{
ID: "bd-2",
Title: "Collision 2 (has fewer references)",
@@ -843,11 +853,13 @@ func TestRemapCollisions(t *testing.T) {
Priority: 1,
IssueType: types.TypeTask,
},
RemapIncoming: true, // Incoming will be remapped
ReferenceScore: 2, // Fewer references
}
collision2 := &CollisionDetail{
ID: "bd-3",
ExistingIssue: dbIssue3,
IncomingIssue: &types.Issue{
ID: "bd-3",
Title: "Collision 3 (has more references)",
@@ -856,11 +868,12 @@ func TestRemapCollisions(t *testing.T) {
Priority: 1,
IssueType: types.TypeTask,
},
RemapIncoming: true, // Incoming will be remapped
ReferenceScore: 5, // More references
}
collisions := []*CollisionDetail{collision1, collision2}
allIssues := []*types.Issue{existingIssue, collision1.IncomingIssue, collision2.IncomingIssue}
allIssues := []*types.Issue{existingIssue, dbIssue2, dbIssue3, collision1.IncomingIssue, collision2.IncomingIssue}
// Remap collisions
idMapping, err := RemapCollisions(ctx, store, collisions, allIssues)

View File

@@ -358,7 +358,7 @@ func (s *SQLiteStorage) removeDependencyIfExists(ctx context.Context, issueID, d
// GetDependencies returns issues that this issue depends on
func (s *SQLiteStorage) GetDependencies(ctx context.Context, issueID string) ([]*types.Issue, error) {
rows, err := s.db.QueryContext(ctx, `
SELECT i.id, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
SELECT i.id, i.content_hash, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
i.status, i.priority, i.issue_type, i.assignee, i.estimated_minutes,
i.created_at, i.updated_at, i.closed_at, i.external_ref
FROM issues i
@@ -377,7 +377,7 @@ func (s *SQLiteStorage) GetDependencies(ctx context.Context, issueID string) ([]
// GetDependents returns issues that depend on this issue
func (s *SQLiteStorage) GetDependents(ctx context.Context, issueID string) ([]*types.Issue, error) {
rows, err := s.db.QueryContext(ctx, `
SELECT i.id, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
SELECT i.id, i.content_hash, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
i.status, i.priority, i.issue_type, i.assignee, i.estimated_minutes,
i.created_at, i.updated_at, i.closed_at, i.external_ref
FROM issues i
@@ -710,13 +710,14 @@ func (s *SQLiteStorage) scanIssues(ctx context.Context, rows *sql.Rows) ([]*type
var issues []*types.Issue
for rows.Next() {
var issue types.Issue
var contentHash sql.NullString
var closedAt sql.NullTime
var estimatedMinutes sql.NullInt64
var assignee sql.NullString
var externalRef sql.NullString
err := rows.Scan(
&issue.ID, &issue.Title, &issue.Description, &issue.Design,
&issue.ID, &contentHash, &issue.Title, &issue.Description, &issue.Design,
&issue.AcceptanceCriteria, &issue.Notes, &issue.Status,
&issue.Priority, &issue.IssueType, &assignee, &estimatedMinutes,
&issue.CreatedAt, &issue.UpdatedAt, &closedAt, &externalRef,
@@ -725,6 +726,9 @@ func (s *SQLiteStorage) scanIssues(ctx context.Context, rows *sql.Rows) ([]*type
return nil, fmt.Errorf("failed to scan issue: %w", err)
}
if contentHash.Valid {
issue.ContentHash = contentHash.String
}
if closedAt.Valid {
issue.ClosedAt = &closedAt.Time
}

View File

@@ -99,7 +99,7 @@ func (s *SQLiteStorage) GetLabels(ctx context.Context, issueID string) ([]string
// GetIssuesByLabel returns issues with a specific label
func (s *SQLiteStorage) GetIssuesByLabel(ctx context.Context, label string) ([]*types.Issue, error) {
rows, err := s.db.QueryContext(ctx, `
SELECT i.id, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
SELECT i.id, i.content_hash, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
i.status, i.priority, i.issue_type, i.assignee, i.estimated_minutes,
i.created_at, i.updated_at, i.closed_at, i.external_ref
FROM issues i

View File

@@ -85,7 +85,7 @@ func (s *SQLiteStorage) GetReadyWork(ctx context.Context, filter types.WorkFilte
)
-- Step 3: Select ready issues (excluding all blocked)
SELECT i.id, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
SELECT i.id, i.content_hash, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
i.status, i.priority, i.issue_type, i.assignee, i.estimated_minutes,
i.created_at, i.updated_at, i.closed_at, i.external_ref
FROM issues i

View File

@@ -863,7 +863,7 @@ func TestExplainQueryPlanReadyWork(t *testing.T) {
WHERE d.type = 'parent-child'
AND bt.depth < 50
)
SELECT i.id, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
SELECT i.id, i.content_hash, i.title, i.description, i.design, i.acceptance_criteria, i.notes,
i.status, i.priority, i.issue_type, i.assignee, i.estimated_minutes,
i.created_at, i.updated_at, i.closed_at, i.external_ref
FROM issues i

View File

@@ -4,6 +4,7 @@ const schema = `
-- Issues table
CREATE TABLE IF NOT EXISTS issues (
id TEXT PRIMARY KEY,
content_hash TEXT,
title TEXT NOT NULL CHECK(length(title) <= 500),
description TEXT NOT NULL DEFAULT '',
design TEXT NOT NULL DEFAULT '',

View File

@@ -121,6 +121,11 @@ func New(path string) (*SQLiteStorage, error) {
return nil, fmt.Errorf("failed to migrate export_hashes table: %w", err)
}
// Migrate existing databases to add content_hash column (bd-95)
if err := migrateContentHashColumn(db); err != nil {
return nil, fmt.Errorf("failed to migrate content_hash column: %w", err)
}
// Convert to absolute path for consistency
absPath, err := filepath.Abs(path)
if err != nil {
@@ -518,6 +523,102 @@ func migrateExportHashesTable(db *sql.DB) error {
return nil
}
// migrateContentHashColumn adds the content_hash column to the issues table if missing (bd-95).
// This enables global N-way collision resolution by providing content-addressable identity.
//
// When the column is absent, the whole migration (ALTER TABLE, index creation,
// and the backfill of hashes for existing rows) runs inside a single
// transaction. SQLite applies DDL transactionally, so a failure part-way rolls
// everything back and the next call retries from scratch. If the column were
// added outside the transaction and the backfill then failed, later calls
// would find the column, skip the backfill, and leave existing issues with a
// NULL content_hash.
//
// Returns nil when the column already exists or when the migration commits.
func migrateContentHashColumn(db *sql.DB) error {
	// Check if content_hash column exists
	var colName string
	err := db.QueryRow(`
		SELECT name FROM pragma_table_info('issues')
		WHERE name = 'content_hash'
	`).Scan(&colName)
	if err == nil {
		// Column already exists
		return nil
	}
	if err != sql.ErrNoRows {
		return fmt.Errorf("failed to check content_hash column: %w", err)
	}

	// Column doesn't exist: add it, index it, and backfill atomically.
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("failed to begin transaction: %w", err)
	}
	// Rollback after a successful Commit only reports sql.ErrTxDone, which is
	// safe to ignore; on every early return it undoes the partial migration.
	defer func() { _ = tx.Rollback() }()

	if _, err := tx.Exec(`ALTER TABLE issues ADD COLUMN content_hash TEXT`); err != nil {
		return fmt.Errorf("failed to add content_hash column: %w", err)
	}

	// Create index on content_hash for fast lookups
	if _, err := tx.Exec(`CREATE INDEX IF NOT EXISTS idx_issues_content_hash ON issues(content_hash)`); err != nil {
		return fmt.Errorf("failed to create content_hash index: %w", err)
	}

	// Populate content_hash for all existing issues
	if err := backfillContentHashes(tx); err != nil {
		return err
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("failed to commit transaction: %w", err)
	}
	return nil
}

// backfillContentHashes computes a content hash for every row currently in the
// issues table and writes it to that row's content_hash column, all within tx.
func backfillContentHashes(tx *sql.Tx) error {
	rows, err := tx.Query(`
		SELECT id, title, description, design, acceptance_criteria, notes,
		       status, priority, issue_type, assignee, external_ref
		FROM issues
	`)
	if err != nil {
		return fmt.Errorf("failed to query existing issues: %w", err)
	}
	defer rows.Close()

	// Collect issues and compute hashes
	updates := make(map[string]string) // id -> content_hash
	for rows.Next() {
		var issue types.Issue
		var assignee sql.NullString
		var externalRef sql.NullString
		if err := rows.Scan(
			&issue.ID, &issue.Title, &issue.Description, &issue.Design,
			&issue.AcceptanceCriteria, &issue.Notes, &issue.Status,
			&issue.Priority, &issue.IssueType, &assignee, &externalRef,
		); err != nil {
			return fmt.Errorf("failed to scan issue: %w", err)
		}
		if assignee.Valid {
			issue.Assignee = assignee.String
		}
		if externalRef.Valid {
			issue.ExternalRef = &externalRef.String
		}

		// Compute and store hash
		updates[issue.ID] = issue.ComputeContentHash()
	}
	if err := rows.Err(); err != nil {
		return fmt.Errorf("error iterating issues: %w", err)
	}
	// Release the result set before issuing writes on the transaction's
	// connection; Close is idempotent, so the deferred call stays harmless.
	if err := rows.Close(); err != nil {
		return fmt.Errorf("failed to close issue rows: %w", err)
	}

	// Apply hash updates in batch
	stmt, err := tx.Prepare(`UPDATE issues SET content_hash = ? WHERE id = ?`)
	if err != nil {
		return fmt.Errorf("failed to prepare update statement: %w", err)
	}
	defer stmt.Close()

	for id, hash := range updates {
		if _, err := stmt.Exec(hash, id); err != nil {
			return fmt.Errorf("failed to update content_hash for issue %s: %w", id, err)
		}
	}
	return nil
}
// getNextIDForPrefix atomically generates the next ID for a given prefix
// Uses the issue_counters table for atomic, cross-process ID generation
func (s *SQLiteStorage) getNextIDForPrefix(ctx context.Context, prefix string) (int, error) {
@@ -588,6 +689,11 @@ func (s *SQLiteStorage) CreateIssue(ctx context.Context, issue *types.Issue, act
issue.CreatedAt = now
issue.UpdatedAt = now
// Compute content hash (bd-95)
if issue.ContentHash == "" {
issue.ContentHash = issue.ComputeContentHash()
}
// Acquire a dedicated connection for the transaction.
// This is necessary because we need to execute raw SQL ("BEGIN IMMEDIATE", "COMMIT")
// on the same connection, and database/sql's connection pool would otherwise
@@ -677,12 +783,12 @@ func (s *SQLiteStorage) CreateIssue(ctx context.Context, issue *types.Issue, act
// Insert issue
_, err = conn.ExecContext(ctx, `
INSERT INTO issues (
id, title, description, design, acceptance_criteria, notes,
id, content_hash, title, description, design, acceptance_criteria, notes,
status, priority, issue_type, assignee, estimated_minutes,
created_at, updated_at, closed_at, external_ref
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
issue.ID, issue.Title, issue.Description, issue.Design,
issue.ID, issue.ContentHash, issue.Title, issue.Description, issue.Design,
issue.AcceptanceCriteria, issue.Notes, issue.Status,
issue.Priority, issue.IssueType, issue.Assignee,
issue.EstimatedMinutes, issue.CreatedAt, issue.UpdatedAt,
@@ -800,13 +906,17 @@ func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue
return fmt.Errorf("failed to generate ID range: %w", err)
}
// Assign IDs sequentially from the reserved range
// Assign IDs sequentially from the reserved range and compute content hashes
currentID := nextID - needIDCount + 1
for i := range issues {
if issues[i].ID == "" {
issues[i].ID = fmt.Sprintf("%s-%d", prefix, currentID)
currentID++
}
// Compute content hash if not already set (bd-95)
if issues[i].ContentHash == "" {
issues[i].ContentHash = issues[i].ComputeContentHash()
}
}
return nil
}
@@ -815,10 +925,10 @@ func generateBatchIDs(ctx context.Context, conn *sql.Conn, issues []*types.Issue
func bulkInsertIssues(ctx context.Context, conn *sql.Conn, issues []*types.Issue) error {
stmt, err := conn.PrepareContext(ctx, `
INSERT INTO issues (
id, title, description, design, acceptance_criteria, notes,
id, content_hash, title, description, design, acceptance_criteria, notes,
status, priority, issue_type, assignee, estimated_minutes,
created_at, updated_at, closed_at, external_ref
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`)
if err != nil {
return fmt.Errorf("failed to prepare statement: %w", err)
@@ -827,7 +937,7 @@ func bulkInsertIssues(ctx context.Context, conn *sql.Conn, issues []*types.Issue
for _, issue := range issues {
_, err = stmt.ExecContext(ctx,
issue.ID, issue.Title, issue.Description, issue.Design,
issue.ID, issue.ContentHash, issue.Title, issue.Description, issue.Design,
issue.AcceptanceCriteria, issue.Notes, issue.Status,
issue.Priority, issue.IssueType, issue.Assignee,
issue.EstimatedMinutes, issue.CreatedAt, issue.UpdatedAt,
@@ -1004,16 +1114,17 @@ func (s *SQLiteStorage) GetIssue(ctx context.Context, id string) (*types.Issue,
var compactedAt sql.NullTime
var originalSize sql.NullInt64
var contentHash sql.NullString
var compactedAtCommit sql.NullString
err := s.db.QueryRowContext(ctx, `
SELECT id, title, description, design, acceptance_criteria, notes,
SELECT id, content_hash, title, description, design, acceptance_criteria, notes,
status, priority, issue_type, assignee, estimated_minutes,
created_at, updated_at, closed_at, external_ref,
compaction_level, compacted_at, compacted_at_commit, original_size
FROM issues
WHERE id = ?
`, id).Scan(
&issue.ID, &issue.Title, &issue.Description, &issue.Design,
&issue.ID, &contentHash, &issue.Title, &issue.Description, &issue.Design,
&issue.AcceptanceCriteria, &issue.Notes, &issue.Status,
&issue.Priority, &issue.IssueType, &assignee, &estimatedMinutes,
&issue.CreatedAt, &issue.UpdatedAt, &closedAt, &externalRef,
@@ -1027,6 +1138,9 @@ func (s *SQLiteStorage) GetIssue(ctx context.Context, id string) (*types.Issue,
return nil, fmt.Errorf("failed to get issue: %w", err)
}
if contentHash.Valid {
issue.ContentHash = contentHash.String
}
if closedAt.Valid {
issue.ClosedAt = &closedAt.Time
}
@@ -1232,6 +1346,66 @@ func (s *SQLiteStorage) UpdateIssue(ctx context.Context, id string, updates map[
// Auto-manage closed_at when status changes (enforce invariant)
setClauses, args = manageClosedAt(oldIssue, updates, setClauses, args)
// Recompute content_hash if any content fields changed (bd-95)
contentChanged := false
contentFields := []string{"title", "description", "design", "acceptance_criteria", "notes", "status", "priority", "issue_type", "assignee", "external_ref"}
for _, field := range contentFields {
if _, exists := updates[field]; exists {
contentChanged = true
break
}
}
if contentChanged {
// Get updated issue to compute hash
updatedIssue := *oldIssue
for key, value := range updates {
switch key {
case "title":
updatedIssue.Title = value.(string)
case "description":
updatedIssue.Description = value.(string)
case "design":
updatedIssue.Design = value.(string)
case "acceptance_criteria":
updatedIssue.AcceptanceCriteria = value.(string)
case "notes":
updatedIssue.Notes = value.(string)
case "status":
// Handle both string and types.Status
if s, ok := value.(types.Status); ok {
updatedIssue.Status = s
} else {
updatedIssue.Status = types.Status(value.(string))
}
case "priority":
updatedIssue.Priority = value.(int)
case "issue_type":
// Handle both string and types.IssueType
if t, ok := value.(types.IssueType); ok {
updatedIssue.IssueType = t
} else {
updatedIssue.IssueType = types.IssueType(value.(string))
}
case "assignee":
if value == nil {
updatedIssue.Assignee = ""
} else {
updatedIssue.Assignee = value.(string)
}
case "external_ref":
if value == nil {
updatedIssue.ExternalRef = nil
} else {
str := value.(string)
updatedIssue.ExternalRef = &str
}
}
}
newHash := updatedIssue.ComputeContentHash()
setClauses = append(setClauses, "content_hash = ?")
args = append(args, newHash)
}
args = append(args, id)
// Start transaction
@@ -1861,7 +2035,7 @@ func (s *SQLiteStorage) SearchIssues(ctx context.Context, query string, filter t
// #nosec G201 - safe SQL with controlled formatting
querySQL := fmt.Sprintf(`
SELECT id, title, description, design, acceptance_criteria, notes,
SELECT id, content_hash, title, description, design, acceptance_criteria, notes,
status, priority, issue_type, assignee, estimated_minutes,
created_at, updated_at, closed_at, external_ref
FROM issues