refactor(suggest): extract magic numbers to named constants (#353)
Extract 9 hardcoded scoring weights from similarity() into documented
package-level constants:

- ScoreExactMatch (1000) - identical string match
- ScorePrefixWeight (20) - per-char prefix bonus
- ScoreContainsFullWeight (15) - search term in candidate
- ScoreSuffixWeight (10) - per-char suffix bonus
- ScoreContainsPartialWeight (10) - candidate in search term
- ScoreDistanceWeight (5) - Levenshtein close match
- ScoreCommonCharsWeight (2) - shared character bonus
- LengthDiffThreshold (5) - penalty trigger threshold
- LengthDiffPenalty (2) - per-char length difference penalty

No behavior change - same scores, now with godoc documentation.

Closes: gt-kf7fw
Co-authored-by: furiosa <gt@gastown.local>
This commit is contained in:
@@ -7,6 +7,42 @@ import (
|
|||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Scoring weights for similarity calculation.
|
||||||
|
// Higher values indicate stronger signal for a match.
|
||||||
|
const (
|
||||||
|
// ScoreExactMatch is awarded when two strings are identical.
|
||||||
|
ScoreExactMatch = 1000
|
||||||
|
|
||||||
|
// ScorePrefixWeight is the per-character bonus for matching prefixes.
|
||||||
|
// Prefix matches are weighted highly as users often type the start of commands.
|
||||||
|
ScorePrefixWeight = 20
|
||||||
|
|
||||||
|
// ScoreContainsFullWeight is awarded per character when the search term
|
||||||
|
// is fully contained within the candidate.
|
||||||
|
ScoreContainsFullWeight = 15
|
||||||
|
|
||||||
|
// ScoreSuffixWeight is the per-character bonus for matching suffixes.
|
||||||
|
ScoreSuffixWeight = 10
|
||||||
|
|
||||||
|
// ScoreContainsPartialWeight is awarded per character when the candidate
|
||||||
|
// is contained within the search term.
|
||||||
|
ScoreContainsPartialWeight = 10
|
||||||
|
|
||||||
|
// ScoreDistanceWeight is the per-character bonus for close Levenshtein distance.
|
||||||
|
// Applied when edit distance is at most half the longer string's length.
|
||||||
|
ScoreDistanceWeight = 5
|
||||||
|
|
||||||
|
// ScoreCommonCharsWeight is the per-character bonus for shared characters.
|
||||||
|
ScoreCommonCharsWeight = 2
|
||||||
|
|
||||||
|
// LengthDiffThreshold is the length difference above which a penalty applies.
|
||||||
|
LengthDiffThreshold = 5
|
||||||
|
|
||||||
|
// LengthDiffPenalty is the per-character penalty for length differences
|
||||||
|
// exceeding LengthDiffThreshold; it is multiplied by the full length difference, not only the excess over the threshold.
|
||||||
|
LengthDiffPenalty = 2
|
||||||
|
)
|
||||||
|
|
||||||
// Match represents a potential match with its score.
|
// Match represents a potential match with its score.
|
||||||
type Match struct {
|
type Match struct {
|
||||||
Value string
|
Value string
|
||||||
@@ -55,7 +91,7 @@ func FindSimilar(target string, candidates []string, maxResults int) []string {
|
|||||||
// - Common substring matching
|
// - Common substring matching
|
||||||
func similarity(a, b string) int {
|
func similarity(a, b string) int {
|
||||||
if a == b {
|
if a == b {
|
||||||
return 1000 // Exact match
|
return ScoreExactMatch
|
||||||
}
|
}
|
||||||
|
|
||||||
score := 0
|
score := 0
|
||||||
@@ -63,20 +99,20 @@ func similarity(a, b string) int {
|
|||||||
// Prefix matching - high value
|
// Prefix matching - high value
|
||||||
prefixLen := commonPrefixLength(a, b)
|
prefixLen := commonPrefixLength(a, b)
|
||||||
if prefixLen > 0 {
|
if prefixLen > 0 {
|
||||||
score += prefixLen * 20
|
score += prefixLen * ScorePrefixWeight
|
||||||
}
|
}
|
||||||
|
|
||||||
// Suffix matching
|
// Suffix matching
|
||||||
suffixLen := commonSuffixLength(a, b)
|
suffixLen := commonSuffixLength(a, b)
|
||||||
if suffixLen > 0 {
|
if suffixLen > 0 {
|
||||||
score += suffixLen * 10
|
score += suffixLen * ScoreSuffixWeight
|
||||||
}
|
}
|
||||||
|
|
||||||
// Contains matching
|
// Contains matching
|
||||||
if strings.Contains(b, a) {
|
if strings.Contains(b, a) {
|
||||||
score += len(a) * 15
|
score += len(a) * ScoreContainsFullWeight
|
||||||
} else if strings.Contains(a, b) {
|
} else if strings.Contains(a, b) {
|
||||||
score += len(b) * 10
|
score += len(b) * ScoreContainsPartialWeight
|
||||||
}
|
}
|
||||||
|
|
||||||
// Levenshtein distance for close matches
|
// Levenshtein distance for close matches
|
||||||
@@ -84,19 +120,19 @@ func similarity(a, b string) int {
|
|||||||
maxLen := max(len(a), len(b))
|
maxLen := max(len(a), len(b))
|
||||||
if maxLen > 0 && dist <= maxLen/2 {
|
if maxLen > 0 && dist <= maxLen/2 {
|
||||||
// Closer distance = higher score
|
// Closer distance = higher score
|
||||||
score += (maxLen - dist) * 5
|
score += (maxLen - dist) * ScoreDistanceWeight
|
||||||
}
|
}
|
||||||
|
|
||||||
// Common characters bonus (order-independent)
|
// Common characters bonus (order-independent)
|
||||||
common := commonChars(a, b)
|
common := commonChars(a, b)
|
||||||
if common > 0 {
|
if common > 0 {
|
||||||
score += common * 2
|
score += common * ScoreCommonCharsWeight
|
||||||
}
|
}
|
||||||
|
|
||||||
// Penalize very different lengths
|
// Penalize very different lengths
|
||||||
lenDiff := abs(len(a) - len(b))
|
lenDiff := abs(len(a) - len(b))
|
||||||
if lenDiff > 5 {
|
if lenDiff > LengthDiffThreshold {
|
||||||
score -= lenDiff * 2
|
score -= lenDiff * LengthDiffPenalty
|
||||||
}
|
}
|
||||||
|
|
||||||
return score
|
return score
|
||||||
|
|||||||
Reference in New Issue
Block a user