@@ -3,8 +3,30 @@ package spellchecker
33import (
44 "bufio"
55 "math"
6+
7+ "github.com/agext/levenshtein"
68)
79
10+ // WithOpt set spellchecker options
11+ func (s * Spellchecker ) WithOpts (opts ... OptionFunc ) error {
12+ s .mtx .Lock ()
13+ defer s .mtx .Unlock ()
14+
15+ for _ , o := range opts {
16+ if err := o (s ); err != nil {
17+ return err
18+ }
19+ }
20+
21+ if s .scoreFunc != nil {
22+ s .dict .filterFunc = wrapScoreFunc (s .scoreFunc , s .maxErrors )
23+ } else {
24+ s .dict .filterFunc = s .filterFunc
25+ }
26+
27+ return nil
28+ }
29+
830// WithSplitter set splitter func for AddFrom() reader
931func WithSplitter (f bufio.SplitFunc ) OptionFunc {
1032 return func (s * Spellchecker ) error {
@@ -15,28 +37,82 @@ func WithSplitter(f bufio.SplitFunc) OptionFunc {
1537
1638// WithMaxErrors sets maxErrors — the maximum allowed difference in bits
1739// between the "search word" and a "dictionary word".
18- // For example, replacing a single character (problam => problem)
19- // is treated as a two-bit difference.
20- // It is not recommended to set a value greater than 2,
21- // as it can significantly impact performance.
40+ // - deletion is a 1-bit change (proble → problem)
41+ // - insertion is a 1-bit change (problemm → problem)
42+ // - substitution is a 2-bit change (problam → problem)
43+ // - transposition is a 0-bit change (problme → problem)
44+ //
45+ // It is not recommended to set this value greater than 2,
46+ // as it can significantly affect performance.
2247func WithMaxErrors (maxErrors int ) OptionFunc {
2348 return func (s * Spellchecker ) error {
2449 s .maxErrors = maxErrors
50+
2551 return nil
2652 }
2753}
2854
29- type ScoreFunc = scoreFunc
// FilterFunc scores a candidate word against the source word.
//
// It receives the source word, the candidate word and the candidate's
// count, and returns the candidate's score together with a keep flag.
// A false flag means the candidate is filtered out completely.
type FilterFunc func(src []rune, candidate []rune, count uint) (score float64, keep bool)
59+
60+ // WithFilterFunc set custom scoring function
61+ func WithFilterFunc (f FilterFunc ) OptionFunc {
62+ return func (s * Spellchecker ) error {
63+ s .filterFunc = f
64+ return nil
65+ }
66+ }
67+
// ScoreFunc is the legacy custom scoring function type. It receives the
// source word, the candidate word, the distance between them and the
// candidate's count, and returns the candidate's score.
//
// Deprecated: use FilterFunc instead.
type ScoreFunc func(src, candidate []rune, distance int, cnt uint) float64
3072
3173// WithScoreFunc specify a function that will be used for scoring
74+ //
75+ // Deprecated: use WithFilterFunc instead
3276func WithScoreFunc (f ScoreFunc ) OptionFunc {
3377 return func (s * Spellchecker ) error {
34- s .dict . scoreFunc = f
78+ s .scoreFunc = f
3579 return nil
3680 }
3781}
3882
39- var defaultScorefunc scoreFunc = func (src , candidate []rune , distance int , cnt uint ) float64 {
83+ func defaultFilterFunc (maxErrors int ) FilterFunc {
84+ return func (src , candidate []rune , count uint ) (float64 , bool ) {
85+ distance , _ , _ := levenshtein .Calculate (src , candidate , 0 , 1 , 1 , 1 )
86+ if distance > maxErrors {
87+ return 0 , false
88+ }
89+
90+ mult := math .Log1p (float64 (count ))
91+ // if first letters are the same, increase score
92+ if src [0 ] == candidate [0 ] {
93+ mult *= 1.5
94+ // if second letters are the same too, increase score even more
95+ if len (src ) > 1 && len (candidate ) > 1 && src [1 ] == candidate [1 ] {
96+ mult *= 1.5
97+ }
98+ }
99+
100+ return 1 / (1 + float64 (distance * distance )) * mult , true
101+ }
102+ }
103+
104+ func wrapScoreFunc (f ScoreFunc , maxErrors int ) FilterFunc {
105+ return func (src , candidate []rune , count uint ) (float64 , bool ) {
106+ distance , _ , _ := levenshtein .Calculate (src , candidate , 0 , 1 , 1 , 1 )
107+ if distance > maxErrors {
108+ return 0 , false
109+ }
110+
111+ return f (src , candidate , distance , count ), true
112+ }
113+ }
114+
115+ var defaultScoreFunc ScoreFunc = func (src , candidate []rune , distance int , cnt uint ) float64 {
40116 mult := math .Log1p (float64 (cnt ))
41117 // if first letters are the same, increase score
42118 if src [0 ] == candidate [0 ] {
0 commit comments