-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathtokens_test.go
More file actions
84 lines (72 loc) · 1.68 KB
/
tokens_test.go
File metadata and controls
84 lines (72 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package classifier
import (
"bytes"
"context"
"io"
"strings"
"testing"
"time"
"unicode"
)
// Shared fixtures for the tokenizer tests in this file.
var (
// text is the sample sentence that TestTokenize feeds to every tokenizer.
text = "The quick brown fox jumped over the lazy dog"
// expected is the token count that doTokenizeTest requires for each run.
expected = 7
)
// assertion is a per-token check: it receives the test handle and a single
// token value, and reports any problem through t.
type assertion func(t *testing.T, v string)
// TestTokenize runs the tokenizer over the shared sample text once per table
// entry. Each entry supplies the tokenizer options to use and the per-token
// assertions that every emitted token must satisfy; the token count is
// checked by doTokenizeTest.
func TestTokenize(t *testing.T) {
	tests := []struct {
		Name       string
		Opts       []StdOption
		Assertions []assertion
	}{
		{"Standard Tokenizer", options(), assertions()},
		{"Buffered Tokenizer", options(BufferSize(1)), assertions()},
		{"ToUpper Tokenizer", options(Transforms(toUpper)), assertions(isUpper)},
		{"Stopword Tokenizer", options(Filters(IsNotStopWord)), assertions(isStopWord)},
	}
	for _, test := range tests {
		t.Run(test.Name, func(t *testing.T) {
			// The context handed to Tokenize is cancelled after five seconds
			// or when the subtest returns, whichever comes first.
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer cancel()
			tokens := NewTokenizer(test.Opts...).Tokenize(ctx, toReader(text))
			// Forward the entry's assertions; without them the per-token
			// checks declared in the table would never execute.
			doTokenizeTest(t, tokens, test.Assertions...)
		})
	}
}
// isStopWord is an assertion that, despite its name, reports a test error
// when v is a stopword according to IsStopWord. Values that are not
// stopwords pass silently.
func isStopWord(t *testing.T, v string) {
	if !IsStopWord(v) {
		return
	}
	t.Errorf("value is a stopword")
}
// isUpper is an assertion that reports a test error when v contains a rune
// that upper-casing would change. Runes without an upper-case mapping
// (digits, punctuation, uncased letters) are accepted, so a token such as
// "R2D2" is treated as upper case. At most one error is reported per value.
func isUpper(t *testing.T, v string) {
	for _, c := range v {
		// Compare against the upper-case mapping rather than unicode.IsUpper,
		// which is false for every non-letter rune.
		if unicode.ToUpper(c) != c {
			t.Errorf("value is not in uppercase")
			return
		}
	}
}
// toUpper returns s with its letters mapped to upper case via
// strings.ToUpper. It exists so the conversion can be passed around as a
// plain func(string) string value.
func toUpper(s string) string {
	upper := strings.ToUpper(s)
	return upper
}
// toReader wraps text in an in-memory buffer and returns it as an io.Reader
// that yields the bytes of text.
func toReader(text string) io.Reader {
	buf := bytes.NewBufferString(text)
	return buf
}
// doTokenizeTest receives from tokens until the channel is closed, applies
// every supplied assertion to each token in order, and reports a test error
// when the number of tokens received differs from the package-level expected
// count.
func doTokenizeTest(t *testing.T, tokens chan string, assertions ...assertion) {
	var count int
	for token := range tokens {
		for i := range assertions {
			assertions[i](t, token)
		}
		count++
	}
	if count == expected {
		return
	}
	t.Errorf("Expected %d tokens; actual: %d", expected, count)
}
// options returns its variadic arguments as a slice, which lets the test
// table list tokenizer options inline. Called with no arguments it returns a
// nil slice.
func options(opts ...StdOption) []StdOption {
	list := opts
	return list
}
// assertions returns its variadic arguments as a slice, which lets the test
// table list per-token checks inline. Called with no arguments it returns a
// nil slice.
func assertions(assertions ...assertion) []assertion {
	checks := assertions
	return checks
}