-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathclassifier.go
More file actions
30 lines (27 loc) · 899 Bytes
/
classifier.go
File metadata and controls
30 lines (27 loc) · 899 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
package classifier
import (
"context"
"io"
)
// Classifier is a small, implementation-agnostic contract for text
// classifiers. Training and classification are each offered in two forms: one
// that consumes an io.Reader and one that takes the text directly as a string.
type Classifier interface {
	// Train feeds the text read from doc to the classifier as training data.
	// NOTE(review): category is presumably the label the text is filed
	// under; the parameter names here are descriptive only, so confirm
	// against the concrete implementations.
	Train(doc io.Reader, category string) error

	// TrainString is the string-based counterpart of Train.
	// NOTE(review): argument order is assumed to mirror Train (text first,
	// then category); confirm against the concrete implementations.
	TrainString(doc string, category string) error

	// Classify classifies the corpus read from doc and returns the result as
	// a string. It assumes the underlying classifier has already been trained.
	Classify(doc io.Reader) (string, error)

	// ClassifyString is the string-based counterpart of Classify.
	ClassifyString(doc string) (string, error)
}
// WordCounts tallies how many times each token occurs in the text read from r.
//
// The input is split by a tokenizer obtained from NewTokenizer, driven with a
// background context, so the caller cannot cancel tokenization through this
// function. The returned map is keyed by token and holds that token's
// occurrence count. The error result is always nil in this implementation.
func WordCounts(r io.Reader) (map[string]int, error) {
	counts := make(map[string]int)
	tokens := NewTokenizer().Tokenize(context.Background(), r)
	// A missing key reads as zero, so the first sighting of a token yields 1.
	for tok := range tokens {
		counts[tok]++
	}
	return counts, nil
}