-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathAbstractSourcedTokenizedStringDistance.java
More file actions
45 lines (36 loc) · 1.53 KB
/
AbstractSourcedTokenizedStringDistance.java
File metadata and controls
45 lines (36 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
package com.wcohen.ss;
import java.util.*;
import com.wcohen.ss.tokens.*;
import com.wcohen.ss.api.*;
/**
 * Base class for string distances that work on bags of sourced tokens
 * rather than on raw strings.
 */
abstract public class AbstractSourcedTokenizedStringDistance extends AbstractStringDistance
{
    /** Tokenizer used to turn an unwrapped string plus its source into sourced tokens. */
    protected SourcedTokenizer tokenizer;

    // bags of sourced tokens produced by the most recent call to prepare()
    private List tokenizedWrappers;

    /**
     * Builds a distance that uses SimpleSourcedTokenizer.DEFAULT_SOURCED_TOKENIZER.
     */
    public AbstractSourcedTokenizedStringDistance() {
        this(SimpleSourcedTokenizer.DEFAULT_SOURCED_TOKENIZER);
    }

    /**
     * Builds a distance that uses the given tokenizer.
     *
     * @param tokenizer cast to SourcedTokenizer without a prior type check,
     *                  so an argument of any other type raises ClassCastException
     */
    public AbstractSourcedTokenizedStringDistance(Tokenizer tokenizer) {
        this.tokenizer = (SourcedTokenizer) tokenizer;
    }

    /**
     * Learns whatever the concrete distance needs from the supplied wrappers.
     *
     * @param i iterator over the training wrappers
     */
    abstract public void train(StringWrapperIterator i);

    /**
     * Supplies the pool of string wrappers; this simply forwards to {@link #train}.
     *
     * @param i iterator over the pool of wrappers
     */
    final public void setStringWrapperPool(StringWrapperIterator i) {
        train(i);
    }

    /**
     * Converts every wrapper produced by the iterator into a bag of sourced
     * tokens, caches the resulting list, and returns an iterator over it.
     *
     * @param i0 cast to SourcedStringWrapperIterator without a prior type check
     * @return a BasicSourcedStringWrapperIterator over the cached bags
     */
    final public StringWrapperIterator prepare(StringWrapperIterator i0) {
        // cast first so that a failed cast leaves the previous cache untouched
        SourcedStringWrapperIterator wrappers = (SourcedStringWrapperIterator) i0;
        tokenizedWrappers = new ArrayList();
        for (; wrappers.hasNext(); ) {
            SourcedStringWrapper current = wrappers.nextSourcedStringWrapper();
            BagOfSourcedTokens bag = asBagOfSourcedTokens(current);
            tokenizedWrappers.add(bag);
        }
        return new BasicSourcedStringWrapperIterator(tokenizedWrappers.iterator());
    }

    /**
     * Returns the wrapper as a bag of sourced tokens.
     *
     * @param w wrapper to convert
     * @return {@code w} itself when it already is a BagOfSourcedTokens,
     *         otherwise a new bag built by tokenizing its string and source
     */
    final protected BagOfSourcedTokens asBagOfSourcedTokens(SourcedStringWrapper w)
    {
        // already tokenized: hand back the same instance
        if (w instanceof BagOfSourcedTokens) {
            return (BagOfSourcedTokens) w;
        }
        SourcedToken[] sourcedTokens = tokenizer.sourcedTokenize(w.unwrap(), w.getSource());
        BagOfSourcedTokens bag = new BagOfSourcedTokens(w.unwrap(), sourcedTokens);
        return bag;
    }
}