-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathSubVocab.cpp
More file actions
104 lines (90 loc) · 2.13 KB
/
SubVocab.cpp
File metadata and controls
104 lines (90 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
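/*
 * SubVocab.cpp --
 *	Vocabulary subset class: a vocabulary whose words are drawn from a
 *	base Vocab and that reuses the base vocabulary's word indices.
 *
 */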
#include "stdafx.h"
/*
 * Copyright notice and RCS "$Header$" identification kept as static strings.
 * Both definitions are compiled out when `lint` is defined.
 * NOTE(review): neither string is referenced in the visible code; presumably
 * they exist only so the text ends up in the compiled object -- confirm
 * before removing.
 */
#ifndef lint
static char Copyright[] = "Copyright (c) 1996-2012 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/CVS/srilm/lm/src/SubVocab.cc,v 1.9 2012/10/18 20:55:22 mcintyre Exp $";
#endif
#ifdef PRE_ISO_CXX
# include <iostream.h>
#else
# include <iostream>
using namespace std;
#endif
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include "SubVocab.h"
#include "LHash.h"
#include "Array.h"
/*
 * Construct a sub-vocabulary layered on an existing base vocabulary.
 *
 * baseVocab is used to initialize the _baseVocab member, which the
 * addWord() methods consult whenever a word is added.
 * NOTE(review): _baseVocab is presumably held by reference, in which case
 * baseVocab must outlive this object -- confirm against SubVocab.h.
 */
SubVocab::SubVocab(Vocab &baseVocab)
: _baseVocab(baseVocab)
{
/*
 * The output vocabulary is NOT pointed at the base vocab here: the call
 * below is commented out, so nothing is inherited explicitly at this point.
 */
//setOutputVocab(&baseVocab);
/*
 * Sub-vocabularies don't have any special tokens by default, so remove the
 * entries at the four special-token indices (going by the member names:
 * unknown word, sentence start, sentence end and pause).
 */
remove(_unkIndex);
remove(_ssIndex);
remove(_seIndex);
remove(_pauseIndex);
}
/*
 * Add a word, given by name, to the sub-vocabulary.
 *
 * The name is first handed to the base vocabulary's addWord(), which is
 * expected to return the existing index or create the word there. The index
 * it reports is then registered in this sub-vocabulary through
 * addWord(VocabIndex), so both vocabularies use the same index for the word.
 *
 * Returns Vocab_None when the base vocabulary reports Vocab_None; otherwise
 * returns whatever addWord(VocabIndex) returns for that index.
 */
VocabIndex
SubVocab::addWord(VocabString name)
{
    VocabIndex baseIndex = _baseVocab.addWord(name);

    if (baseIndex != Vocab_None) {
        /* The base vocabulary knows the word: mirror its index here. */
        return addWord(baseIndex);
    }

    /* The base vocabulary could not supply an index. */
    return Vocab_None;
}
/*
 * Add a word, given by its base-vocabulary index, to the sub-vocabulary.
 *
 * The index must already refer to a word in the base vocabulary; if
 * _baseVocab.getWord(wid) yields 0, nothing is added and Vocab_None is
 * returned. Otherwise the word is entered under the same index and wid is
 * returned. If the name is already present, the only action is an assert
 * that it maps to wid.
 */
VocabIndex
SubVocab::addWord(VocabIndex wid)
{
    /*
     * Look the index up in the base vocabulary; an unknown index is an error.
     */
    VocabString word = _baseVocab.getWord(wid);
    if (word == 0) {
        return Vocab_None;
    }

    /*
     * Key the entry on the base vocabulary's spelling, in case the base
     * Vocab changed capitalization.
     */
    Boolean alreadyPresent;
    VocabIndex *slot = byName.insert(word, alreadyPresent);

    if (alreadyPresent) {
        /* An existing entry must agree with the base vocabulary's index. */
        assert(*slot == wid);
        return wid;
    }

    /*
     * New entry: record the index under the name, and the name (as stored
     * internally by byName) under the index.
     */
    *slot = wid;
    byIndex[wid] = byName.getInternalKey(word);

    /*
     * Clear the word strings between the previous highest index and the
     * new one, so that lookups of those unused indices return 0.
     */
    unsigned gap = nextIndex;
    while (gap < wid) {
        byIndex[gap] = 0;
        gap ++;
    }

    /*
     * Keep nextIndex at 1 plus the highest word index used.
     */
    if (nextIndex < wid + 1) {
        nextIndex = wid + 1;
    }

    return wid;
}