-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathProb.cpp
More file actions
198 lines (177 loc) · 4.6 KB
/
Prob.cpp
File metadata and controls
198 lines (177 loc) · 4.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/*
* Prob.cc --
* Functions for handling Probs
*
*/
#include "stdafx.h"
#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2011 SRI International, 2012 Microsoft Corp. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/anstolck/srilm/lm/src/RCS/Prob.cc,v 1.17 2012/01/26 17:21:23 anstolck Exp $";
#endif
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <math.h>
#include "Prob.h"
#include "Array.cpp"
/*
 * Distinguished log-probability values and numeric tolerances.
 * HUGE_VAL comes from <math.h>; the LogP and Prob types are declared
 * outside this file (presumably in Prob.h -- confirm there).
 */
const LogP LogP_Zero = -HUGE_VAL; /* log(0); also what parseLogP() yields
                                   * for a negative infinity string */
const LogP LogP_Inf = HUGE_VAL; /* log(Inf); also what parseLogP() yields
                                 * for a positive infinity string */
const LogP LogP_One = 0.0; /* log(1) */
const int LogP_Precision = 7; /* number of significant decimal digits
                               * in a LogP */
const Prob Prob_Epsilon = 3e-06;/* probability sums smaller than this in
                                 * magnitude are effectively considered 0
                                 * (assuming they were obtained by summing
                                 * LogP values) */
/*
 * parseLogP --
 *      Fast parsing of floats representing log probabilities
 *
 * Arguments:
 *      str     NUL-terminated text holding the number to parse
 *      result  receives the parsed value; written only on success
 *
 * Results:
 *      true if string can be parsed as a float, false otherwise.
 *      A null or empty string, and a string consisting only of a sign
 *      and/or a decimal point (no digits), are rejected.
 *
 * Side effects:
 *      Result is set to float value if successful.
 *
 */
Boolean
parseLogP(const char *str, LogP &result)
{
    /*
     * Largest number of digits the integer fast path accepts;
     * 8 decimal digits (at most 99999999) fit in 32 bits.
     */
    const unsigned maxDigits = 8;

    if (str == NULL || *str == '\0') {
        /* missing or empty input */
        return false;
    }

    /*
     * Log probabilities are typically negative values of magnitude > 0.0001,
     * and thus are usually formatted without exponential notation.
     * We parse this type of format using integer arithmetic for speed,
     * and fall back onto sscanf() in all other cases.
     * We also use sscanf() when there are too many digits to handle with
     * integers.
     * Finally, we also parse +/- infinity values as they are printed by
     * printf().  These are "[Ii]nf" or "[Ii]nfinity" or "1.#INF".
     */

    const char *cp = str;
    Boolean minus = false;

    /*
     * Parse optional sign
     */
    if (*cp == '-') {
        minus = true;
        cp++;
    } else if (*cp == '+') {
        cp++;
    }

    /* first character after the sign; used for the infinity check below */
    const char *cp0 = cp;

    unsigned digits = 0;        // integer formed by all parsed digits
    unsigned decimals = 1;      // scaling factor from decimal point
    unsigned precision = 0;     // total number of parsed digits

    /*
     * Parse digits before decimal point.
     * The cast to unsigned char is required: passing a negative char
     * value to isdigit() is undefined behavior.
     * digits/decimals may wrap around on very long inputs; that is
     * harmless because such inputs exceed maxDigits and the wrapped
     * values are never used.
     */
    while (isdigit((unsigned char)*cp)) {
        digits = digits * 10 + (unsigned)(*cp++ - '0');
        precision++;
    }

    if (*cp == '.') {
        cp++;

        /*
         * Parse digits after decimal point
         */
        while (isdigit((unsigned char)*cp)) {
            digits = digits * 10 + (unsigned)(*cp++ - '0');
            precision++;
            decimals *= 10;
        }
    }

    if (*cp == '\0') {
        if (precision == 0) {
            /*
             * Only a sign and/or a decimal point was given ("-", "+",
             * ".", "-."): that is not a number, so don't report 0.
             */
            return false;
        }
        if (precision <= maxDigits) {
            /* whole string consumed and it fits the integer fast path */
            result = (minus ? - (LogP)digits : (LogP)digits) / (LogP)decimals;
            return true;
        }
    }

    /*
     * Either there was an error or some format the fast path can't
     * handle.  Check for infinity values first (the leading character
     * test is a cheap filter before the string comparisons), then fall
     * back on sscanf().
     */
    if ((*cp0 == 'i' || *cp0 == 'I' ||
         (cp0[0] == '1' && cp0[1] == '.' && cp0[2] == '#')) &&
        (strncmp(cp0, "Inf", 3) == 0 || strncmp(cp0, "inf", 3) == 0 ||
         strncmp(cp0, "1.#INF", 6) == 0))
    {
        result = (minus ? LogP_Zero : LogP_Inf);
        return true;
    }

    /* general case: exponents, too many digits, etc. */
    return (sscanf(str, "%f", &result) == 1);
}
/*
 * Codebooks for quantized log probs
 * (the PQCodebook implementation below is currently commented out)
 */
//Boolean
//PQCodebook::read(File &file)
//{
// char *line;
// char buffer[10];
//
// line = file.getline();
//
// if (!line || sscanf(line, "VQSize %u", &numBins) != 1) {
// file.position() << "missing VQSize spec\n";
// return false;
// }
//
// for (unsigned i = 0; i < numBins; i ++) {
// binMeans[i] = LogP_Inf;
// binCounts[i] = 0;
// }
//
// line = file.getline();
// if (!line || sscanf(line, "Codeword Mean %s", buffer) != 1 ||
// strcmp(buffer, "Count") != 0)
// {
// file.position() << "malformed Codeword header\n";
// return false;
// }
//
// while ((line = file.getline())) {
// unsigned bin;
// double prob;
// unsigned long count;
// if (sscanf(line, "%u %lf %lu", &bin, &prob, &count) != 3) {
// file.position() << "malformed codeword line\n";
// return false;
// }
//
// binMeans[bin] = prob;
// binCounts[bin] = count;
// }
//
// return true;
//}
//
//Boolean
//PQCodebook::write(File &file)
//{
// fprintf(file, "VQSize %u\n", numBins);
// fprintf(file, "Codeword Mean Count\n");
//
// for (unsigned i = 0; i < numBins; i ++) {
// fprintf(file, "%8d %20.16lg %12lu\n",
// i, (double)binMeans[i],
// (unsigned long)binCounts[i]);
// }
//
// return true;
//}
//
//LogP2
//PQCodebook::getProb(unsigned bin)
//{
// if (bin < numBins) {
// return binMeans[bin];
// } else {
// return LogP_Inf;
// }
//}