Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
AC_PREREQ(2.61)

AC_INIT([lexd], [1.3.5], [awesomeevildudes@gmail.com])
AC_INIT([lexd], [1.4.0], [awesomeevildudes@gmail.com])
AM_INIT_AUTOMAKE
AC_CONFIG_MACRO_DIR([m4])

Expand All @@ -15,7 +15,7 @@ AC_ARG_ENABLE(debug,
[ --enable-debug Enable "-g" compiler options],
[CXXFLAGS="-g $CXXFLAGS";CFLAGS="-g $CFLAGS"])

PKG_CHECK_MODULES([LTTOOLBOX], [lttoolbox >= 3.7.1])
PKG_CHECK_MODULES([LTTOOLBOX], [lttoolbox >= 3.8.2])
PKG_CHECK_MODULES([ICU_UC], [icu-uc])
PKG_CHECK_MODULES([ICU_IO], [icu-io])

Expand Down
39 changes: 34 additions & 5 deletions src/lexdcompiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign
die("Expected %d parts, found %d", currentLexiconPartCount, part_count);
for(; !iter.at_end(); ++iter)
{
bool have_weight = false;
if((*iter).startsWith(" ") || *iter == ']')
break;
else if(*iter == "[")
Expand All @@ -563,10 +564,30 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign
}
else if((*iter).startsWith(":"))
{
if(inleft)
bool got_weight = false;
++iter;
if (*iter == ":") {
if (have_weight) die("Weight already specified for this segment.");
have_weight = true;
++iter;
auto begin = iter.span().first;
while (iter != iter.end() && (*iter).length() > 0 &&
*iter != " " && *iter != "[" && *iter != "]")
++iter;
auto weight = line.tempSubStringBetween(begin, iter.span().first);
try {
seg.weight = StringUtils::stod(to_ustring(weight));
} catch (const std::invalid_argument& e) {
die("Invalid weight '%S'.", err(weight));
}
got_weight = true;
}
--iter;
if (got_weight) ;
else if (inleft)
inleft = false;
else
die("Lexicon entry contains multiple colons");
die("Lexicon entry contains multiple colons.");
if ((*iter).length() > 1) readSymbol(iter, line, seg.right);
}
else readSymbol(iter, line, (inleft ? seg.left : seg.right));
Expand Down Expand Up @@ -1862,7 +1883,7 @@ LexdCompiler::insertEntry(Transducer* trans, const lex_seg_t &seg)
{
trans_sym_t l = (i < seg.left.symbols.size()) ? seg.left.symbols[i] : trans_sym_t();
trans_sym_t r = (i < seg.right.symbols.size()) ? seg.right.symbols[i] : trans_sym_t();
state = trans->insertSingleTransduction(alphabet((int)l, (int)r), state);
state = trans->insertSingleTransduction(alphabet((int)l, (int)r), state, (i == 0 ? seg.weight : 0.000));
}
}
else
Expand Down Expand Up @@ -1949,7 +1970,9 @@ LexdCompiler::insertEntry(Transducer* trans, const lex_seg_t &seg)
state = trans->insertSingleTransduction((int)symbol, state);
}
}
trans->setFinal(state);
double w = seg.weight;
if (!seg.left.symbols.empty() || !seg.right.symbols.empty()) w = 0.000;
trans->setFinal(state, w);
}

void
Expand Down Expand Up @@ -2007,7 +2030,10 @@ LexdCompiler::getLexiconTransducer(pattern_element_t tok, unsigned int entry_ind
if (tok.left.name != tok.right.name)
die("Cannot collate %S with %S - %S contains a regex", err(name(tok.left.name)), err(name(tok.right.name)), err(name((le.regex != nullptr ? tok.left.name : tok.right.name))));
}
insertEntry(t, {.left=le.left, .right=re.right, .regex=le.regex, .tags=tags});
double w = le.weight;
if (tok.left.name != tok.right.name || tok.left.part != tok.right.part)
w += re.weight;
insertEntry(t, {.left=le.left, .right=re.right, .regex=le.regex, .weight=w, .tags=tags});
did_anything = true;
if(!free)
{
Expand Down Expand Up @@ -2136,6 +2162,9 @@ LexdCompiler::getLexiconTransducerWithFlags(pattern_element_t& tok, bool free)
die("Cannot collate %S with %S - %S contains a regex", err(name(tok.left.name)), err(name(tok.right.name)), err(name((le.regex != nullptr ? tok.left.name : tok.right.name))));
seg.regex = le.regex;
}
seg.weight = le.weight;
if (tok.left.name != tok.right.name || tok.left.part != tok.right.part)
seg.weight += re.weight;
if(!free && tok.left.name.valid())
{
trans_sym_t flag = getFlag(Unification, tok.left.name, i);
Expand Down
1 change: 1 addition & 0 deletions src/lexdcompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ struct lex_token_t {
struct lex_seg_t {
lex_token_t left, right;
Transducer* regex = nullptr;
double weight = 0.000;
tags_t tags;
bool operator == (const lex_seg_t &t) const
{
Expand Down
12 changes: 11 additions & 1 deletion tests/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,14 @@ negsources = $(foreach test,$(negtests),negtest-$(test).lexd)

check-neg: $(foreach src,$(negsources),$(O)/$(src).txt.error)

check: check-pos check-neg
atttests = \
weight \

attsources = $(foreach test,$(atttests),test-$(test).lexd)

check-att: $(foreach src,$(attsources),$(O)/$(src).txt.check)

check: check-pos check-neg check-att

O=.

Expand All @@ -66,7 +73,10 @@ $(O)/%.strings.check: $(O)/%.strings.diff
[ -s "$<" ] && cat "$<" && exit 1; touch $@
$(O)/%.lexd.txt.error: ../../src/lexd %.lexd | $(O)
$^ $(LEXD_TEST_FLAGS) > /dev/null 2> $@; [ $$? = 1 ]
$(O)/%.lexd.txt.check: $(O)/%.lexd.txt %.lexd.txt.$(O).gold
diff -U0 $^ > $@; [ $$? != 2 ]
clean:
rm $(foreach src,$(sources),$(O)/$(src).txt.strings.check)
rm $(foreach src,$(negsources),$(O)/$(src).txt.error)
rm $(foreach src,$(attsources),$(O)/$(src).txt.check)
rmdir $(O)
9 changes: 9 additions & 0 deletions tests/feature/test-weight.lexd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
PATTERNS
[a:a::1.0]
[b::2.0]
lex

LEXICON lex
c:c::3.0
d::4.0
ef::inf
7 changes: 7 additions & 0 deletions tests/feature/test-weight.lexd.txt.flags.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
0 1 a a 1.000000
0 1 b b 2.000000
0 1 c c 3.000000
0 1 d d 4.000000
0 2 e e INF
2 1 f f 0.000000
1 0.000000
18 changes: 18 additions & 0 deletions tests/feature/test-weight.lexd.txt.minimize-tags.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
0 1 @P.D.C@ @P.D.C@ 0.000000
0 2 @P.D.D@ @P.D.D@ 0.000000
0 3 @P.D.E@ @P.D.E@ 0.000000
1 4 @U.E.B@ @U.E.B@ 1.000000
2 5 @U.F.B@ @U.F.B@ 2.000000
3 6 @U.G.B@ @U.G.B@ 3.000000
3 7 @U.G.C@ @U.G.C@ 4.000000
3 8 @U.G.D@ @U.G.D@ INF
4 9 a a 0.000000
5 10 b b 0.000000
6 11 c c 0.000000
7 11 d d 0.000000
8 12 e e 0.000000
9 13 @R.D.C@ @R.D.C@ 0.000000
10 13 @R.D.D@ @R.D.D@ 0.000000
11 13 @R.D.E@ @R.D.E@ 0.000000
12 11 f f 0.000000
13 0.000000
18 changes: 18 additions & 0 deletions tests/feature/test-weight.lexd.txt.minimize.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
0 1 @P.D.C@ @P.D.C@ 0.000000
0 2 @P.D.D@ @P.D.D@ 0.000000
0 3 @P.D.E@ @P.D.E@ 0.000000
1 4 @U.E.B@ @U.E.B@ 1.000000
2 5 @U.F.B@ @U.F.B@ 2.000000
3 6 @U.G.B@ @U.G.B@ 3.000000
3 7 @U.G.C@ @U.G.C@ 4.000000
3 8 @U.G.D@ @U.G.D@ INF
4 9 a a 0.000000
5 10 b b 0.000000
6 11 c c 0.000000
7 11 d d 0.000000
8 12 e e 0.000000
9 13 @R.D.C@ @R.D.C@ 0.000000
10 13 @R.D.D@ @R.D.D@ 0.000000
11 13 @R.D.E@ @R.D.E@ 0.000000
12 11 f f 0.000000
13 0.000000
7 changes: 7 additions & 0 deletions tests/feature/test-weight.lexd.txt.plain.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
0 1 a a 1.000000
0 1 b b 2.000000
0 1 c c 3.000000
0 1 d d 4.000000
0 2 e e INF
2 1 f f 0.000000
1 0.000000
13 changes: 13 additions & 0 deletions tests/feature/test-weight.lexd.txt.single.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
0 1 @P.D.B@ @P.D.B@ 0.000000
0 2 @P.D.C@ @P.D.C@ 0.000000
0 3 @P.D.D@ @P.D.D@ 0.000000
1 4 a a 1.000000
2 5 b b 2.000000
3 6 c c 3.000000
3 6 d d 4.000000
3 7 e e INF
4 8 @R.D.B@ @R.D.B@ 0.000000
5 8 @R.D.C@ @R.D.C@ 0.000000
6 8 @R.D.D@ @R.D.D@ 0.000000
7 6 f f 0.000000
8 0.000000
7 changes: 7 additions & 0 deletions tests/feature/test-weight.lexd.txt.tags.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
0 1 a a 1.000000
0 1 b b 2.000000
0 1 c c 3.000000
0 1 d d 4.000000
0 2 e e INF
2 1 f f 0.000000
1 0.000000