Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lttoolbox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ set(LIBLTTOOLBOX_HEADERS
node.h
pattern_list.h
regexp_compiler.h
reusable_state.h
serialiser.h
sorted_vector.h
sorted_vector.hpp
Expand Down Expand Up @@ -54,6 +55,7 @@ set(LIBLTTOOLBOX_SOURCES
node.cc
pattern_list.cc
regexp_compiler.cc
reusable_state.cc
sorted_vector.cc
state.cc
stream_reader.cc
Expand Down
51 changes: 38 additions & 13 deletions lttoolbox/fst_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,20 @@ FSTProcessor::filterFinals(const State& state, UStringView casefrom)
uppercase, firstupper, 0);
}

/**
 * Overload of filterFinals() taking a ReusableState.
 *
 * Derives two case flags from casefrom and forwards them, together with
 * the processor's own settings, to ReusableState::filterFinals().
 * When dictionaryCase is set, both flags stay false and casefrom is not
 * inspected at all.
 *
 * Assumes that casefrom is non-empty: its first character is read
 * whenever dictionaryCase is off.
 *
 * @param state    the state whose filterFinals() is invoked
 * @param casefrom string whose first and last characters are tested
 *                 with u_isupper()
 * @return the value returned by state.filterFinals()
 */
UString
FSTProcessor::filterFinals(const ReusableState& state, UStringView casefrom)
{
  // Short-circuits on dictionaryCase, so casefrom[0] is only touched
  // when case information is actually wanted.
  const bool first_is_upper = !dictionaryCase && u_isupper(casefrom[0]);

  // Requires more than one character, an upper-case first character and
  // an upper-case last character.
  const bool whole_upper = first_is_upper && casefrom.size() > 1 &&
                           u_isupper(casefrom[casefrom.size()-1]);

  return state.filterFinals(all_finals, alphabet, escaped_chars,
                            displayWeightsMode, maxAnalyses, maxWeightClasses,
                            whole_upper, first_is_upper, 0);
}

void
FSTProcessor::writeEscaped(UStringView str, UFILE *output)
{
Expand Down Expand Up @@ -674,6 +688,7 @@ void
FSTProcessor::initBiltrans()
{
// Perform the generation set-up first, then additionally register '*'
// in escaped_chars.
initGeneration();
escaped_chars.insert('*');
}


Expand Down Expand Up @@ -803,7 +818,8 @@ FSTProcessor::analysis(InputFile& input, UFILE *output)
bool last_incond = false;
bool last_postblank = false;
bool last_preblank = false;
State current_state = initial_state;
ReusableState current_state;
current_state.init(&root);
UString lf; // analysis (lexical form and tags)
UString sf; // surface form
UString lf_spcmp; // space compound analysis
Expand Down Expand Up @@ -1044,7 +1060,7 @@ FSTProcessor::analysis(InputFile& input, UFILE *output)
}
}

current_state = initial_state;
current_state.init(&root);
lf.clear();
sf.clear();
last_start = input_buffer.getPos();
Expand Down Expand Up @@ -1265,7 +1281,8 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
{
StreamReader reader(&input);
reader.alpha = &alphabet;
State current_state;
ReusableState current_state;
current_state.init(&root);

while (!reader.at_eof) {
reader.next();
Expand Down Expand Up @@ -1322,7 +1339,7 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
break;
}
if (!skip) {
current_state = initial_state;
current_state.init(&root);
for (auto& sym : reader.readings[0].symbols) {
if (!alphabet.isTag(sym) && u_isupper(sym) &&
!beCaseSensitive(current_state)) {
Expand Down Expand Up @@ -1410,7 +1427,8 @@ FSTProcessor::transliteration(InputFile& input, UFILE *output)
size_t cur_word = 0;
size_t cur_pos = 0;
size_t match_pos = 0;
State current_state = initial_state;
ReusableState current_state;
current_state.init(&root);
UString last_match;
int space_diff = 0;

Expand Down Expand Up @@ -1590,7 +1608,7 @@ FSTProcessor::transliteration(InputFile& input, UFILE *output)
firstupper = false;
have_first = false;
have_second = false;
current_state = initial_state;
current_state.init(&root);
}
}
}
Expand Down Expand Up @@ -1728,6 +1746,8 @@ FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode)
StreamReader reader(&input);
reader.alpha = &alphabet;
reader.add_unknowns = true;
ReusableState current_state;
current_state.init(&root);

size_t index = (biltransSurfaceForms || biltransSurfaceFormsKeep ? 1 : 0);

Expand Down Expand Up @@ -1769,7 +1789,7 @@ FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode)
continue;
}

State current_state = initial_state;
current_state.reinit(&root);

bool firstupper = (symbols[0] > 0 && u_isupper(symbols[0]));
bool uppercase = (firstupper && symbols.size() > 1 &&
Expand All @@ -1791,11 +1811,11 @@ FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode)
}
if (current_state.isFinal(all_finals)) {
queue_start = i;
current_state.filterFinalsArray(result,
all_finals, alphabet, escaped_chars,
displayWeightsMode, maxAnalyses,
maxWeightClasses, uppercase,
firstupper, 0);
result = current_state.filterFinalsArray(all_finals, alphabet,
escaped_chars,
displayWeightsMode, maxAnalyses,
maxWeightClasses, uppercase,
firstupper, 0);
}
}
// if there are no tags, we only return complete matches
Expand Down Expand Up @@ -1847,7 +1867,12 @@ FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode)
u_fputc('/', output);

if (!result.empty()) {
write(compose(result, source.substr(queue_pos)), output);
UString queue = source.substr(queue_pos);
for (auto& piece : result) {
u_fputc('/', output);
write(piece, output);
write(queue, output);
}
} else {
u_fputc((mode == gm_all ? '#' : '@'), output);
write(source, output);
Expand Down
10 changes: 8 additions & 2 deletions lttoolbox/fst_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <lttoolbox/buffer.h>
#include <lttoolbox/my_stdio.h>
#include <lttoolbox/state.h>
#include <lttoolbox/reusable_state.h>
#include <lttoolbox/trans_exe.h>
#include <lttoolbox/input_file.h>
#include <libxml/xmlreader.h>
Expand Down Expand Up @@ -328,6 +329,7 @@ class FSTProcessor
* Assumes that casefrom is non-empty
*/
UString filterFinals(const State& state, UStringView casefrom);
UString filterFinals(const ReusableState& state, UStringView casefrom);

/**
* Write a string to an output stream,
Expand Down Expand Up @@ -456,11 +458,11 @@ class FSTProcessor
*
* @return running with --case-sensitive or state size exceeds max
*/
bool beCaseSensitive(const State& state) {
bool beCaseSensitive(size_t size) {
if(caseSensitive) {
return true;
}
else if(state.size() < max_case_insensitive_state_size) {
else if(size < max_case_insensitive_state_size) {
return false; // ie. do case-folding
}
else {
Expand All @@ -473,6 +475,10 @@ class FSTProcessor
}
}

/**
 * Convenience overload: applies beCaseSensitive(size_t) to the size
 * reported by a State.
 */
bool beCaseSensitive(const State& s) {
  return beCaseSensitive(s.size());
}

/**
 * Convenience overload: applies beCaseSensitive(size_t) to the size
 * reported by a ReusableState.
 */
bool beCaseSensitive(const ReusableState& s) {
  return beCaseSensitive(s.size());
}
void appendEscaped(UString& to, const UString& from) {
for(auto &c : from) {
if (escaped_chars.find(c) != escaped_chars.end()) {
Expand Down
3 changes: 3 additions & 0 deletions lttoolbox/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <map>

class State;
class ReusableState;
class Node;


Expand All @@ -35,6 +36,7 @@ class Dest
double *out_weight;

friend class State;
friend class ReusableState;
friend class Node;

void copy(Dest const &d)
Expand Down Expand Up @@ -112,6 +114,7 @@ class Node
{
private:
friend class State;
friend class ReusableState;

/**
* The outgoing transitions of this node.
Expand Down
Loading
Loading