Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 37 additions & 20 deletions src/ccmain/ltrresultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
std::string text;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE *best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != nullptr);
if (best_choice == nullptr) {
return nullptr; // No recognition results available
}
if (level == RIL_SYMBOL) {
text = res_it.word()->BestUTF8(blob_index_, false);
} else if (level == RIL_WORD) {
Expand All @@ -61,7 +63,9 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
do { // for each text line in a paragraph
do { // for each word in a text line
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != nullptr);
if (best_choice == nullptr) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there an example which triggers the assertion here? I am not sure whether the break is the right solution here. Maybe the loop should continue at line 71.

break; // Skip words without recognition results
}
text += best_choice->unichar_string();
text += " ";
res_it.forward();
Expand Down Expand Up @@ -104,37 +108,47 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
case RIL_BLOCK:
do {
best_choice = res_it.word()->best_choice;
mean_certainty += best_choice->certainty();
++certainty_count;
if (best_choice != nullptr) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there situations where the condition is false?

mean_certainty += best_choice->certainty();
++certainty_count;
}
res_it.forward();
} while (res_it.block() == res_it.prev_block());
break;
case RIL_PARA:
do {
best_choice = res_it.word()->best_choice;
mean_certainty += best_choice->certainty();
++certainty_count;
if (best_choice != nullptr) {
mean_certainty += best_choice->certainty();
++certainty_count;
}
res_it.forward();
} while (res_it.block() == res_it.prev_block() &&
res_it.row()->row->para() == res_it.prev_row()->row->para());
break;
case RIL_TEXTLINE:
do {
best_choice = res_it.word()->best_choice;
mean_certainty += best_choice->certainty();
++certainty_count;
if (best_choice != nullptr) {
mean_certainty += best_choice->certainty();
++certainty_count;
}
res_it.forward();
} while (res_it.row() == res_it.prev_row());
break;
case RIL_WORD:
best_choice = res_it.word()->best_choice;
mean_certainty = best_choice->certainty();
certainty_count = 1;
if (best_choice != nullptr) {
mean_certainty = best_choice->certainty();
certainty_count = 1;
}
break;
case RIL_SYMBOL:
best_choice = res_it.word()->best_choice;
mean_certainty = best_choice->certainty(blob_index_);
certainty_count = 1;
if (best_choice != nullptr) {
mean_certainty = best_choice->certainty(blob_index_);
certainty_count = 1;
}
}
if (certainty_count > 0) {
mean_certainty /= certainty_count;
Expand Down Expand Up @@ -226,8 +240,8 @@ StrongScriptDirection LTRResultIterator::WordDirection() const {

// Returns true if the current word was found in a dictionary.
bool LTRResultIterator::WordIsFromDictionary() const {
if (it_->word() == nullptr) {
return false; // Already at the end!
if (it_->word() == nullptr || it_->word()->best_choice == nullptr) {
return false; // Already at the end or no recognition results!
}
int permuter = it_->word()->best_choice->permuter();
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM;
Expand All @@ -243,8 +257,8 @@ int LTRResultIterator::BlanksBeforeWord() const {

// Returns true if the current word is numeric.
bool LTRResultIterator::WordIsNumeric() const {
if (it_->word() == nullptr) {
return false; // Already at the end!
if (it_->word() == nullptr || it_->word()->best_choice == nullptr) {
return false; // Already at the end or no recognition results!
}
int permuter = it_->word()->best_choice->permuter();
return permuter == NUMBER_PERM;
Expand Down Expand Up @@ -315,8 +329,11 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
if (it_->word() == nullptr) {
return nullptr; // Already at the end!
}
std::string ocr_text;
WERD_CHOICE *best_choice = it_->word()->best_choice;
if (best_choice == nullptr) {
return nullptr; // No recognition results available
}
std::string ocr_text;
const UNICHARSET *unicharset = it_->word()->uch_set;
for (unsigned i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
Expand All @@ -341,7 +358,7 @@ const char *LTRResultIterator::WordLattice(int *lattice_size) const {
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSuperscript() const {
if (cblob_it_ == nullptr && it_->word() != nullptr) {
if (cblob_it_ == nullptr && it_->word() != nullptr && it_->word()->best_choice != nullptr) {
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
}
return false;
Expand All @@ -351,7 +368,7 @@ bool LTRResultIterator::SymbolIsSuperscript() const {
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSubscript() const {
if (cblob_it_ == nullptr && it_->word() != nullptr) {
if (cblob_it_ == nullptr && it_->word() != nullptr && it_->word()->best_choice != nullptr) {
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
}
return false;
Expand All @@ -361,7 +378,7 @@ bool LTRResultIterator::SymbolIsSubscript() const {
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsDropcap() const {
if (cblob_it_ == nullptr && it_->word() != nullptr) {
if (cblob_it_ == nullptr && it_->word() != nullptr && it_->word()->best_choice != nullptr) {
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
}
return false;
Expand Down
19 changes: 12 additions & 7 deletions src/ccmain/output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
const UNICHARSET &uchset = *word->uch_set;
UNICHAR_ID space = uchset.unichar_to_id(" ");

if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) &&
if ((word->unlv_crunch_mode != CR_NONE || (word->best_choice != nullptr && word->best_choice->empty())) &&
!tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
bool need_reject = false;
if ((word->unlv_crunch_mode != CR_DELETE) &&
Expand Down Expand Up @@ -149,15 +149,15 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,

if (unlv_tilde_crunching && stats_.last_char_was_tilde && (word->word->space() == 0) &&
!(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
(word->best_choice->unichar_id(0) == space)) {
(word->best_choice != nullptr && word->best_choice->unichar_id(0) == space)) {
/* Prevent adjacent tilde across words - we know that adjacent tildes within
words have been removed */
word->MergeAdjacentBlobs(0);
}
if (newline_type || (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes)) {
stats_.last_char_was_tilde = false;
} else {
if (word->reject_map.length() > 0) {
if (word->reject_map.length() > 0 && word->best_choice != nullptr) {
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space) {
stats_.last_char_was_tilde = true;
} else {
Expand All @@ -169,15 +169,17 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
/* else it is unchanged as there are no output chars */
}

ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
if (word->best_choice != nullptr) {
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
}

set_unlv_suspects(word);
check_debug_pt(word, 120);
if (tessedit_rejection_debug) {
if (tessedit_rejection_debug && word->best_choice != nullptr) {
tprintf("Dict word: \"%s\": %d\n", word->best_choice->debug_string().c_str(),
dict_word(*(word->best_choice)));
}
if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
if ((!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) && word->best_choice != nullptr) {
if (tessedit_zero_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (unsigned i = 0; i < word->best_choice->length(); ++i) {
Expand Down Expand Up @@ -250,7 +252,7 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
;
}

if (i < word->reject_map.length()) {
if (i < word->reject_map.length() && word->best_choice != nullptr) {
return word->best_choice->unichar_id(i);
} else {
return word->uch_set->unichar_to_id(unrecognised_char.c_str());
Expand All @@ -268,6 +270,9 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
* tessedit_minimal_rejection.
*************************************************************************/
void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
if (word_res->best_choice == nullptr) {
return; // No recognition results available
}
int len = word_res->reject_map.length();
const WERD_CHOICE &word = *(word_res->best_choice);
const UNICHARSET &uchset = *word.unicharset();
Expand Down
4 changes: 3 additions & 1 deletion src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,9 @@ void ResultIterator::AppendUTF8WordText(std::string *text) const {
if (!it_->word()) {
return;
}
ASSERT_HOST(it_->word()->best_choice != nullptr);
if (it_->word()->best_choice == nullptr) {
return; // No recognition results available
}
bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
if (at_beginning_of_minor_run_) {
*text += reading_direction_is_ltr ? kLRM : kRLM;
Expand Down
Loading