@@ -548,7 +548,7 @@ def fallback_text_to_md(textlines, ignore_code: bool = False, clip=None):
548548 for tl in textlines :
549549 ltext = "|" + "|" .join ([s ["text" ].strip () for s in tl ["spans" ]]) + "|\n "
550550 output += ltext
551- output += "**----- End of picture text -----**<br>\n "
551+ output += "\n **----- End of picture text -----**<br>\n "
552552 return output + "\n \n "
553553
554554
@@ -631,7 +631,7 @@ def to_markdown(
631631 continue
632632
633633 # pictures and formulas: either write image file or embed
634- if btype in ("picture" , "formula" , "fallback" ):
634+ if btype in ("picture" , "formula" , "table- fallback" ):
635635 if isinstance (box .image , str ):
636636 output += GRAPHICS_TEXT % box .image + "\n \n "
637637 elif isinstance (box .image , bytes ):
@@ -650,7 +650,7 @@ def to_markdown(
650650 ignore_code = ignore_code or page .full_ocred ,
651651 clip = clip ,
652652 )
653- elif btype == "fallback" :
653+ elif btype == "table- fallback" :
654654 output += fallback_text_to_md (
655655 box .textlines ,
656656 ignore_code = ignore_code or page .full_ocred ,
@@ -741,7 +741,7 @@ def to_text(
741741 continue
742742 if btype == "page-footer" and footer is False :
743743 continue
744- if btype in ("picture" , "formula" , "fallback" ):
744+ if btype in ("picture" , "formula" , "table- fallback" ):
745745 output += f"==> picture [{ clip .width } x { clip .height } ] <==\n \n "
746746 if box .textlines :
747747 if btype == "picture" :
@@ -750,7 +750,7 @@ def to_text(
750750 ignore_code = ignore_code or page .full_ocred ,
751751 clip = clip ,
752752 )
753- elif btype == "fallback" :
753+ elif btype == "table- fallback" :
754754 output += fallback_text_to_text (
755755 box .textlines ,
756756 ignore_code = ignore_code or page .full_ocred ,
@@ -1018,7 +1018,7 @@ def parse_document(
10181018
10191019 except Exception as e :
10201020 # print(f"table detection error '{e}' on page {page.number+1}")
1021- layoutbox .boxclass = "fallback"
1021+ layoutbox .boxclass = "table- fallback"
10221022 # table structure not detected: treat like an image
10231023 if document .embed_images or document .write_images :
10241024 pix = page .get_pixmap (clip = clip , dpi = document .image_dpi )
0 commit comments