Pleasing mypy & typing imports under Python 3.8

Cimon Lucas (LCM) · Cimon Lucas (LCM) · commit 573278ece1e9 · 2024-11-07T17:08:47.000+01:00
diff --git a/pdfly/update_offsets.py b/pdfly/update_offsets.py
@@ -24,7 +24,10 @@
    update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf
 """
 
-from collections.abc import Iterable
+if sys.version_info >= (3, 9):
+    List = list
+else:
+    from typing import List
 from pathlib import Path
 from rich.console import Console
 import re
@@ -41,13 +44,13 @@
 
 
 def update_lines(
-    lines_in: Iterable[str], encoding: str, console: Console, verbose: bool
-) -> Iterable[str]:
+    lines_in: List[str], encoding: str, console: Console, verbose: bool
+) -> List[str]:
     """Iterates over the lines of a pdf-files and updates offsets.
 
     The input is expected to be a pdf without binary-sections.
 
-    :param lines_in: An Iterable over the lines including line-breaks.
+    :param lines_in: A list of the lines, including line-breaks.
     :param encoding: The encoding, e.g. "iso-8859-1" or "UTF-8".
     :param console: Console used to print messages.
     :param verbose: True to activate logging of info-messages.
@@ -184,7 +187,12 @@ def update_lines(
 
     for curr_obj, stream_len in map_stream_len.items():
         if curr_obj in map_obj_length_line:
-            m_length = RE_LENGTH.match(map_obj_length_line[curr_obj])
+            line = map_obj_length_line[curr_obj]
+            m_length = RE_LENGTH.match(line)
+            if m_length is None:
+                raise RuntimeError(
+                    f"Invalid PDF file: line '{line}' does not contain a valid /Length."
+                )
             prev_length = m_length.group(2)
             len_digits = len(prev_length)
             len_format = "%%0%dd" % len_digits
@@ -229,16 +237,16 @@ def update_lines(
     return lines_out
 
 
-def read_binary_file(file_path: str, encoding: str) -> Iterable[str]:
+def read_binary_file(file_path: Path, encoding: str) -> List[str]:
     """Reads a binary file line by line and returns these lines as a list of strings in the given encoding.
     Encoding utf-8 can't be used to read random binary data.
 
     :param file_path: file to be read line by line
     :param encoding: encoding to be used (e.g. "iso-8859-1")
     :return lines including line-breaks
     """
-    chunks = []
-    with open(file_path, "rb") as file:
+    chunks: List[str] = []
+    with file_path.open("rb") as file:
         buffer = bytearray()
         while True:
             chunk = file.read(4096)  # Read in chunks of 4096 bytes
@@ -253,7 +261,7 @@ def read_binary_file(file_path: str, encoding: str) -> Iterable[str]:
                 if not match:
                     break  # No more line breaks found, process the remaining buffer
 
-                start, end = match.start(), match.end()
+                end = match.end()
                 chunk_str = buffer[:end].decode(encoding, errors="strict")
                 buffer = buffer[end:]
 
@@ -277,4 +285,4 @@ def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) -> None:
         for line in lines_out:
             f.write(line.encode(encoding))
 
-    console.print(f"Wrote {file_out}")
+    console.print(f"Wrote {file_out}", soft_wrap=True)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,6 +1,6 @@
 """Utilities and fixtures that are available automatically for all tests."""
 
-import io, os
+import os
 from pathlib import Path
 
 from fpdf import FPDF
@@ -58,7 +58,7 @@ def pdf_file_100(tmp_path):
     for i in range(100):
         pdf.add_page()
         pdf.set_font("helvetica", size=12)
-        pdf.cell(200, 10, txt=f"{i}", ln=True, align="C")
+        pdf.cell(200, 10, text=f"{i}", ln=True, align="C")
 
     pdf_filepath = tmp_path / "pdf_file_100.pdf"
     pdf.output(pdf_filepath)
@@ -73,7 +73,7 @@ def pdf_file_abc(tmp_path):
     for char in [chr(i) for i in range(ord("a"), ord("z") + 1)]:
         pdf.add_page()
         pdf.set_font("helvetica", size=12)
-        pdf.cell(200, 10, txt=f"{char}", ln=True, align="C")
+        pdf.cell(200, 10, text=f"{char}", ln=True, align="C")
 
     pdf_filepath = tmp_path / "abc.pdf"
     pdf.output(pdf_filepath)