Skip to content

Commit b62f298

Browse files
author
Cimon Lucas (LCM)
committed
Pleasing mypy & typing imports under Python 3.8
1 parent c3a6c88 commit b62f298

File tree

2 files changed

+23
-14
lines changed

2 files changed

+23
-14
lines changed

pdfly/update_offsets.py

+20-11
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,15 @@
2424
update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf
2525
"""
2626

27-
from collections.abc import Iterable
2827
from pathlib import Path
29-
from rich.console import Console
3028
import re
3129
import sys
30+
if sys.version_info >= (3, 9):
31+
List = list
32+
else: # Support for Python 3.8
33+
from typing import List
34+
35+
from rich.console import Console
3236

3337
# Here, only simple regular expressions are used.
3438
# Beyond a certain level of complexity, switching to a proper PDF dictionary parser would be better.
@@ -41,13 +45,13 @@
4145

4246

4347
def update_lines(
44-
lines_in: Iterable[str], encoding: str, console: Console, verbose: bool
45-
) -> Iterable[str]:
48+
lines_in: List[str], encoding: str, console: Console, verbose: bool
49+
) -> List[str]:
4650
"""Iterates over the lines of a PDF file and updates offsets.
4751
4852
The input is expected to be a pdf without binary-sections.
4953
50-
:param lines_in: An Iterable over the lines including line-breaks.
54+
:param lines_in: A list of the lines, including line-breaks.
5155
:param encoding: The encoding, e.g. "iso-8859-1" or "UTF-8".
5256
:param console: Console used to print messages.
5357
:param verbose: True to activate logging of info-messages.
@@ -184,7 +188,12 @@ def update_lines(
184188

185189
for curr_obj, stream_len in map_stream_len.items():
186190
if curr_obj in map_obj_length_line:
187-
m_length = RE_LENGTH.match(map_obj_length_line[curr_obj])
191+
line = map_obj_length_line[curr_obj]
192+
m_length = RE_LENGTH.match(line)
193+
if m_length is None:
194+
raise RuntimeError(
195+
f"Invalid PDF file: line '{line}' does not contain a valid /Length."
196+
)
188197
prev_length = m_length.group(2)
189198
len_digits = len(prev_length)
190199
len_format = "%%0%dd" % len_digits
@@ -229,16 +238,16 @@ def update_lines(
229238
return lines_out
230239

231240

232-
def read_binary_file(file_path: str, encoding: str) -> Iterable[str]:
241+
def read_binary_file(file_path: Path, encoding: str) -> List[str]:
233242
"""Reads a binary file line by line and returns these lines as a list of strings in the given encoding.
234243
Encoding utf-8 can't be used to read random binary data.
235244
236245
:param file_path: file to be read line by line
237246
:param encoding: encoding to be used (e.g. "iso-8859-1")
238247
:return: lines including line-breaks
239248
"""
240-
chunks = []
241-
with open(file_path, "rb") as file:
249+
chunks: List[str] = []
250+
with file_path.open("rb") as file:
242251
buffer = bytearray()
243252
while True:
244253
chunk = file.read(4096) # Read in chunks of 4096 bytes
@@ -253,7 +262,7 @@ def read_binary_file(file_path: str, encoding: str) -> Iterable[str]:
253262
if not match:
254263
break # No more line breaks found, process the remaining buffer
255264

256-
start, end = match.start(), match.end()
265+
end = match.end()
257266
chunk_str = buffer[:end].decode(encoding, errors="strict")
258267
buffer = buffer[end:]
259268

@@ -277,4 +286,4 @@ def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) -> None:
277286
for line in lines_out:
278287
f.write(line.encode(encoding))
279288

280-
console.print(f"Wrote {file_out}")
289+
console.print(f"Wrote {file_out}", soft_wrap=True)

tests/conftest.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Utilities and fixtures that are available automatically for all tests."""
22

3-
import io, os
3+
import os
44
from pathlib import Path
55

66
from fpdf import FPDF
@@ -58,7 +58,7 @@ def pdf_file_100(tmp_path):
5858
for i in range(100):
5959
pdf.add_page()
6060
pdf.set_font("helvetica", size=12)
61-
pdf.cell(200, 10, txt=f"{i}", ln=True, align="C")
61+
pdf.cell(200, 10, text=f"{i}", ln=True, align="C")
6262

6363
pdf_filepath = tmp_path / "pdf_file_100.pdf"
6464
pdf.output(pdf_filepath)
@@ -73,7 +73,7 @@ def pdf_file_abc(tmp_path):
7373
for char in [chr(i) for i in range(ord("a"), ord("z") + 1)]:
7474
pdf.add_page()
7575
pdf.set_font("helvetica", size=12)
76-
pdf.cell(200, 10, txt=f"{char}", ln=True, align="C")
76+
pdf.cell(200, 10, text=f"{char}", ln=True, align="C")
7777

7878
pdf_filepath = tmp_path / "abc.pdf"
7979
pdf.output(pdf_filepath)

0 commit comments

Comments
 (0)