Skip to content

Commit fc42eb4

Browse files
author
Cimon Lucas (LCM)
committed
Pleasing mypy & typing imports under Python 3.8
1 parent c3a6c88 commit fc42eb4

File tree

4 files changed

+41
-30
lines changed

4 files changed

+41
-30
lines changed

pdfly/cli.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -236,10 +236,10 @@ def update_offsets(
236236
encoding: str = typer.Option(
237237
"ISO-8859-1",
238238
help="Encoding used to read and write the files, e.g. UTF-8.",
239-
), # noqa
239+
),
240240
verbose: bool = typer.Option(
241241
False, help="Show progress while processing."
242-
), # noqa
242+
),
243243
) -> None:
244244
pdfly.update_offsets.main(file_in, file_out, encoding, verbose)
245245

pdfly/update_offsets.py

+34-23
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env python
21
"""
32
Updates offsets and lengths in a simple PDF file.
43
@@ -20,15 +19,21 @@
2019
It expects that the /Length-entries have default values containing
2120
enough digits, e.g. /Length 000 when the stream consists of 576 bytes.
2221
23-
EXAMPLE
22+
Example:
2423
update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf
24+
2525
"""
2626

27-
from collections.abc import Iterable
28-
from pathlib import Path
29-
from rich.console import Console
3027
import re
3128
import sys
29+
from pathlib import Path
30+
31+
if sys.version_info >= (3, 9):
32+
List = list
33+
else: # Support for Python 3.8
34+
from typing import List
35+
36+
from rich.console import Console
3237

3338
# Here, only simple regular expressions are used.
3439
# Beyond a certain level of complexity, switching to a proper PDF dictionary parser would be better.
@@ -41,20 +46,20 @@
4146

4247

4348
def update_lines(
44-
lines_in: Iterable[str], encoding: str, console: Console, verbose: bool
45-
) -> Iterable[str]:
46-
"""Iterates over the lines of a pdf-files and updates offsets.
49+
lines_in: List[str], encoding: str, console: Console, verbose: bool
50+
) -> List[str]:
51+
"""
52+
Iterates over the lines of a PDF file and updates offsets.
4753
4854
The input is expected to be a pdf without binary-sections.
4955
50-
:param lines_in: An Iterable over the lines including line-breaks.
56+
:param lines_in: A list of the lines, including line-breaks.
5157
:param encoding: The encoding, e.g. "iso-8859-1" or "UTF-8".
5258
:param console: Console used to print messages.
5359
:param verbose: True to activate logging of info-messages.
5460
:return: The output is a list of lines to be written
5561
in the given encoding.
5662
"""
57-
5863
lines_out = [] # lines to be written
5964
map_line_offset = {} # map from line-number to offset
6065
map_obj_offset = {} # map from object-number to offset
@@ -184,22 +189,27 @@ def update_lines(
184189

185190
for curr_obj, stream_len in map_stream_len.items():
186191
if curr_obj in map_obj_length_line:
187-
m_length = RE_LENGTH.match(map_obj_length_line[curr_obj])
192+
line = map_obj_length_line[curr_obj]
193+
m_length = RE_LENGTH.match(line)
194+
if m_length is None:
195+
raise RuntimeError(
196+
f"Invalid PDF file: line '{line}' does not contain a valid /Length."
197+
)
188198
prev_length = m_length.group(2)
189199
len_digits = len(prev_length)
190200
len_format = "%%0%dd" % len_digits
191201
updated_length = len_format % stream_len
192202
if len(updated_length) > len_digits:
193203
raise RuntimeError(
194204
f"Not enough digits in /Length-entry {prev_length}"
195-
+ f" of object {curr_obj}:"
196-
+ f" too short to take /Length {updated_length}"
205+
f" of object {curr_obj}:"
206+
f" too short to take /Length {updated_length}"
197207
)
198208
line = m_length.group(1) + updated_length + m_length.group(3)
199209
lines_out[map_obj_length_line_no[curr_obj] - 1] = line
200210
elif curr_obj in map_obj_length_ref:
201211
len_obj = map_obj_length_ref[curr_obj]
202-
if not len_obj in map_obj_line:
212+
if len_obj not in map_obj_line:
203213
raise RuntimeError(
204214
f"obj {curr_obj} has unknown length-obj {len_obj}"
205215
)
@@ -211,8 +221,8 @@ def update_lines(
211221
if len(updated_length) > len_digits:
212222
raise RuntimeError(
213223
f"Not enough digits in /Length-ref-entry {prev_length}"
214-
+ f" of object {curr_obj} and len-object {len_obj}:"
215-
+ f" too short to take /Length {updated_length}"
224+
f" of object {curr_obj} and len-object {len_obj}:"
225+
f" too short to take /Length {updated_length}"
216226
)
217227
if prev_length != updated_length:
218228
if verbose:
@@ -223,22 +233,23 @@ def update_lines(
223233
else:
224234
raise RuntimeError(
225235
f"obj {curr_obj} with stream-len {stream_len}"
226-
+ f" has no object-length-line: {map_obj_length_line}"
236+
f" has no object-length-line: {map_obj_length_line}"
227237
)
228238

229239
return lines_out
230240

231241

232-
def read_binary_file(file_path: str, encoding: str) -> Iterable[str]:
233-
"""Reads a binary file line by line and returns these lines as a list of strings in the given encoding.
242+
def read_binary_file(file_path: Path, encoding: str) -> List[str]:
243+
"""
244+
Reads a binary file line by line and returns these lines as a list of strings in the given encoding.
234245
Encoding utf-8 can't be used to read random binary data.
235246
236247
:param file_path: file to be read line by line
237248
:param encoding: encoding to be used (e.g. "iso-8859-1")
238249
:return: lines including line-breaks
239250
"""
240-
chunks = []
241-
with open(file_path, "rb") as file:
251+
chunks: List[str] = []
252+
with file_path.open("rb") as file:
242253
buffer = bytearray()
243254
while True:
244255
chunk = file.read(4096) # Read in chunks of 4096 bytes
@@ -253,7 +264,7 @@ def read_binary_file(file_path: str, encoding: str) -> Iterable[str]:
253264
if not match:
254265
break # No more line breaks found, process the remaining buffer
255266

256-
start, end = match.start(), match.end()
267+
end = match.end()
257268
chunk_str = buffer[:end].decode(encoding, errors="strict")
258269
buffer = buffer[end:]
259270

@@ -277,4 +288,4 @@ def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) -> None:
277288
for line in lines_out:
278289
f.write(line.encode(encoding))
279290

280-
console.print(f"Wrote {file_out}")
291+
console.print(f"Wrote {file_out}", soft_wrap=True)

tests/conftest.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Utilities and fixtures that are available automatically for all tests."""
22

3-
import io, os
3+
import os
44
from pathlib import Path
55

66
from fpdf import FPDF
@@ -58,7 +58,7 @@ def pdf_file_100(tmp_path):
5858
for i in range(100):
5959
pdf.add_page()
6060
pdf.set_font("helvetica", size=12)
61-
pdf.cell(200, 10, txt=f"{i}", ln=True, align="C")
61+
pdf.cell(200, 10, text=f"{i}", ln=True, align="C")
6262

6363
pdf_filepath = tmp_path / "pdf_file_100.pdf"
6464
pdf.output(pdf_filepath)
@@ -73,7 +73,7 @@ def pdf_file_abc(tmp_path):
7373
for char in [chr(i) for i in range(ord("a"), ord("z") + 1)]:
7474
pdf.add_page()
7575
pdf.set_font("helvetica", size=12)
76-
pdf.cell(200, 10, txt=f"{char}", ln=True, align="C")
76+
pdf.cell(200, 10, text=f"{char}", ln=True, align="C")
7777

7878
pdf_filepath = tmp_path / "abc.pdf"
7979
pdf.output(pdf_filepath)

tests/test_update_offsets.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ def test_update_offsets(capsys, tmp_path: Path) -> None:
3434
assert not captured.err
3535
assert re.search(r"Wrote\s+" + re.escape(str(output)), captured.out)
3636
assert output.exists()
37-
with open(file_expected, "r", encoding="iso-8859-1") as file_exp:
37+
with open(file_expected, encoding="iso-8859-1") as file_exp:
3838
lines_exp = file_exp.readlines()
39-
with open(output, "r", encoding="iso-8859-1") as file_act:
39+
with open(output, encoding="iso-8859-1") as file_act:
4040
lines_act = file_act.readlines()
4141
assert len(lines_exp) == len(
4242
lines_act

0 commit comments

Comments
 (0)