Skip to content

Commit 791b121

Browse files
committed
BUG: Fix detection of inline images followed by names or numbers
Closes py-pdf#3172.
1 parent 7143554 commit 791b121

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

pypdf/generic/_image_inline.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,15 @@ def is_followed_by_binary_data(stream: IO[bytes], length: int = 10) -> bool:
285285
if operator_end is None:
286286
# We are probably inside an operation.
287287
operator_end = length
288-
if operator_end - operator_start > 3: # noqa: SIM103
288+
operator_length = operator_end - operator_start
289+
operator = data[operator_start:operator_end]
290+
if operator.startswith(b"/") and operator_length > 1:
291+
# Name object.
292+
return False
293+
if operator.replace(b".", b"").isdigit():
294+
# A number (integer or float), i.e. an operand of a graphics operator such as a move.
295+
return False
296+
if operator_length > 3: # noqa: SIM103
289297
# Usually, the operators inside a content stream should not have more than three characters,
290298
# especially after an inline image.
291299
return True

tests/generic/test_image_inline.py

+11
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,14 @@ def test_is_followed_by_binary_data():
4848
# No `operator_start`.
4949
stream = BytesIO(b"ABCDEFG")
5050
assert is_followed_by_binary_data(stream)
51+
52+
# Name object.
53+
stream = BytesIO(b"/R10 gs\n/R12 cs\n")
54+
assert not is_followed_by_binary_data(stream)
55+
56+
# Numbers.
57+
stream = BytesIO(b"1337 42 m\n")
58+
assert not is_followed_by_binary_data(stream)
59+
60+
stream = BytesIO(b"1234.56 42 13 37 10 20 c\n")
61+
assert not is_followed_by_binary_data(stream)

0 commit comments

Comments
 (0)