Skip to content

Commit 9b23ac3

Browse files
authored
SEC: Infinite recursion when using PdfWriter(clone_from=reader) (#2264)
Use a memo of visited objects to check whether the current object in the clone operation has already been visited; if so, do not add it to the list of objects to clone. This avoids infinite recursion when links inside a PDF lead back to the same object.
1 parent 56e191d commit 9b23ac3

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

pypdf/generic/_data_structures.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
List,
4141
Optional,
4242
Sequence,
43+
Set,
4344
Tuple,
4445
Union,
4546
cast,
@@ -187,14 +188,15 @@ def clone(
187188
except Exception:
188189
pass
189190

191+
visited: Set[Tuple[int, int]] = set()
190192
d__ = cast(
191193
"DictionaryObject",
192194
self._reference_clone(self.__class__(), pdf_dest, force_duplicate),
193195
)
194196
if ignore_fields is None:
195197
ignore_fields = []
196198
if len(d__.keys()) == 0:
197-
d__._clone(self, pdf_dest, force_duplicate, ignore_fields)
199+
d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited)
198200
return d__
199201

200202
def _clone(
@@ -203,6 +205,7 @@ def _clone(
203205
pdf_dest: PdfWriterProtocol,
204206
force_duplicate: bool,
205207
ignore_fields: Optional[Sequence[Union[str, int]]],
208+
visited: Set[Tuple[int, int]],
206209
) -> None:
207210
"""
208211
Update the object from src.
@@ -270,6 +273,14 @@ def _clone(
270273
cur_obj.__class__(), pdf_dest, force_duplicate
271274
),
272275
)
276+
# check to see if we've previously processed our item
277+
if clon.indirect_reference is not None:
278+
idnum = clon.indirect_reference.idnum
279+
generation = clon.indirect_reference.generation
280+
if (idnum, generation) in visited:
281+
cur_obj = None
282+
break
283+
visited.add((idnum, generation))
273284
objs.append((cur_obj, clon))
274285
assert prev_obj is not None
275286
prev_obj[NameObject(k)] = clon.indirect_reference
@@ -282,7 +293,7 @@ def _clone(
282293
except Exception:
283294
cur_obj = None
284295
for s, c in objs:
285-
c._clone(s, pdf_dest, force_duplicate, ignore_fields)
296+
c._clone(s, pdf_dest, force_duplicate, ignore_fields, visited)
286297

287298
for k, v in src.items():
288299
if k not in ignore_fields:
@@ -798,6 +809,7 @@ def _clone(
798809
pdf_dest: PdfWriterProtocol,
799810
force_duplicate: bool,
800811
ignore_fields: Optional[Sequence[Union[str, int]]],
812+
visited: Set[Tuple[int, int]],
801813
) -> None:
802814
"""
803815
Update the object from src.
@@ -820,7 +832,7 @@ def _clone(
820832
)
821833
except Exception:
822834
pass
823-
super()._clone(src, pdf_dest, force_duplicate, ignore_fields)
835+
super()._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)
824836

825837
def get_data(self) -> Union[bytes, str]:
826838
return self._data
@@ -1048,6 +1060,7 @@ def clone(
10481060
except Exception:
10491061
pass
10501062

1063+
visited: Set[Tuple[int, int]] = set()
10511064
d__ = cast(
10521065
"ContentStream",
10531066
self._reference_clone(
@@ -1056,7 +1069,7 @@ def clone(
10561069
)
10571070
if ignore_fields is None:
10581071
ignore_fields = []
1059-
d__._clone(self, pdf_dest, force_duplicate, ignore_fields)
1072+
d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited)
10601073
return d__
10611074

10621075
def _clone(
@@ -1065,6 +1078,7 @@ def _clone(
10651078
pdf_dest: PdfWriterProtocol,
10661079
force_duplicate: bool,
10671080
ignore_fields: Optional[Sequence[Union[str, int]]],
1081+
visited: Set[Tuple[int, int]],
10681082
) -> None:
10691083
"""
10701084
Update the object from src.
@@ -1081,7 +1095,7 @@ def _clone(
10811095
self._operations = list(src_cs._operations)
10821096
self.forced_encoding = src_cs.forced_encoding
10831097
# no need to call DictionaryObject or anything
1084-
# like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields)
1098+
# like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)
10851099

10861100
def _parse_content_stream(self, stream: StreamType) -> None:
10871101
# 7.8.2 Content Streams

0 commit comments

Comments
 (0)