Skip to content

Commit 854c467

Browse files
committed
ENH: add decode_as_image() to ContentStreams
closes py-pdf#2613
1 parent b1b55e6 commit 854c467

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

pypdf/generic/_data_structures.py

+20
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,26 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
947947
retval._data = FlateDecode.encode(b_(self._data), level)
948948
return retval
949949

950+
def decode_as_image(self) -> Any:
    """
    Attempt to decode this stream object as an image.

    When the ``/Subtype`` entry is not ``/Image`` a warning is logged, but
    decoding is still attempted.

    Returns:
        A PIL image if proper decoding has been found, ``None`` when the
        decoder reports no extension.
    """
    # Imported locally rather than at module level (NOTE(review): presumably
    # to avoid a circular import with the filters module — confirm).
    from ..filters import _xobj_to_image

    looks_like_image = self.get("/Subtype", "") == "/Image"
    if not looks_like_image:
        # Prefer the indirect reference for identifying the object in the
        # log; fall back to its repr when that attribute is unavailable.
        try:
            msg = f"{self.indirect_reference} does not seems to be an Image"  # pragma: no cover
        except AttributeError:
            msg = f"{self.__repr__()} object does not seems to be an Image"  # pragma: no cover
        logger_warning(msg, __name__)

    # The decoder yields (extension, raw bytes, image); only the image is
    # handed back to the caller.
    extension, _raw_bytes, decoded_image = _xobj_to_image(self)
    if extension is not None:
        return decoded_image
    return None  # pragma: no cover
969+
950970

951971
class DecodedStreamObject(StreamObject):
952972
pass

tests/test_images.py

+21
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,24 @@ def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr):
346346
print(fn) # noqa: T201
347347
img = Image.open(BytesIO(zf.read(fn)))
348348
assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99
349+
350+
351+
@pytest.mark.enable_socket()
def test_extract_image_from_object(caplog):
    """
    Check ``decode_as_image()`` on an image XObject and on a non-image stream.

    The page content stream is not an image: decoding it is expected to log
    a warning and then raise, both without and with an ``indirect_reference``
    set on the object.
    """
    url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
    name = "iss2613.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    image = reader.pages[0]["/Resources"]["/Pattern"]["/P1"]["/Resources"]["/XObject"][
        "/X1"
    ].decode_as_image()
    assert isinstance(image, Image.Image)

    # Fetch the content stream once, outside the ``raises`` blocks, so that
    # only ``decode_as_image()`` is the call expected to fail.
    co = reader.pages[0].get_contents()
    with pytest.raises(Exception):
        co.decode_as_image()
    assert "does not seems to be an Image" in caplog.text
    caplog.clear()

    # Reuse the SAME object: re-fetching the contents here would discard the
    # attribute set below and merely repeat the first scenario.
    co.indirect_reference = "for_test"
    with pytest.raises(Exception):
        co.decode_as_image()
    assert "for_test does not seems to be an Image" in caplog.text

0 commit comments

Comments
 (0)