forked from py-pdf/pdfly
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuncompress.py
52 lines (42 loc) · 1.74 KB
/
uncompress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""Module for uncompressing PDF content streams."""
import zlib
from pathlib import Path
from typing import Optional
from pypdf import PdfReader, PdfWriter
from pypdf.generic import IndirectObject, PdfObject
def main(pdf: Path, output: Path) -> None:
    """Write a copy of *pdf* to *output* with page content streams inflated.

    Every page of the input is visited.  Each content stream reachable from
    the page's ``/Contents`` entry is handed to ``decompress_content_stream``
    and the page is then added to a new writer.  Once the output file has
    been written, the sizes of both files are printed to stdout.

    Args:
        pdf: Path of the PDF file to read.
        output: Path the resulting PDF is written to.
    """
    reader = PdfReader(pdf)
    writer = PdfWriter()

    for page in reader.pages:
        if "/Contents" in page:
            contents: Optional[PdfObject] = page["/Contents"]
            # Resolve a reference first so the array form and the
            # single-stream form of /Contents can be told apart below.
            if isinstance(contents, IndirectObject):
                contents = contents.get_object()

            if isinstance(contents, list):
                # Array form: each element refers to one content stream.
                for content in contents:
                    if isinstance(content, IndirectObject):
                        decompress_content_stream(content)
            elif contents is not None and hasattr(contents, "get_data"):
                # Single-stream form.  ``contents`` is already dereferenced at
                # this point, so testing for IndirectObject here (as the code
                # used to) could never match and such pages were skipped.
                # The helper only relies on get/get_data/set_data/del, which
                # the resolved object is expected to provide.
                decompress_content_stream(contents)  # type: ignore[arg-type]
        writer.add_page(page)

    with open(output, "wb") as fp:
        writer.write(fp)

    orig_size = pdf.stat().st_size
    uncomp_size = output.stat().st_size

    print(f"Original Size : {orig_size:,}")
    print(
        f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)"
    )
def decompress_content_stream(content: IndirectObject) -> None:
    """Inflate *content* in place when its ``/Filter`` is ``/FlateDecode``.

    A stream whose ``/Filter`` entry is anything else, or missing, is left
    untouched.  Otherwise the bytes returned by ``get_data`` are passed
    through ``zlib.decompress``, stored back with ``set_data``, and the
    ``/Filter`` entry is deleted.  A ``zlib.error`` raised along the way is
    reported on stdout instead of being propagated.
    """
    # Guard clause: only FlateDecode streams are handled.
    if content.get("/Filter") != "/FlateDecode":
        return

    try:
        inflated = zlib.decompress(content.get_data())
        content.set_data(inflated)
        # The stored bytes are no longer deflated, so drop the filter entry.
        del content["/Filter"]
    except zlib.error as error:
        message = f"Some content stream with /FlateDecode failed to be decompressed: {error}"
        print(message)