Skip to content

Commit 575f137

Browse files
committed
🐛 with extract-annotated-pages command
1 parent 7518a6c commit 575f137

File tree

3 files changed

+82
-24
lines changed

3 files changed

+82
-24
lines changed

README.md

+25-24
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,24 @@ $ pdfly --help
2626

2727
pdfly is a pure-python cli application for manipulating PDF files.
2828

29-
╭─ Options ───────────────────────────────────────────────────────────────────╮
30-
│ --version │
31-
│ --help Show this message and exit. │
32-
╰─────────────────────────────────────────────────────────────────────────────╯
33-
╭─ Commands ──────────────────────────────────────────────────────────────────╮
34-
│ 2-up Create a booklet-style PDF from a single input. │
35-
│ cat Concatenate pages from PDF files into a single PDF file. │
36-
│ compress Compress a PDF. │
37-
| uncompress Uncompresses a PDF. │
38-
│ extract-images Extract images from PDF without resampling or altering. │
39-
│ extract-text Extract text from a PDF file. │
40-
│ meta Show metadata of a PDF file │
41-
│ pagemeta Give details about a single page. │
42-
│ rm Remove pages from PDF files. │
43-
│ update-offsets Updates offsets and lengths in a simple PDF file. │
44-
│ x2pdf Convert one or more files to PDF. Each file is a page. │
45-
╰─────────────────────────────────────────────────────────────────────────────╯
29+
╭─ Options ────────────────────────────────────────────────────────────────────────────╮
30+
│ --version │
31+
│ --help Show this message and exit. │
32+
╰──────────────────────────────────────────────────────────────────────────────────────╯
33+
╭─ Commands ───────────────────────────────────────────────────────────────────────────╮
34+
│ 2-up Create a booklet-style PDF from a single input. │
35+
│ cat Concatenate pages from PDF files into a single PDF file. │
36+
│ compress Compress a PDF. │
37+
│ uncompress Uncompresses a PDF. │
38+
│ extract-annotated-pages Extract only the annotated pages from a PDF. │
39+
│ extract-images Extract images from PDF without resampling or altering. │
40+
│ extract-text Extract text from a PDF file. │
41+
│ meta Show metadata of a PDF file │
42+
│ pagemeta Give details about a single page. │
43+
│ rm Remove pages from PDF files. │
44+
│ update-offsets Updates offsets and lengths in a simple PDF file. │
45+
│ x2pdf Convert one or more files to PDF. Each file is a page. │
46+
╰──────────────────────────────────────────────────────────────────────────────────────╯
4647
```
4748

4849
You can see the help of every subcommand by typing:
@@ -56,13 +57,13 @@ $ pdfly 2-up --help
5657
Pairs of two pages will be put on one page (left and right)
5758
usage: python 2-up.py input_file output_file
5859

59-
╭─ Arguments ─────────────────────────────────────────────────────────────────╮
60-
│ * pdf PATH [default: None] [required] │
61-
│ * out PATH [default: None] [required] │
62-
╰─────────────────────────────────────────────────────────────────────────────╯
63-
╭─ Options ───────────────────────────────────────────────────────────────────╮
64-
│ --help Show this message and exit. │
65-
╰─────────────────────────────────────────────────────────────────────────────╯
60+
╭─ Arguments ──────────────────────────────────────────────────────────────────────────╮
61+
│ * pdf PATH [default: None] [required] │
62+
│ * out PATH [default: None] [required] │
63+
╰──────────────────────────────────────────────────────────────────────────────────────╯
64+
╭─ Options ────────────────────────────────────────────────────────────────────────────╮
65+
│ --help Show this message and exit. │
66+
╰──────────────────────────────────────────────────────────────────────────────────────╯
6667
```
6768

6869
## Contributors ✨

pdfly/cli.py

+25
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import pdfly.booklet
1414
import pdfly.cat
1515
import pdfly.compress
16+
import pdfly.extract_annotated_pages
1617
import pdfly.extract_images
1718
import pdfly.metadata
1819
import pdfly.pagemeta
@@ -319,3 +320,27 @@ def x2pdf(
319320
exit_code = pdfly.x2pdf.main(x, output)
320321
if exit_code:
321322
raise typer.Exit(code=exit_code)
323+
324+
325+
# Register the `extract-annotated-pages` subcommand. Its help text is taken
# from the module docstring of pdfly.extract_annotated_pages.
@entry_point.command(  # type: ignore[misc]
    name="extract-annotated-pages",
    help=pdfly.extract_annotated_pages.__doc__,
)
def extract_annotated_pages(
    input_pdf: Annotated[
        Path,
        typer.Argument(
            help="Input PDF file.",
            exists=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ],
    output_pdf: Annotated[
        Optional[Path],
        typer.Option(
            "--output",
            "-o",
            help="Output PDF file. Defaults to 'input_pdf_annotated'.",
            writable=True,
        ),
    ] = None,
) -> None:
    # Thin wrapper: the CLI layer only parses arguments; the implementation
    # module does the actual work (and picks the default output path when
    # output_pdf is None).
    pdfly.extract_annotated_pages.main(input_pdf, output_pdf)

pdfly/extract_annotated_pages.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""
2+
Extract only the annotated pages from a PDF.
3+
4+
Q: Why does this help?
5+
A: https://github.com/py-pdf/pdfly/issues/97
6+
"""
7+
8+
from pathlib import Path
from typing import Optional

from pypdf import PdfReader, PdfWriter
10+
11+
12+
# Check if an annotation is manipulable.
13+
def is_manipulable(annot) -> bool:
    """Tell whether an annotation counts as a manipulable one.

    Every annotation is considered manipulable except those whose
    ``/Subtype`` entry is ``/Link``. An annotation for which ``.get``
    yields no ``/Subtype`` at all is therefore reported as manipulable.

    Args:
        annot: Mapping-like annotation object exposing ``.get``.

    Returns:
        ``False`` for ``/Link`` annotations, ``True`` otherwise.
    """
    excluded_subtypes = ("/Link",)
    subtype = annot.get("/Subtype")
    return subtype not in excluded_subtypes
15+
16+
17+
# Main function.
18+
def main(input_pdf: Path, output_pdf: Optional[Path] = None) -> None:
    """Write a PDF containing only the annotated pages of *input_pdf*.

    A page is copied when it has an ``/Annots`` entry and at least one of
    its annotations passes ``is_manipulable``. The output file is written
    even when no page qualifies, and a one-line summary is printed.

    Args:
        input_pdf: PDF file to read.
        output_pdf: Destination file. When ``None``, the result is written
            next to the input as ``<stem>_annotated<suffix>``.
    """
    if output_pdf is None:
        # with_name() rather than with_stem(): same result, but with_stem()
        # only exists on Python 3.9+.
        output_pdf = input_pdf.with_name(
            f"{input_pdf.stem}_annotated{input_pdf.suffix}"
        )
    # Named reader/writer so the builtin `input` is not shadowed.
    reader = PdfReader(input_pdf)
    writer = PdfWriter()
    output_pages = 0
    # Copy only the pages with annotations
    for page in reader.pages:
        if "/Annots" not in page:
            continue
        if not any(is_manipulable(annot) for annot in page["/Annots"]):
            continue
        writer.add_page(page)
        output_pages += 1
    # Save the output PDF
    writer.write(output_pdf)
    print(f"Extracted {output_pages} pages with annotations to {output_pdf}")

0 commit comments

Comments
 (0)