Skip to content

Commit 703df33

Browse files
Add booklet command (#77)
1 parent c05a660 commit 703df33

File tree

5 files changed

+326
-1
lines changed

5 files changed

+326
-1
lines changed

pdfly/booklet.py

+159
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
"""
2+
Reorder and two-up PDF pages for booklet printing.
3+
4+
If the number of pages is not a multiple of four, pages are
5+
added until it is a multiple of four. This includes a centerfold
6+
in the middle of the booklet and a single page on the inside
7+
back cover. The content of those pages is taken from the
8+
centerfold-file and blank-page-file files, if specified, otherwise
9+
they are blank pages.
10+
11+
Example:
12+
pdfly booklet input.pdf output.pdf
13+
14+
"""
15+
16+
# Copyright (c) 2014, Steve Witham <[email protected]>.
17+
# All rights reserved. This software is available under a BSD license;
18+
# see https://github.com/py-pdf/pypdf/LICENSE
19+
20+
import sys
21+
import traceback
22+
from pathlib import Path
23+
from typing import Generator, Optional, Tuple
24+
25+
from pypdf import (
26+
PageObject,
27+
PdfReader,
28+
PdfWriter,
29+
)
30+
from pypdf.generic import RectangleObject
31+
32+
33+
def main(
    filename: Path,
    output: Path,
    inside_cover_file: Optional[Path],
    centerfold_file: Optional[Path],
) -> None:
    """
    Write a two-up, booklet-ordered copy of a PDF file.

    The pages of ``filename`` are padded to a multiple of four, paired in
    the order given by ``page_iter`` and merged side by side, one pair per
    output page. Any exception is reported on stderr (with its traceback)
    and the process exits with status 1.

    Args:
        filename (Path): The PDF file to read.
        output (Path): The PDF file to write.
        inside_cover_file (Optional[Path]): PDF whose first page is inserted
            just before the last page when the input has an odd number of
            pages. A blank page is inserted if not given.
        centerfold_file (Optional[Path]): PDF whose first page is merged onto
            the centerfold sheet when two padding pages had to be added in
            the middle. The centerfold stays blank if not given.

    """
    try:
        # Set up the streams
        reader = PdfReader(filename)
        pages = list(reader.pages)
        if not pages:
            # The padding pages take their size from the first page, so an
            # empty document cannot be turned into a booklet.
            raise ValueError(f"{filename} contains no pages")
        writer = PdfWriter()

        page_width = pages[0].mediabox.width
        page_height = pages[0].mediabox.height

        def new_blank_page() -> PageObject:
            # Every padding slot gets its own object: the pages are modified
            # in place when they are merged below, so sharing one instance
            # between several slots would alias those modifications.
            return PageObject.create_blank_page(
                width=page_width, height=page_height
            )

        # Add blank pages to make the number of pages a multiple of 4
        # If the user specified an inside-back-cover file, use it.
        if len(pages) % 2 == 1:
            if inside_cover_file:
                inside_cover_page = fetch_first_page(inside_cover_file)
            else:
                inside_cover_page = new_blank_page()
            # insert(-1, ...) places the page just before the last page.
            pages.insert(-1, inside_cover_page)

        requires_centerfold = len(pages) % 4 == 2
        if requires_centerfold:
            # Two pages in the middle of the document form the centerfold.
            pages.insert(len(pages) // 2, new_blank_page())
            pages.insert(len(pages) // 2, new_blank_page())

        # Reorder the pages and place two pages side by side (2-up) on each sheet
        for lhs, rhs in page_iter(len(pages)):
            left_page = pages[lhs]
            left_page.merge_translated_page(
                page2=pages[rhs],
                tx=left_page.mediabox.width,
                ty=0,
                expand=True,
                over=True,
            )
            writer.add_page(left_page)

        # If a centerfold was required, it is already
        # present as a pair of blank pages. If the user
        # specified a centerfold file, use it instead.
        if requires_centerfold and centerfold_file:
            centerfold_page = fetch_first_page(centerfold_file)
            # The innermost pair is yielded last, so the centerfold sheet is
            # the last page that was added to the writer.
            last_page = writer.pages[-1]
            if centerfold_page.rotation != 0:
                centerfold_page.transfer_rotation_to_content()
            if requires_rotate(centerfold_page.mediabox, last_page.mediabox):
                centerfold_page = centerfold_page.rotate(270)
            if centerfold_page.rotation != 0:
                centerfold_page.transfer_rotation_to_content()
            last_page.merge_page(centerfold_page)

        # Everything looks good! Write the output file.
        with open(output, "wb") as output_fh:
            writer.write(output_fh)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(f"Error while reading {filename}", file=sys.stderr)
        sys.exit(1)
96+
97+
98+
def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool:
    """
    Tell whether two rectangles have different orientations.

    A rectangle counts as portrait when its height is strictly greater
    than its width; anything else (including a square) does not.

    Args:
        a (RectangleObject): The first rectangle.
        b (RectangleObject): The second rectangle.

    Returns:
        bool: True if exactly one of the two rectangles is portrait.

    """
    return (a.height > a.width) != (b.height > b.width)
110+
111+
112+
def fetch_first_page(filename: Path) -> PageObject:
    """
    Open a PDF file and return its first page.

    Args:
        filename (Path): The path to the PDF file.

    Returns:
        PageObject: The page at index 0 of the PDF file.

    """
    reader = PdfReader(filename)
    first_page = reader.pages[0]
    return first_page
124+
125+
126+
# This function was written with inspiration, assistance, and code
127+
# from claude.ai & Github Copilot
128+
def page_iter(num_pages: int) -> Generator[Tuple[int, int], None, None]:
    """
    Generate pairs of page indices for printing a booklet.

    Sheets are visited from the outermost to the innermost. Each sheet
    produces two pairs: first its outside, pairing the last remaining page
    with the first remaining one, then its inside, pairing the two pages
    that follow inward. For 8 pages the pairs are
    (7, 0), (1, 6), (5, 2), (3, 4).

    Args:
        num_pages (int): The total number of pages in the document.
            Must be non-negative and divisible by 4.

    Yields:
        Tuple[int, int]: Zero-based page indices (left, right) to be placed
            on one side of a sheet.

    Raises:
        ValueError: If the number of pages is negative or not divisible by 4.
            As this is a generator, the error is raised when iteration
            starts, not when the function is called.

    """
    if num_pages < 0:
        # A negative multiple of four would otherwise pass the check below
        # and silently produce an empty booklet.
        raise ValueError("Number of pages must not be negative")
    if num_pages % 4 != 0:
        raise ValueError("Number of pages must be divisible by 4")

    for sheet in range(num_pages // 4):
        # Outside the fold
        first_page = sheet * 2
        last_page = num_pages - 1 - first_page
        yield last_page, first_page

        # Inside the fold: one step inward from both ends
        yield first_page + 1, last_page - 1

pdfly/cli.py

+46-1
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
"""
66

77
from pathlib import Path
8-
from typing import List
8+
from typing import List, Optional
99

1010
import typer
1111
from typing_extensions import Annotated
1212

13+
import pdfly.booklet
1314
import pdfly.cat
1415
import pdfly.compress
1516
import pdfly.extract_images
@@ -98,6 +99,50 @@ def cat(
9899
pdfly.cat.main(filename, fn_pgrgs, output, verbose)
99100

100101

102+
@entry_point.command(name="booklet", help=pdfly.booklet.__doc__)  # type: ignore[misc]
def booklet(
    # Input PDF: must be an existing file.
    filename: Annotated[
        Path,
        typer.Argument(exists=True, dir_okay=False, resolve_path=True),
    ],
    # Output PDF: does not have to exist; the path is used as given.
    output: Annotated[
        Path,
        typer.Argument(exists=False, dir_okay=False, resolve_path=False),
    ],
    # Optional PDF used for the page added to an odd-length input.
    blank_page: Annotated[
        Optional[Path],
        typer.Option(
            "-b",
            "--blank-page-file",
            exists=True,
            dir_okay=False,
            resolve_path=True,
            help="page added if input is odd number of pages",
        ),
    ] = None,
    # Optional PDF used for the centerfold when one has to be added.
    centerfold: Annotated[
        Optional[Path],
        typer.Option(
            "-c",
            "--centerfold-file",
            exists=True,
            dir_okay=False,
            resolve_path=True,
            help="double-page added if input is missing >= 2 pages",
        ),
    ] = None,
) -> None:
    # The CLI calls it "blank page"; the implementation calls it the inside
    # cover, so the mapping is spelled out with keywords.
    pdfly.booklet.main(
        filename=filename,
        output=output,
        inside_cover_file=blank_page,
        centerfold_file=centerfold,
    )
144+
145+
101146
@entry_point.command(name="rm", help=pdfly.rm.__doc__)
102147
def rm(
103148
filename: Annotated[

resources/b.pdf

8.41 KB
Binary file not shown.

resources/c.pdf

8.44 KB
Binary file not shown.

tests/test_booklet.py

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import pytest
2+
from pypdf import PdfReader
3+
4+
from .conftest import RESOURCES_ROOT, chdir, run_cli
5+
6+
7+
def test_booklet_fewer_args(capsys, tmp_path):
    """The booklet command fails with a usage error when the output argument is missing."""
    with chdir(tmp_path):
        # Invoke "booklet" itself; calling another command here would leave
        # the booklet argument validation untested.
        exit_code = run_cli(["booklet", str(RESOURCES_ROOT / "box.pdf")])
    assert exit_code == 2
    captured = capsys.readouterr()
    assert "Missing argument" in captured.err
13+
14+
15+
def test_booklet_extra_args(capsys, tmp_path):
    """The booklet command fails with a usage error when given a third positional argument."""
    argv = ["booklet", str(RESOURCES_ROOT / "box.pdf"), "a.pdf", "b.pdf"]
    with chdir(tmp_path):
        status = run_cli(argv)
    assert status == 2
    stderr_text = capsys.readouterr().err
    assert "unexpected extra argument" in stderr_text
23+
24+
25+
def test_booklet_page_size(capsys, tmp_path):
    """An 8-page input gives 4 output pages, each twice as wide and equally tall."""
    source_pdf = str(RESOURCES_ROOT / "input8.pdf")

    with chdir(tmp_path):
        status = run_cli(["booklet", source_pdf, "output8.pdf"])
        source_reader = PdfReader(source_pdf)
        booklet_reader = PdfReader("output8.pdf")

        assert status == 0

        assert len(source_reader.pages) == 8
        assert len(booklet_reader.pages) == 4

        source_box = source_reader.pages[0].mediabox
        source_height = source_box.height
        source_width = source_box.width
        booklet_box = booklet_reader.pages[0].mediabox
        booklet_height = booklet_box.height
        booklet_width = booklet_box.width

        # Two input pages sit side by side on every output page.
        assert booklet_width == source_width * 2
        assert source_height == booklet_height
51+
52+
53+
@pytest.mark.parametrize(
    ("page_count", "expected", "expected_bc"),
    [
        ("8", "81\n27\n63\n45\n", "81\n27\n63\n45\n"),
        ("7", "71\n2\n63\n45\n", "71\n2b\n63\n45\n"),
        ("6", "61\n25\n43\n\n", "61\n25\n43\nc\n"),
        ("5", "51\n2\n43\n\n", "51\n2b\n43\nc\n"),
        ("4", "41\n23\n", "41\n23\n"),
        ("3", "31\n2\n", "31\n2b\n"),
        ("2", "21\n\n", "21\nc\n"),
        ("1", "1\n\n", "1b\nc\n"),
    ],
)
def test_booklet_order(capsys, tmp_path, page_count, expected, expected_bc):
    """
    Check the page order of booklets built from the first 1 to 8 pages.

    ``expected`` is the text extracted from a booklet built without extra
    files; ``expected_bc`` is the text extracted when both the blank-page
    file and the centerfold file are supplied.
    """
    source_name = f"input{page_count}.pdf"
    # (extra booklet options, output file name, expected extracted text)
    variants = [
        ([], f"output{page_count}.pdf", expected),
        (
            [
                "--centerfold-file",
                str(RESOURCES_ROOT / "c.pdf"),
                "--blank-page-file",
                str(RESOURCES_ROOT / "b.pdf"),
            ],
            f"outputbc{page_count}.pdf",
            expected_bc,
        ),
    ]

    with chdir(tmp_path):
        # Build the input from the first `page_count` pages of the 8-page file.
        status = run_cli(
            [
                "cat",
                "-o",
                source_name,
                str(RESOURCES_ROOT / "input8.pdf"),
                f":{page_count}",
            ]
        )
        assert status == 0

        for extra_options, target_name, wanted_text in variants:
            status = run_cli(
                ["booklet", *extra_options, source_name, target_name]
            )
            captured = capsys.readouterr()
            assert status == 0, captured.err

            status = run_cli(["extract-text", target_name])
            captured = capsys.readouterr()
            assert status == 0, captured.err
            assert captured.out == wanted_text

0 commit comments

Comments
 (0)