Skip to content

Commit c4aa15b

Browse files
committed
MAINT: Refactor code from _writer to _appearance_stream
This patch introduces a new module, _appearance_stream, and copies two methods from _writer to this new module. Currently, these methods are needed to develop an appearance stream for a text annotation. They are: update_field_annotation (renamed from _update_field_annotation) and generate_appearance_stream. update_field_annotation was a PdfWriter method, which means that the current code needs some refactoring, since it now has a circular import of PdfWriter. Other than changing self to writer in update_field_annotation, and changing the code in PdfWriter to call update_field_annotation from _appearance_stream, this patch changes nothing. In a future change, we might want to make a class TextAppearanceStream based on generate_appearance_stream, with .from_annotation(Annotation) as a class method (based on update_field_annotation). scale_text would also be a method in this class.
1 parent e625cb6 commit c4aa15b

File tree

2 files changed

+209
-190
lines changed

2 files changed

+209
-190
lines changed

pypdf/_appearance_stream.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
from typing import Any, cast
2+
3+
from ._cmap import _default_fonts_space_width, build_char_map_from_dict
4+
from ._page import PageObject
5+
from ._utils import logger_warning
6+
from .constants import AnnotationDictionaryAttributes as AA
7+
from .constants import CatalogDictionary
8+
from .constants import FieldDictionaryAttributes as FA
9+
from .generic import (
10+
DecodedStreamObject,
11+
DictionaryObject,
12+
IndirectObject,
13+
NameObject,
14+
RectangleObject,
15+
StreamObject,
16+
)
17+
from .generic._base import ByteStringObject, TextStringObject, is_null_or_none
18+
19+
# Font size used by update_field_annotation when the resolved font size is 0
# and the field has the Multiline flag set.
DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12
20+
21+
22+
def generate_appearance_stream(
    txt: str,
    sel: list[str],
    da: str,
    font_full_rev: dict[str, bytes],
    rct: RectangleObject,
    font_height: float,
    y_offset: float,
) -> bytes:
    """
    Build the content-stream bytes that draw the text of a form field.

    The stream opens a ``/Tx`` marked-content section, clips to the rectangle
    inset by one unit, emits ``da`` and then writes one ``Tj`` operator per
    line of ``txt``. Lines are spaced ``font_height * 1.4`` apart.

    Args:
        txt: Text to draw. A line break may be LF, CR or CRLF; a CRLF pair
            counts as a single break. The text is written as-is: this
            function performs no escaping of its own.
        sel: Lines to highlight. A line that is equal to one of these entries
            gets a grey outlined box drawn before its text.
        da: Default-appearance string, emitted verbatim after ``BT`` and again
            after every highlight box.
        font_full_rev: Maps a character to the bytes used to show it. A
            character that is missing from the map is encoded as UTF-16-BE.
        rct: Rectangle of the field; only ``width`` and ``height`` are read.
        font_height: Font size, used for line spacing and highlight height.
        y_offset: Vertical position of the first line.

    Returns:
        The appearance stream content as bytes.
    """
    parts: list[bytes] = [
        f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
    ]
    # Collapse CRLF first: otherwise it turns into two CRs below and produces
    # an empty extra line between every pair of real lines.
    lines = txt.replace("\r\n", "\n").replace("\n", "\r").split("\r")
    for line_number, line in enumerate(lines):
        if line in sel:
            # may be improved but cannot find how to get fill working => replaced with lined box
            parts.append(
                (
                    f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{da}\n"
                ).encode()
            )
        if line_number == 0:
            parts.append(f"2 {y_offset} Td\n".encode())
        else:
            # Td is a relative translation
            parts.append(f"0 {-font_height * 1.4} Td\n".encode())
        enc_line: list[bytes] = [
            font_full_rev.get(c, c.encode("utf-16-be")) for c in line
        ]
        if any(len(c) >= 2 for c in enc_line):
            # At least one multi-byte code: write the line as a hex string.
            parts.append(b"<" + b"".join(enc_line).hex().encode() + b"> Tj\n")
        else:
            parts.append(b"(" + b"".join(enc_line) + b") Tj\n")
    parts.append(b"ET\nQ\nEMC\nQ\n")
    return b"".join(parts)
53+
54+
55+
def update_field_annotation(
    writer: Any,
    page: PageObject,
    field: DictionaryObject,
    annotation: DictionaryObject,
    font_name: str = "",
    font_size: float = -1,
) -> StreamObject:
    """
    Build the appearance stream of a form field and attach it to its annotation.

    The stream is stored as the ``/N`` entry of the annotation's appearance
    dictionary. If that entry already exists, the object it refers to is
    replaced inside ``writer._objects``; otherwise the stream is registered
    with ``writer._add_object``.

    Args:
        writer: Object that owns the document. It must provide ``root_object``
            (containing the ``CatalogDictionary.ACRO_FORM`` entry, which is
            read by subscription), ``_add_object`` and ``_objects``.
        page: Not used by this function.
        field: Field dictionary; its flags, field type and ``/V`` value are read.
        annotation: Annotation dictionary; its rectangle, inherited default
            appearance, resources and options are read, and its appearance
            dictionary is updated.
        font_name: Font resource name to use instead of the one found in the
            default appearance string. Empty keeps the one from the string.
        font_size: Font size to use instead of the one found in the default
            appearance string. A negative value keeps the one from the string.
            A resulting size of 0 is replaced by
            ``DEFAULT_FONT_HEIGHT_IN_MULTILINE`` for multiline fields and by
            the rectangle height minus 2 otherwise.

    Returns:
        The stream object holding the generated appearance.

    Raises:
        ValueError: If the default appearance string contains no ``Tf`` token.
    """
    # Calculate rectangle dimensions
    _rct = cast(RectangleObject, annotation[AA.Rect])
    rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))

    # The AcroForm dictionary supplies the fallback for /DA and /DR.
    acro_form = cast(DictionaryObject, writer.root_object[CatalogDictionary.ACRO_FORM])

    # Extract font information
    da = annotation.get_inherited(AA.DA, acro_form.get(AA.DA, None))
    if da is None:
        da = TextStringObject("/Helv 0 Tf 0 g")
    else:
        da = da.get_object()
    font_properties = [
        x for x in da.replace("\n", " ").replace("\r", " ").split(" ") if x != ""
    ]
    # The two tokens before "Tf" are the font name and the font size.
    tf_index = font_properties.index("Tf")
    if font_name:
        font_properties[tf_index - 2] = font_name
    else:
        font_name = font_properties[tf_index - 2]
    font_height = (
        font_size if font_size >= 0 else float(font_properties[tf_index - 1])
    )
    if font_height == 0:
        if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
            font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
        else:
            font_height = rct.height - 2
    font_properties[tf_index - 1] = str(font_height)
    da = " ".join(font_properties)
    y_offset = rct.height - 1 - font_height

    # Retrieve font information from local DR ...
    dr: Any = cast(
        DictionaryObject,
        annotation.get_inherited("/DR", acro_form.get("/DR", DictionaryObject())),
    ).get_object()
    dr = dr.get("/Font", DictionaryObject()).get_object()
    # _default_fonts_space_width keys is the list of Standard fonts
    if font_name not in dr and font_name not in _default_fonts_space_width:
        # ...or AcroForm dictionary
        # The default must be a DictionaryObject: get_object() is called on it,
        # and a plain dict does not have that method.
        dr = acro_form.get("/DR", DictionaryObject())
        dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
    font_res = dr.get(font_name, None)
    has_font_res = not is_null_or_none(font_res)
    font_full_rev: dict[str, bytes]
    if has_font_res:
        font_res = cast(DictionaryObject, font_res.get_object())
        _, _, font_encoding, font_map = build_char_map_from_dict(200, font_res)
        # remove width stored in -1 key
        font_map.pop(-1, None)
        if isinstance(font_encoding, str):
            font_full_rev = {
                v: k.encode(font_encoding) for k, v in font_map.items()
            }
        else:
            font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
            font_full_rev = dict(font_encoding_rev)
            for key, value in font_map.items():
                # NOTE(review): the fallback stores `key` itself, which looks
                # like a str rather than bytes - confirm this is intended.
                font_full_rev[value] = font_encoding_rev.get(key, key)
    else:
        logger_warning(f"Font dictionary for {font_name} not found.", __name__)
        font_full_rev = {}

    # Retrieve field text and selected values
    field_flags = field.get(FA.Ff, 0)
    if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
        txt = "\n".join(annotation.get_inherited(FA.Opt, []))
        sel = field.get("/V", [])
        if not isinstance(sel, list):
            sel = [sel]
    else:  # /Tx
        txt = field.get("/V", "")
        sel = []
    # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
    txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
    # Generate appearance stream
    ap_stream = generate_appearance_stream(
        txt, sel, da, font_full_rev, rct, font_height, y_offset
    )

    # Create appearance dictionary
    dct = DecodedStreamObject.initialize_from_dictionary(
        {
            NameObject("/Type"): NameObject("/XObject"),
            NameObject("/Subtype"): NameObject("/Form"),
            NameObject("/BBox"): rct,
            "__streamdata__": ByteStringObject(ap_stream),
            "/Length": 0,
        }
    )
    if AA.AP in annotation:
        # Carry over the entries of the previous normal appearance, except the
        # ones that describe the stream that has just been rebuilt.
        for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
            if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                dct[k] = v

    # Update Resources with font information if necessary
    # Same check as above, so that a null font entry is never written out.
    if has_font_res:
        dct[NameObject("/Resources")] = DictionaryObject(
            {
                NameObject("/Font"): DictionaryObject(
                    {
                        NameObject(font_name): getattr(
                            font_res, "indirect_reference", font_res
                        )
                    }
                )
            }
        )
    if AA.AP not in annotation:
        annotation[NameObject(AA.AP)] = DictionaryObject(
            {NameObject("/N"): writer._add_object(dct)}
        )
    elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
        cast(DictionaryObject, annotation[NameObject(AA.AP)])[
            NameObject("/N")
        ] = writer._add_object(dct)
    else:  # [/AP][/N] exists
        # Reuse the object number of the existing appearance stream.
        n = annotation[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
        writer._objects[n - 1] = dct
        dct.indirect_reference = IndirectObject(n, 0, writer)

    return dct

0 commit comments

Comments
 (0)