Skip to content

Commit 79f84cb

Browse files
committed
MAINT: Refactor code from _writer to _appearance_stream
This patch introduces a new module, _appearance_stream, and copies two functions from _writer into it. Currently, these functions are needed to develop an appearance stream for a text annotation. They are: update_field_annotation (renamed from _update_field_annotation) and generate_appearance_stream. update_field_annotation was a PdfWriter method, which means that the current code still needs some refactoring, since the new module now has a circular import of PdfWriter. Other than changing self to writer in update_field_annotation, and changing the code in PdfWriter to call update_field_annotation from _appearance_stream, this patch changes nothing. In a future change, we might want to make a class TextAppearanceStream based on generate_appearance_stream, with .from_annotation(Annotation) as a class method (based on update_field_annotation). scale_text would also be a method in this class.
1 parent e625cb6 commit 79f84cb

File tree

2 files changed

+209
-190
lines changed

2 files changed

+209
-190
lines changed

pypdf/_writer.py

Lines changed: 5 additions & 190 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
cast,
4949
)
5050

51-
from ._cmap import _default_fonts_space_width, build_char_map_from_dict
5251
from ._doc_common import DocumentInformation, PdfDocCommon
5352
from ._encryption import EncryptAlgorithm, Encryption
5453
from ._page import PageObject, Transformation
@@ -85,7 +84,6 @@
8584
BooleanObject,
8685
ByteStringObject,
8786
ContentStream,
88-
DecodedStreamObject,
8987
Destination,
9088
DictionaryObject,
9189
EmbeddedFile,
@@ -107,6 +105,7 @@
107105
hex_to_rgb,
108106
is_null_or_none,
109107
)
108+
from .generic._appearance_stream import update_field_annotation
110109
from .pagerange import PageRange, PageRangeSpec
111110
from .types import (
112111
AnnotationSubtype,
@@ -907,157 +906,6 @@ def _add_apstream_object(
907906
xobject_drawing_commands = f"q\n{xobject_cm._to_cm()}\n{xobject_name} Do\nQ".encode()
908907
self._merge_content_stream_to_page(page, xobject_drawing_commands)
909908

910-
def _update_field_annotation(
911-
self,
912-
page: PageObject,
913-
field: DictionaryObject,
914-
annotation: DictionaryObject,
915-
font_name: str = "",
916-
font_size: float = -1,
917-
) -> StreamObject:
918-
# Calculate rectangle dimensions
919-
_rct = cast(RectangleObject, annotation[AA.Rect])
920-
rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))
921-
922-
# Extract font information
923-
da = annotation.get_inherited(
924-
AA.DA,
925-
cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
926-
AA.DA, None
927-
),
928-
)
929-
if da is None:
930-
da = TextStringObject("/Helv 0 Tf 0 g")
931-
else:
932-
da = da.get_object()
933-
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
934-
font_properties = [x for x in font_properties if x != ""]
935-
if font_name:
936-
font_properties[font_properties.index("Tf") - 2] = font_name
937-
else:
938-
font_name = font_properties[font_properties.index("Tf") - 2]
939-
font_height = (
940-
font_size
941-
if font_size >= 0
942-
else float(font_properties[font_properties.index("Tf") - 1])
943-
)
944-
if font_height == 0:
945-
if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
946-
font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
947-
else:
948-
font_height = rct.height - 2
949-
font_properties[font_properties.index("Tf") - 1] = str(font_height)
950-
da = " ".join(font_properties)
951-
y_offset = rct.height - 1 - font_height
952-
953-
# Retrieve font information from local DR ...
954-
dr: Any = cast(
955-
DictionaryObject,
956-
cast(
957-
DictionaryObject,
958-
annotation.get_inherited(
959-
"/DR",
960-
cast(
961-
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
962-
).get("/DR", DictionaryObject()),
963-
),
964-
).get_object(),
965-
)
966-
dr = dr.get("/Font", DictionaryObject()).get_object()
967-
# _default_fonts_space_width keys is the list of Standard fonts
968-
if font_name not in dr and font_name not in _default_fonts_space_width:
969-
# ...or AcroForm dictionary
970-
dr = cast(
971-
dict[Any, Any],
972-
cast(
973-
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
974-
).get("/DR", {}),
975-
)
976-
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
977-
font_res = dr.get(font_name, None)
978-
if not is_null_or_none(font_res):
979-
font_res = cast(DictionaryObject, font_res.get_object())
980-
_font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
981-
200, font_res
982-
)
983-
try: # remove width stored in -1 key
984-
del font_map[-1]
985-
except KeyError:
986-
pass
987-
font_full_rev: dict[str, bytes]
988-
if isinstance(font_encoding, str):
989-
font_full_rev = {
990-
v: k.encode(font_encoding) for k, v in font_map.items()
991-
}
992-
else:
993-
font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
994-
font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
995-
for key, value in font_map.items():
996-
font_full_rev[value] = font_encoding_rev.get(key, key)
997-
else:
998-
logger_warning(f"Font dictionary for {font_name} not found.", __name__)
999-
font_full_rev = {}
1000-
1001-
# Retrieve field text and selected values
1002-
field_flags = field.get(FA.Ff, 0)
1003-
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
1004-
txt = "\n".join(annotation.get_inherited(FA.Opt, []))
1005-
sel = field.get("/V", [])
1006-
if not isinstance(sel, list):
1007-
sel = [sel]
1008-
else: # /Tx
1009-
txt = field.get("/V", "")
1010-
sel = []
1011-
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
1012-
txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
1013-
# Generate appearance stream
1014-
ap_stream = generate_appearance_stream(
1015-
txt, sel, da, font_full_rev, rct, font_height, y_offset
1016-
)
1017-
1018-
# Create appearance dictionary
1019-
dct = DecodedStreamObject.initialize_from_dictionary(
1020-
{
1021-
NameObject("/Type"): NameObject("/XObject"),
1022-
NameObject("/Subtype"): NameObject("/Form"),
1023-
NameObject("/BBox"): rct,
1024-
"__streamdata__": ByteStringObject(ap_stream),
1025-
"/Length": 0,
1026-
}
1027-
)
1028-
if AA.AP in annotation:
1029-
for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
1030-
if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
1031-
dct[k] = v
1032-
1033-
# Update Resources with font information if necessary
1034-
if font_res is not None:
1035-
dct[NameObject("/Resources")] = DictionaryObject(
1036-
{
1037-
NameObject("/Font"): DictionaryObject(
1038-
{
1039-
NameObject(font_name): getattr(
1040-
font_res, "indirect_reference", font_res
1041-
)
1042-
}
1043-
)
1044-
}
1045-
)
1046-
if AA.AP not in annotation:
1047-
annotation[NameObject(AA.AP)] = DictionaryObject(
1048-
{NameObject("/N"): self._add_object(dct)}
1049-
)
1050-
elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
1051-
cast(DictionaryObject, annotation[NameObject(AA.AP)])[
1052-
NameObject("/N")
1053-
] = self._add_object(dct)
1054-
else: # [/AP][/N] exists
1055-
n = annotation[AA.AP]["/N"].indirect_reference.idnum # type: ignore
1056-
self._objects[n - 1] = dct
1057-
dct.indirect_reference = IndirectObject(n, 0, self)
1058-
1059-
return dct
1060-
1061909
FFBITS_NUL = FA.FfBits(0)
1062910

1063911
def update_page_form_field_values(
@@ -1170,12 +1018,12 @@ def update_page_form_field_values(
11701018
):
11711019
# textbox
11721020
if isinstance(value, tuple):
1173-
appearance_stream_obj = self._update_field_annotation(
1174-
page, parent_annotation, annotation, value[1], value[2]
1021+
appearance_stream_obj = update_field_annotation(
1022+
self, page, parent_annotation, annotation, value[1], value[2]
11751023
)
11761024
else:
1177-
appearance_stream_obj = self._update_field_annotation(
1178-
page, parent_annotation, annotation
1025+
appearance_stream_obj = update_field_annotation(
1026+
self, page, parent_annotation, annotation
11791027
)
11801028
elif (
11811029
annotation.get(FA.FT) == "/Sig"
@@ -3409,36 +3257,3 @@ def _create_outline_item(
34093257
format_flag += OutlineFontFlag.bold
34103258
outline_item.update({NameObject("/F"): NumberObject(format_flag)})
34113259
return outline_item
3412-
3413-
3414-
def generate_appearance_stream(
3415-
txt: str,
3416-
sel: list[str],
3417-
da: str,
3418-
font_full_rev: dict[str, bytes],
3419-
rct: RectangleObject,
3420-
font_height: float,
3421-
y_offset: float,
3422-
) -> bytes:
3423-
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
3424-
for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
3425-
if line in sel:
3426-
# may be improved but cannot find how to get fill working => replaced with lined box
3427-
ap_stream += (
3428-
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
3429-
f"0.5 0.5 0.5 rg s\n{da}\n"
3430-
).encode()
3431-
if line_number == 0:
3432-
ap_stream += f"2 {y_offset} Td\n".encode()
3433-
else:
3434-
# Td is a relative translation
3435-
ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
3436-
enc_line: list[bytes] = [
3437-
font_full_rev.get(c, c.encode("utf-16-be")) for c in line
3438-
]
3439-
if any(len(c) >= 2 for c in enc_line):
3440-
ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
3441-
else:
3442-
ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
3443-
ap_stream += b"ET\nQ\nEMC\nQ\n"
3444-
return ap_stream

0 commit comments

Comments
 (0)