Skip to content

Commit 1d4e256

Browse files
Lucas-C and Cimon Lucas (LCM) authored
metadata: now also displaying CreationDate, Creator, Keywords & Subject (#73)
Co-authored-by: Cimon Lucas (LCM) <[email protected]>
1 parent 1340dd9 commit 1d4e256

File tree

1 file changed

+46
-20
lines changed

1 file changed

+46
-20
lines changed

pdfly/metadata.py

+46 -20
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,6 @@ class EncryptionData(BaseModel):
1919
class MetaInfo(BaseModel):
2020
encryption: Optional[EncryptionData] = None
2121
pdf_file_version: str
22-
title: Optional[str] = None
23-
producer: Optional[str] = None
24-
author: Optional[str] = None
2522
pages: Optional[int] = None
2623
page_mode: Optional[str] = None
2724
page_layout: Optional[str] = None
@@ -30,6 +27,15 @@ class MetaInfo(BaseModel):
3027
id2: Optional[bytes] = None
3128
images: List[int] = []
3229

30+
# PDF /Info dictionary
31+
author: Optional[str] = None
32+
creation_date: Optional[datetime] = None
33+
creator: Optional[str] = None
34+
keywords: Optional[str] = None
35+
producer: Optional[str] = None
36+
subject: Optional[str] = None
37+
title: Optional[str] = None
38+
3339
# OS Information
3440
file_permissions: str
3541
file_size: int # in bytes
@@ -43,12 +49,14 @@ def main(pdf: Path, output: OutputOptions) -> None:
4349
if reader.is_encrypted:
4450
pdf_stat = pdf.stat()
4551
meta = MetaInfo(
46-
encryption=EncryptionData(
47-
v_value=reader._encryption.V,
48-
revision=reader._encryption.R,
49-
)
50-
if reader.is_encrypted and reader._encryption
51-
else None,
52+
encryption=(
53+
EncryptionData(
54+
v_value=reader._encryption.V,
55+
revision=reader._encryption.R,
56+
)
57+
if reader.is_encrypted and reader._encryption
58+
else None
59+
),
5260
pdf_file_version=reader.stream.read(8).decode("utf-8"),
5361
# OS Info
5462
file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
@@ -66,12 +74,14 @@ def main(pdf: Path, output: OutputOptions) -> None:
6674
pdf_id = reader.trailer.get("/ID")
6775
meta = MetaInfo(
6876
pages=len(reader.pages),
69-
encryption=EncryptionData(
70-
v_value=reader._encryption.V, # type: ignore
71-
revision=reader._encryption.R, # type: ignore
72-
)
73-
if reader.is_encrypted and reader._encryption
74-
else None,
77+
encryption=(
78+
EncryptionData(
79+
v_value=reader._encryption.V, # type: ignore
80+
revision=reader._encryption.R, # type: ignore
81+
)
82+
if reader.is_encrypted and reader._encryption
83+
else None
84+
),
7585
page_mode=reader.page_mode,
7686
pdf_file_version=pdf_file_version,
7787
page_layout=reader.page_layout,
@@ -91,9 +101,14 @@ def main(pdf: Path, output: OutputOptions) -> None:
91101
],
92102
)
93103
if info is not None:
94-
meta.title = info.title
95-
meta.producer = info.producer
96104
meta.author = info.author
105+
meta.creation_date = info.creation_date
106+
meta.creator = info.creator
107+
# Pending https://github.com/py-pdf/pypdf/pull/2939 to be able to access .keywords:
108+
meta.keywords = info.get("/Keywords")
109+
meta.producer = info.producer
110+
meta.subject = info.subject
111+
meta.title = info.title
97112

98113
if output == OutputOptions.json:
99114
print(meta.json())
@@ -107,9 +122,20 @@ def main(pdf: Path, output: OutputOptions) -> None:
107122
)
108123
table.add_column("Value", style="white")
109124

110-
table.add_row("Title", meta.title)
111-
table.add_row("Producer", meta.producer)
112-
table.add_row("Author", meta.author)
125+
if meta.title:
126+
table.add_row("Title", meta.title)
127+
if meta.author:
128+
table.add_row("Author", meta.author)
129+
if meta.creation_date:
130+
table.add_row("CreationDate", str(meta.creation_date))
131+
if meta.creator:
132+
table.add_row("Creator", meta.creator)
133+
if meta.producer:
134+
table.add_row("Producer", meta.producer)
135+
if meta.subject:
136+
table.add_row("Subject", meta.subject)
137+
if meta.keywords:
138+
table.add_row("Keywords", meta.keywords)
113139
table.add_row("Pages", f"{meta.pages:,}" if meta.pages else "unknown")
114140
table.add_row("Encrypted", f"{meta.encryption}")
115141
table.add_row("PDF File Version", meta.pdf_file_version)

0 commit comments

Comments (0)