@@ -19,9 +19,6 @@ class EncryptionData(BaseModel):
19
19
class MetaInfo (BaseModel ):
20
20
encryption : Optional [EncryptionData ] = None
21
21
pdf_file_version : str
22
- title : Optional [str ] = None
23
- producer : Optional [str ] = None
24
- author : Optional [str ] = None
25
22
pages : Optional [int ] = None
26
23
page_mode : Optional [str ] = None
27
24
page_layout : Optional [str ] = None
@@ -30,6 +27,15 @@ class MetaInfo(BaseModel):
30
27
id2 : Optional [bytes ] = None
31
28
images : List [int ] = []
32
29
30
+ # PDF /Info dictionary
31
+ author : Optional [str ] = None
32
+ creation_date : Optional [str ] = None
33
+ creator : Optional [str ] = None
34
+ keywords : Optional [str ] = None
35
+ producer : Optional [str ] = None
36
+ subject : Optional [str ] = None
37
+ title : Optional [str ] = None
38
+
33
39
# OS Information
34
40
file_permissions : str
35
41
file_size : int # in bytes
@@ -43,12 +49,14 @@ def main(pdf: Path, output: OutputOptions) -> None:
43
49
if reader .is_encrypted :
44
50
pdf_stat = pdf .stat ()
45
51
meta = MetaInfo (
46
- encryption = EncryptionData (
47
- v_value = reader ._encryption .V ,
48
- revision = reader ._encryption .R ,
49
- )
50
- if reader .is_encrypted and reader ._encryption
51
- else None ,
52
+ encryption = (
53
+ EncryptionData (
54
+ v_value = reader ._encryption .V ,
55
+ revision = reader ._encryption .R ,
56
+ )
57
+ if reader .is_encrypted and reader ._encryption
58
+ else None
59
+ ),
52
60
pdf_file_version = reader .stream .read (8 ).decode ("utf-8" ),
53
61
# OS Info
54
62
file_permissions = f"{ stat .filemode (pdf_stat .st_mode )} " ,
@@ -66,12 +74,14 @@ def main(pdf: Path, output: OutputOptions) -> None:
66
74
pdf_id = reader .trailer .get ("/ID" )
67
75
meta = MetaInfo (
68
76
pages = len (reader .pages ),
69
- encryption = EncryptionData (
70
- v_value = reader ._encryption .V , # type: ignore
71
- revision = reader ._encryption .R , # type: ignore
72
- )
73
- if reader .is_encrypted and reader ._encryption
74
- else None ,
77
+ encryption = (
78
+ EncryptionData (
79
+ v_value = reader ._encryption .V , # type: ignore
80
+ revision = reader ._encryption .R , # type: ignore
81
+ )
82
+ if reader .is_encrypted and reader ._encryption
83
+ else None
84
+ ),
75
85
page_mode = reader .page_mode ,
76
86
pdf_file_version = pdf_file_version ,
77
87
page_layout = reader .page_layout ,
@@ -91,9 +101,14 @@ def main(pdf: Path, output: OutputOptions) -> None:
91
101
],
92
102
)
93
103
if info is not None :
94
- meta .title = info .title
95
- meta .producer = info .producer
96
104
meta .author = info .author
105
+ meta .creation_date = info .creation_date
106
+ meta .creator = info .creator
107
+ # Pending https://github.com/py-pdf/pypdf/pull/2939 to be able to access .keywords:
108
+ meta .keywords = info .get ("/Keywords" )
109
+ meta .producer = info .producer
110
+ meta .subject = info .subject
111
+ meta .title = info .title
97
112
98
113
if output == OutputOptions .json :
99
114
print (meta .json ())
@@ -107,9 +122,20 @@ def main(pdf: Path, output: OutputOptions) -> None:
107
122
)
108
123
table .add_column ("Value" , style = "white" )
109
124
110
- table .add_row ("Title" , meta .title )
111
- table .add_row ("Producer" , meta .producer )
112
- table .add_row ("Author" , meta .author )
125
+ if meta .title :
126
+ table .add_row ("Title" , meta .title )
127
+ if meta .author :
128
+ table .add_row ("Author" , meta .author )
129
+ if meta .creation_date :
130
+ table .add_row ("CreationDate" , str (meta .creation_date ))
131
+ if meta .creator :
132
+ table .add_row ("Creator" , meta .creator )
133
+ if meta .producer :
134
+ table .add_row ("Producer" , meta .producer )
135
+ if meta .subject :
136
+ table .add_row ("Subject" , meta .subject )
137
+ if meta .keywords :
138
+ table .add_row ("Keywords" , meta .keywords )
113
139
table .add_row ("Pages" , f"{ meta .pages :,} " if meta .pages else "unknown" )
114
140
table .add_row ("Encrypted" , f"{ meta .encryption } " )
115
141
table .add_row ("PDF File Version" , meta .pdf_file_version )
0 commit comments