|
| 1 | +"""Show metadata of a PDF file""" |
| 2 | + |
| 3 | +from enum import Enum |
| 4 | +from pathlib import Path |
| 5 | +from typing import Optional, Tuple |
| 6 | + |
| 7 | +from pydantic import BaseModel |
| 8 | +from PyPDF2 import PdfFileReader |
| 9 | + |
| 10 | + |
class MetaInfo(BaseModel):
    """Metadata describing a single PDF file.

    Field order is significant: it is the order in which the fields are
    serialized when the model is dumped as JSON.
    """

    # Document information entries; either may be missing from a PDF.
    title: Optional[str] = None
    producer: Optional[str] = None

    # Number of pages in the document.
    pages: int

    # Whether the reader reports the document as encrypted.
    encrypted: bool

    # Size of the file on disk, in bytes.
    file_size: int

    # Page dimensions as (width, height).
    page_size: Tuple[float, float]

    # PDF version header taken from the first line of the file.
    pdf_file_version: str
| 19 | + |
| 20 | + |
class OutputOptions(Enum):
    """Output formats supported when showing PDF metadata."""

    # Machine-readable JSON output.
    json = "json"
    # Human-readable table output.
    text = "text"
| 24 | + |
| 25 | + |
def main(pdf: Path, output: OutputOptions) -> None:
    """Print the metadata of a PDF file.

    Args:
        pdf: Path of the PDF file to inspect.
        output: ``OutputOptions.json`` prints the metadata as JSON; any
            other value prints a table rendered with ``rich``.

    The page size is computed from the media box of the first page, so the
    document must contain at least one page.
    """
    with open(pdf, "rb") as f:
        reader = PdfFileReader(f)

        # getDocumentInfo() returns None when the PDF has no /Info
        # dictionary; title and producer then simply stay unset.
        info = reader.getDocumentInfo()
        title = info.title if info is not None else None
        producer = info.producer if info is not None else None

        # Media box corners of the first page: lower-left (x1, y1) and
        # upper-right (x2, y2).
        x1, y1, x2, y2 = reader.getPage(0).mediaBox

        # The first line of the file is the PDF header (e.g. "%PDF-1.4").
        # Strip the line terminator so it does not leak into the output, and
        # decode leniently so stray non-UTF-8 bytes cannot crash the command.
        reader.stream.seek(0)
        header_line = reader.stream.readline()
        pdf_file_version = header_line.decode("utf-8", errors="replace").strip()

        # Built while the file is still open: the reader reads from the
        # stream lazily (e.g. for the page count).
        meta = MetaInfo(
            title=title,
            producer=producer,
            pages=reader.getNumPages(),
            encrypted=reader.isEncrypted,
            file_size=pdf.stat().st_size,
            page_size=(x2 - x1, y2 - y1),
            pdf_file_version=pdf_file_version,
        )

    if output is OutputOptions.json:
        print(meta.json())
        return

    # rich is only needed for the table output, so import it lazily.
    from rich.console import Console
    from rich.table import Table

    table = Table(title=f"{pdf}")
    table.add_column("Attribute", justify="right", style="cyan", no_wrap=True)
    table.add_column("Value", style="white")

    table.add_row("Title", meta.title)
    table.add_row("Producer", meta.producer)
    table.add_row("Pages", f"{meta.pages:,}")
    table.add_row("Encrypted", f"{meta.encrypted}")
    table.add_row("File size", f"{meta.file_size:,} bytes")
    table.add_row(
        "Page size", f"{meta.page_size[0]} x {meta.page_size[1]} pts (w x h)"
    )
    table.add_row("PDF File Version", meta.pdf_file_version)

    console = Console()
    console.print(table)
0 commit comments