From 85182e462255ba9c5c11cae12514879971bb91cf Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:15:23 +0100 Subject: [PATCH 1/6] make maint --- .pre-commit-config.yaml | 2 +- pdfly/cli.py | 7 +- requirements/ci.txt | 50 +++++---------- requirements/dev.txt | 137 ++++++++++++++++++---------------------- requirements/docs.txt | 44 ++++++------- 5 files changed, 96 insertions(+), 144 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e27e3e6..fe3ff6b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: - id: blacken-docs additional_dependencies: [black==22.1.0] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.2 + rev: v0.7.4 hooks: - id: ruff args: ['--fix'] diff --git a/pdfly/cli.py b/pdfly/cli.py index 9c5fa31..faabc75 100644 --- a/pdfly/cli.py +++ b/pdfly/cli.py @@ -198,7 +198,6 @@ def compress( output: Annotated[ Path, typer.Argument( - exists=False, writable=True, ), ], @@ -211,18 +210,14 @@ def uncompress( pdf: Annotated[ Path, typer.Argument( - exists=True, - file_okay=True, dir_okay=False, - writable=False, - readable=True, + exists=True, resolve_path=True, ), ], output: Annotated[ Path, typer.Argument( - exists=False, writable=True, ), ], diff --git a/requirements/ci.txt b/requirements/ci.txt index 564460b..8914f7a 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,79 +1,59 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile requirements/ci.in # -aiohttp==3.9.1 - # via black -aiosignal==1.3.1 - # via aiohttp astor==0.8.1 # via flake8-simplify -async-timeout==4.0.3 - # via aiohttp -attrs==23.1.0 - # via - # aiohttp - # flake8-bugbear -black==23.12.0 +attrs==24.2.0 + # via flake8-bugbear +black==24.10.0 # via -r requirements/ci.in click==8.1.7 # via black -flake8==6.1.0 +flake8==7.1.1 # via # -r requirements/ci.in # flake8-bugbear # flake8-comprehensions # flake8-isort # flake8-simplify -flake8-bugbear==23.12.2 +flake8-bugbear==24.10.31 # via -r requirements/ci.in -flake8-comprehensions==3.14.0 +flake8-comprehensions==3.16.0 # via -r requirements/ci.in flake8-isort==6.1.1 # via -r requirements/ci.in flake8-simplify==0.21.0 # via -r requirements/ci.in -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -idna==3.6 - # via yarl isort==5.13.2 # via flake8-isort mccabe==0.7.0 # via flake8 -multidict==6.0.4 - # via - # aiohttp - # yarl -mypy==1.7.1 +mypy==1.13.0 # via -r requirements/ci.in mypy-extensions==1.0.0 # via # black # mypy -packaging==23.2 +packaging==24.2 # via black pathspec==0.12.1 # via black -platformdirs==4.1.0 +platformdirs==4.3.6 # via black -pycodestyle==2.11.1 +pycodestyle==2.12.1 # via flake8 -pyflakes==3.1.0 +pyflakes==3.2.0 # via flake8 -ruff==0.1.8 +ruff==0.7.4 # via -r requirements/ci.in -tomli==2.0.1 +tomli==2.1.0 # via # black # mypy -typing-extensions==4.9.0 +typing-extensions==4.12.2 # via # black # mypy -yarl==1.9.4 - # via aiohttp diff --git a/requirements/dev.txt b/requirements/dev.txt index 31f5434..3db7618 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,140 +1,129 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile requirements/dev.in # -aiohttp==3.9.1 - # via black -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.6.0 +annotated-types==0.7.0 # via pydantic -async-timeout==4.0.3 - # via aiohttp -attrs==23.1.0 - # via aiohttp -black==23.12.0 +backports-tarfile==1.2.0 + # via jaraco-context +black==24.10.0 # via -r requirements/dev.in -build==1.0.3 +build==1.2.2.post1 # via pip-tools -certifi==2023.11.17 +certifi==2024.8.30 # via requests -cffi==1.16.0 +cffi==1.17.1 # via cryptography cfgv==3.4.0 # via pre-commit -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests click==8.1.7 # via # black # pip-tools -coverage[toml]==7.3.3 +coverage[toml]==7.6.6 # via # coverage # pytest-cov -cryptography==41.0.7 +cryptography==43.0.3 # via secretstorage -distlib==0.3.8 +distlib==0.3.9 # via virtualenv -docutils==0.20.1 +docutils==0.21.2 # via readme-renderer -exceptiongroup==1.2.0 +exceptiongroup==1.2.2 # via pytest -filelock==3.13.1 +filelock==3.16.1 # via virtualenv -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -identify==2.5.33 +identify==2.6.2 # via pre-commit -idna==3.6 - # via - # requests - # yarl -importlib-metadata==7.0.0 +idna==3.10 + # via requests +importlib-metadata==8.5.0 # via - # build # keyring # twine -importlib-resources==6.1.1 - # via keyring iniconfig==2.0.0 # via pytest -jaraco-classes==3.3.0 +jaraco-classes==3.4.0 + # via keyring +jaraco-context==6.0.1 + # via keyring +jaraco-functools==4.1.0 # via keyring jeepney==0.8.0 # via # keyring # secretstorage -keyring==24.3.0 +keyring==25.5.0 # via twine markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -more-itertools==10.1.0 - # via jaraco-classes -multidict==6.0.4 +more-itertools==10.5.0 # via - # aiohttp - # yarl + # jaraco-classes + # jaraco-functools mypy-extensions==1.0.0 # via black -nh3==0.2.15 +nh3==0.2.18 # via readme-renderer -nodeenv==1.8.0 +nodeenv==1.9.1 # via pre-commit -packaging==23.2 +packaging==24.2 # via # black # build # pytest pathspec==0.12.1 # via black -pip-tools==7.3.0 +pip-tools==7.4.1 # via -r requirements/dev.in -pkginfo==1.9.6 +pkginfo==1.10.0 # via twine -platformdirs==4.1.0 +platformdirs==4.3.6 # via # black # virtualenv -pluggy==1.3.0 +pluggy==1.5.0 # via pytest -pre-commit==3.5.0 +pre-commit==4.0.1 # via -r requirements/dev.in -pycparser==2.21 +pycparser==2.22 # via cffi -pydantic==2.5.2 +pydantic==2.9.2 # via -r requirements/dev.in -pydantic-core==2.14.5 +pydantic-core==2.23.4 # via pydantic -pygments==2.17.2 +pygments==2.18.0 # via # readme-renderer # rich -pyproject-hooks==1.0.0 - # via build -pytest==7.4.3 +pyproject-hooks==1.2.0 + # via + # build + # pip-tools +pytest==8.3.3 # via # -r requirements/dev.in # pytest-cov # pytest-socket # pytest-timeout -pytest-cov==4.1.0 +pytest-cov==6.0.0 # via -r requirements/dev.in -pytest-socket==0.6.0 +pytest-socket==0.7.0 # via -r requirements/dev.in -pytest-timeout==2.2.0 +pytest-timeout==2.3.1 # via -r requirements/dev.in -pyyaml==6.0.1 +pyyaml==6.0.2 # via pre-commit -readme-renderer==42.0 +readme-renderer==44.0 # via twine -requests==2.31.0 +requests==2.32.3 # via # requests-toolbelt # twine @@ -142,45 +131,39 @@ requests-toolbelt==1.0.0 # via twine rfc3986==2.0.0 # via twine -rich==13.7.0 +rich==13.9.4 # via # -r requirements/dev.in # twine secretstorage==3.3.3 # via keyring -tomli==2.0.1 +tomli==2.1.0 # via # black # build # coverage # pip-tools - # pyproject-hooks # pytest -twine==4.0.2 +twine==5.1.1 # via -r requirements/dev.in -typing-extensions==4.9.0 +typing-extensions==4.12.2 # via - # annotated-types # black # pydantic # pydantic-core # rich -urllib3==2.1.0 +urllib3==2.2.3 # via # requests # twine -virtualenv==20.25.0 +virtualenv==20.27.1 # via pre-commit -wheel==0.42.0 +wheel==0.45.0 # via # -r requirements/dev.in # pip-tools -yarl==1.9.4 - # via aiohttp -zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources +zipp==3.21.0 + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/docs.txt b/requirements/docs.txt index 8e1fe43..b5e1d42 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,31 +1,29 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile requirements/docs.in # -alabaster==0.7.13 +alabaster==0.7.16 # via sphinx -attrs==23.1.0 +attrs==24.2.0 # via -r requirements/docs.in -babel==2.14.0 +babel==2.16.0 # via sphinx -certifi==2023.11.17 +certifi==2024.8.30 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests docutils==0.17.1 # via # myst-parser # sphinx # sphinx-rtd-theme -idna==3.6 +idna==3.10 # via requests imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.0 - # via sphinx -jinja2==3.1.2 +jinja2==3.1.4 # via # myst-parser # sphinx @@ -33,7 +31,7 @@ markdown-it-py==2.2.0 # via # mdit-py-plugins # myst-parser -markupsafe==2.1.3 +markupsafe==3.0.2 # via jinja2 mdit-py-plugins==0.3.5 # via myst-parser @@ -41,15 +39,13 @@ mdurl==0.1.2 # via markdown-it-py myst-parser==0.16.1 # via -r requirements/docs.in -packaging==23.2 +packaging==24.2 # via sphinx -pygments==2.17.2 +pygments==2.18.0 # via sphinx -pytz==2023.3.post1 - # via babel -pyyaml==6.0.1 +pyyaml==6.0.2 # via myst-parser -requests==2.31.0 +requests==2.32.3 # via sphinx snowballstemmer==2.2.0 # via sphinx @@ -61,21 +57,19 @@ sphinx==4.5.0 # sphinxcontrib-jquery sphinx-rtd-theme==1.3.0 # via -r requirements/docs.in -sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jquery==4.1 # via sphinx-rtd-theme sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -urllib3==2.1.0 +urllib3==2.2.3 # via requests -zipp==3.17.0 - # via importlib-metadata From e42efd5e07e693ab12522877204d2840431007ce Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:32:56 +0100 Subject: [PATCH 2/6] pip-compile for Python 3.8 --- docs/conf.py | 1 + pdfly/__init__.py | 1 + requirements/ci.txt | 6 +++--- requirements/dev.txt | 30 +++++++++++++++++------------- requirements/docs.txt | 22 ++++++++++++++-------- setup.py | 1 + 6 files changed, 37 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 07e324c..eae1ba7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,6 +5,7 @@ For a full list see the documentation: https://www.sphinx-doc.org/en/master/usage/configuration.html """ + # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, diff --git a/pdfly/__init__.py b/pdfly/__init__.py index 7364aa3..1efc402 100644 --- a/pdfly/__init__.py +++ b/pdfly/__init__.py @@ -1,4 +1,5 @@ """pdfly is a command line utility for manipulating PDFs and getting information about them.""" + from ._version import __version__ __all__ = [ diff --git a/requirements/ci.txt b/requirements/ci.txt index 8914f7a..eac538c 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile requirements/ci.in @@ -8,7 +8,7 @@ astor==0.8.1 # via flake8-simplify attrs==24.2.0 # via flake8-bugbear -black==24.10.0 +black==24.8.0 # via -r requirements/ci.in click==8.1.7 # via black @@ -21,7 +21,7 @@ flake8==7.1.1 # flake8-simplify flake8-bugbear==24.10.31 # via -r requirements/ci.in -flake8-comprehensions==3.16.0 +flake8-comprehensions==3.15.0 # via -r requirements/ci.in flake8-isort==6.1.1 # via -r requirements/ci.in diff --git a/requirements/dev.txt b/requirements/dev.txt index 3db7618..fa76740 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile requirements/dev.in @@ -8,7 +8,7 @@ annotated-types==0.7.0 # via pydantic backports-tarfile==1.2.0 # via jaraco-context -black==24.10.0 +black==24.8.0 # via -r requirements/dev.in build==1.2.2.post1 # via pip-tools @@ -24,28 +24,29 @@ click==8.1.7 # via # black # pip-tools -coverage[toml]==7.6.6 - # via - # coverage - # pytest-cov +coverage[toml]==7.6.1 + # via pytest-cov cryptography==43.0.3 # via secretstorage distlib==0.3.9 # via virtualenv -docutils==0.21.2 +docutils==0.20.1 # via readme-renderer exceptiongroup==1.2.2 # via pytest filelock==3.16.1 # via virtualenv -identify==2.6.2 +identify==2.6.1 # via pre-commit idna==3.10 # via requests importlib-metadata==8.5.0 # via + # build # keyring # twine +importlib-resources==6.4.5 + # via keyring iniconfig==2.0.0 # via pytest jaraco-classes==3.4.0 @@ -91,7 +92,7 @@ platformdirs==4.3.6 # virtualenv pluggy==1.5.0 # via pytest -pre-commit==4.0.1 +pre-commit==3.5.0 # via -r requirements/dev.in pycparser==2.22 # via cffi @@ -113,7 +114,7 @@ pytest==8.3.3 # pytest-cov # pytest-socket # pytest-timeout -pytest-cov==6.0.0 +pytest-cov==5.0.0 # via -r requirements/dev.in pytest-socket==0.7.0 # via -r requirements/dev.in @@ -121,7 +122,7 @@ pytest-timeout==2.3.1 # via -r requirements/dev.in pyyaml==6.0.2 # via pre-commit -readme-renderer==44.0 +readme-renderer==43.0 # via twine requests==2.32.3 # via @@ -148,6 +149,7 @@ twine==5.1.1 # via -r requirements/dev.in typing-extensions==4.12.2 # via + # annotated-types # black # pydantic # pydantic-core @@ -162,8 +164,10 @@ wheel==0.45.0 # via # -r requirements/dev.in # pip-tools -zipp==3.21.0 - # via importlib-metadata +zipp==3.20.2 + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/docs.txt b/requirements/docs.txt index b5e1d42..a67dc54 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile requirements/docs.in # -alabaster==0.7.16 +alabaster==0.7.13 # via sphinx attrs==24.2.0 # via -r requirements/docs.in @@ -23,6 +23,8 @@ idna==3.10 # via requests imagesize==1.4.1 # via sphinx +importlib-metadata==8.5.0 + # via sphinx jinja2==3.1.4 # via # myst-parser @@ -31,7 +33,7 @@ markdown-it-py==2.2.0 # via # mdit-py-plugins # myst-parser -markupsafe==3.0.2 +markupsafe==2.1.5 # via jinja2 mdit-py-plugins==0.3.5 # via myst-parser @@ -43,6 +45,8 @@ packaging==24.2 # via sphinx pygments==2.18.0 # via sphinx +pytz==2024.2 + # via babel pyyaml==6.0.2 # via myst-parser requests==2.32.3 @@ -57,19 +61,21 @@ sphinx==4.5.0 # sphinxcontrib-jquery sphinx-rtd-theme==1.3.0 # via -r requirements/docs.in -sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-applehelp==1.0.4 # via sphinx -sphinxcontrib-devhelp==2.0.0 +sphinxcontrib-devhelp==1.0.2 # via sphinx -sphinxcontrib-htmlhelp==2.1.0 +sphinxcontrib-htmlhelp==2.0.1 # via sphinx sphinxcontrib-jquery==4.1 # via sphinx-rtd-theme sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==2.0.0 +sphinxcontrib-qthelp==1.0.3 # via sphinx -sphinxcontrib-serializinghtml==2.0.0 +sphinxcontrib-serializinghtml==1.1.5 # via sphinx urllib3==2.2.3 # via requests +zipp==3.20.2 + # via importlib-metadata diff --git a/setup.py b/setup.py index 62a65ee..96353a3 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ """Package pdfly with setuptools.""" + import re from setuptools import find_packages, setup From 342b8831c554dba926230421b5dadcb358544a94 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:39:19 +0100 Subject: [PATCH 3/6] Pleasing ruff --- pdfly/cat.py | 35 ++++++++------------- pdfly/cli.py | 20 ++++-------- pdfly/metadata.py | 46 +++++++--------------------- pdfly/pagemeta.py | 8 ++--- pdfly/rm.py | 4 +-- pdfly/uncompress.py | 8 ++--- pdfly/update_offsets.py | 67 +++++++++++------------------------------ pdfly/x2pdf.py | 4 +-- 8 files changed, 52 insertions(+), 140 deletions(-) diff --git a/pdfly/cat.py b/pdfly/cat.py index eacb2c0..9811172 100644 --- a/pdfly/cat.py +++ b/pdfly/cat.py @@ -42,7 +42,6 @@ # All rights reserved. This software is available under a BSD license; # see https://github.com/py-pdf/pypdf/LICENSE - import os import sys import traceback @@ -59,9 +58,7 @@ def main( verbose: bool, inverted_page_selection: bool = False, ) -> None: - filename_page_ranges = parse_filepaths_and_pagerange_args( - filename, fn_pgrgs - ) + filename_page_ranges = parse_filepaths_and_pagerange_args(filename, fn_pgrgs) if output: output_fh = open(output, "wb") else: @@ -71,22 +68,16 @@ def main( writer = PdfWriter() in_fs = {} try: - for filename, page_range in filename_page_ranges: # type: ignore + for filepath, page_range in filename_page_ranges: # type: ignore if verbose: - print(filename, page_range, file=sys.stderr) - if filename not in in_fs: - in_fs[filename] = open(filename, "rb") + print(filepath, page_range, file=sys.stderr) + if filepath not in in_fs: + in_fs[filepath] = open(filepath, "rb") - reader = PdfReader(in_fs[filename]) + reader = PdfReader(in_fs[filepath]) num_pages = len(reader.pages) start, end, step = page_range.indices(num_pages) - if ( - start < 0 - or end < 0 - or start >= num_pages - or end > num_pages - or start > end - ): + if start < 0 or end < 0 or start >= num_pages or end > num_pages or start > end: print( f"WARNING: Page range {page_range} is out of bounds", file=sys.stderr, @@ -111,17 +102,15 @@ def main( # Not closing the in_fs because this script exits now. -def parse_filepaths_and_pagerange_args( - filename: Path, fn_pgrgs: List[str] -) -> List[Tuple[Path, PageRange]]: +def parse_filepaths_and_pagerange_args(filename: Path, fn_pgrgs: List[str]) -> List[Tuple[Path, PageRange]]: fn_pgrgs_l = list(fn_pgrgs) fn_pgrgs_l.insert(0, str(filename)) filename_page_ranges, invalid_filepaths = [], [] - for filename, page_range in parse_filename_page_ranges(fn_pgrgs_l): # type: ignore - if Path(filename).is_file(): - filename_page_ranges.append((filename, page_range)) + for filepath, page_range in parse_filename_page_ranges(fn_pgrgs_l): # type: ignore + if Path(filepath).is_file(): + filename_page_ranges.append((filepath, page_range)) else: - invalid_filepaths.append(str(filename)) + invalid_filepaths.append(str(filepath)) if invalid_filepaths: print( f"Invalid file path or page range provided: {' '.join(invalid_filepaths)}", diff --git a/pdfly/cli.py b/pdfly/cli.py index faabc75..0e3d1d4 100644 --- a/pdfly/cli.py +++ b/pdfly/cli.py @@ -33,9 +33,7 @@ def version_callback(value: bool) -> None: entry_point = typer.Typer( add_completion=False, - help=( - "pdfly is a pure-python cli application for manipulating PDF files." - ), + help=("pdfly is a pure-python cli application for manipulating PDF files."), rich_markup_mode="rich", # Allows to pretty-print commands documentation ) @@ -57,7 +55,7 @@ def extract_images( exists=True, resolve_path=True, ), - ] + ], ) -> None: pdfly.extract_images.main(pdf) @@ -91,9 +89,7 @@ def cat( fn_pgrgs: List[str] = typer.Argument( # noqa ..., help="filenames and/or page ranges" ), - verbose: bool = typer.Option( - False, help="show page ranges as they are being read" - ), + verbose: bool = typer.Option(False, help="show page ranges as they are being read"), ) -> None: pdfly.cat.main(filename, fn_pgrgs, output, verbose) @@ -112,9 +108,7 @@ def rm( fn_pgrgs: List[str] = typer.Argument( # noqa ..., help="filenames and/or page ranges" ), - verbose: bool = typer.Option( - False, help="show page ranges as they are being read" - ), + verbose: bool = typer.Option(False, help="show page ranges as they are being read"), ) -> None: pdfly.rm.main(filename, fn_pgrgs, output, verbose) @@ -175,7 +169,7 @@ def extract_text( exists=True, resolve_path=True, ), - ] + ], ) -> None: """Extract text from a PDF file.""" from pypdf import PdfReader @@ -240,9 +234,7 @@ def update_offsets( "ISO-8859-1", help="Encoding used to read and write the files, e.g. UTF-8.", ), - verbose: bool = typer.Option( - False, help="Show progress while processing." - ), + verbose: bool = typer.Option(False, help="Show progress while processing."), ) -> None: pdfly.update_offsets.main(file_in, file_out, encoding, verbose) diff --git a/pdfly/metadata.py b/pdfly/metadata.py index 19ebf21..4c4956f 100644 --- a/pdfly/metadata.py +++ b/pdfly/metadata.py @@ -94,11 +94,7 @@ def main(pdf: Path, output: OutputOptions) -> None: creation_time=datetime.fromtimestamp(pdf_stat.st_ctime), modification_time=datetime.fromtimestamp(pdf_stat.st_mtime), access_time=datetime.fromtimestamp(pdf_stat.st_atime), - images=[ - len(image.data) - for page in reader.pages - for image in page.images - ], + images=[len(image.data) for page in reader.pages for image in page.images], ) if info is not None: meta.author = info.author @@ -117,9 +113,7 @@ def main(pdf: Path, output: OutputOptions) -> None: from rich.table import Table table = Table(title="PDF Data") - table.add_column( - "Attribute", justify="right", style="cyan", no_wrap=True - ) + table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) table.add_column("Value", style="white") if meta.title: @@ -149,21 +143,13 @@ def main(pdf: Path, output: OutputOptions) -> None: emb, unemb = page._get_fonts() embedded_fonts = embedded_fonts.union(set(emb)) unemedded_fonts = unemedded_fonts.union(set(unemb)) - table.add_row( - "Fonts (unembedded)", ", ".join(sorted(unemedded_fonts)) - ) - table.add_row( - "Fonts (embedded)", ", ".join(sorted(embedded_fonts)) - ) + table.add_row("Fonts (unembedded)", ", ".join(sorted(unemedded_fonts))) + table.add_row("Fonts (embedded)", ", ".join(sorted(embedded_fonts))) table.add_row("Attachments", meta.attachments) - table.add_row( - "Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)" - ) + table.add_row("Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)") enc_table = Table(title="Encryption information") - enc_table.add_column( - "Attribute", justify="right", style="cyan", no_wrap=True - ) + enc_table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) enc_table.add_column("Value", style="white") if meta.encryption: enc_table.add_row( @@ -173,28 +159,18 @@ def main(pdf: Path, output: OutputOptions) -> None: enc_table.add_row("V value", str(meta.encryption.v_value)) os_table = Table(title="Operating System Data") - os_table.add_column( - "Attribute", justify="right", style="cyan", no_wrap=True - ) + os_table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) os_table.add_column("Value", style="white") os_table.add_row("File Name", f"{pdf}") os_table.add_row("File Permissions", f"{meta.file_permissions}") os_table.add_row("File Size", f"{meta.file_size:,} bytes") - os_table.add_row( - "Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}" - ) - os_table.add_row( - "Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}" - ) - os_table.add_row( - "Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}" - ) + os_table.add_row("Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}") + os_table.add_row("Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}") + os_table.add_row("Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}") console = Console() console.print(os_table) console.print(table) if meta.encryption: console.print(enc_table) - console.print( - "Use the 'pagemeta' subcommand to get details about a single page" - ) + console.print("Use the 'pagemeta' subcommand to get details about a single page") diff --git a/pdfly/pagemeta.py b/pdfly/pagemeta.py index 762fce6..e95f825 100644 --- a/pdfly/pagemeta.py +++ b/pdfly/pagemeta.py @@ -38,9 +38,7 @@ def main(pdf: Path, page_index: int, output: OutputOptions) -> None: console = Console() table = Table(title=f"{pdf}, page index {page_index}") - table.add_column( - "Attribute", justify="right", style="cyan", no_wrap=True - ) + table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) table.add_column("Value", style="white") table.add_row( @@ -57,9 +55,7 @@ def main(pdf: Path, page_index: int, output: OutputOptions) -> None: ) table.add_row( "artbox", - f"{meta.artbox}: " - f"with={meta.artbox[2] - meta.artbox[0]} " - f"x height={meta.artbox[3] - meta.artbox[1]}", + f"{meta.artbox}: with={meta.artbox[2] - meta.artbox[0]} x height={meta.artbox[3] - meta.artbox[1]}", ) table.add_row( "bleedbox", diff --git a/pdfly/rm.py b/pdfly/rm.py index fc859e6..d622236 100644 --- a/pdfly/rm.py +++ b/pdfly/rm.py @@ -44,7 +44,5 @@ from pdfly.cat import main as cat_main -def main( - filename: Path, fn_pgrgs: List[str], output: Path, verbose: bool -) -> None: +def main(filename: Path, fn_pgrgs: List[str], output: Path, verbose: bool) -> None: cat_main(filename, fn_pgrgs, output, verbose, inverted_page_selection=True) diff --git a/pdfly/uncompress.py b/pdfly/uncompress.py index a543473..72ec484 100644 --- a/pdfly/uncompress.py +++ b/pdfly/uncompress.py @@ -33,9 +33,7 @@ def main(pdf: Path, output: Path) -> None: uncomp_size = output.stat().st_size print(f"Original Size : {orig_size:,}") - print( - f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)" - ) + print(f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)") def decompress_content_stream(content: IndirectObject) -> None: @@ -47,6 +45,4 @@ def decompress_content_stream(content: IndirectObject) -> None: content.set_data(uncompressed_data) del content["/Filter"] except zlib.error as error: - print( - f"Some content stream with /FlateDecode failed to be decompressed: {error}" - ) + print(f"Some content stream with /FlateDecode failed to be decompressed: {error}") diff --git a/pdfly/update_offsets.py b/pdfly/update_offsets.py index ac4bb07..15e21ee 100644 --- a/pdfly/update_offsets.py +++ b/pdfly/update_offsets.py @@ -40,14 +40,10 @@ RE_OBJ = re.compile(r"^([0-9]+) ([0-9]+) obj *") RE_CONTENT = re.compile(r"^([^\r\n]*)", re.DOTALL) RE_LENGTH_REF = re.compile(r"^(.*/Length )([0-9]+) ([0-9]+) R(.*)", re.DOTALL) -RE_LENGTH = re.compile( - r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL -) +RE_LENGTH = re.compile(r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL) -def update_lines( - lines_in: List[str], encoding: str, console: Console, verbose: bool -) -> List[str]: +def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: bool) -> List[str]: """ Iterates over the lines of a pdf-files and updates offsets. @@ -73,18 +69,14 @@ def update_lines( offset_xref = None # offset of xref-section map_stream_len = {} # map from object-number to /Length of stream map_obj_length_line = {} # map from object-number to /Length-line - map_obj_length_ref = ( - {} - ) # map from object-number to /Length-reference (e.g. "3") + map_obj_length_ref = {} # map from object-number to /Length-reference (e.g. "3") map_obj_length_line_no = {} # map from object-number to line_no of length # of /Length-line for idx, line in enumerate(lines_in): line_no = idx + 1 m_content = RE_CONTENT.match(line) if m_content is None: - raise RuntimeError( - f"Invalid PDF file: line {line_no} without line-break." - ) + raise RuntimeError(f"Invalid PDF file: line {line_no} without line-break.") content = m_content.group(1) map_line_offset[line_no] = offset_out m_obj = RE_OBJ.match(line) @@ -115,24 +107,14 @@ def update_lines( if verbose: console.print(f"line {line_no}: end stream") if curr_obj is None: - raise RuntimeError( - f"Invalid PDF file: line {line_no}: endstream without object-start." - ) + raise RuntimeError(f"Invalid PDF file: line {line_no}: endstream without object-start.") if len_stream is None: - raise RuntimeError( - f"Invalid PDF file: line {line_no}: endstream without stream." - ) + raise RuntimeError(f"Invalid PDF file: line {line_no}: endstream without stream.") if len_stream > 0: # Ignore the last EOL - len_stream = ( - len_stream - 2 - if lines_in[idx - 1][-2:] == "\r\n" - else len_stream - 1 - ) + len_stream = len_stream - 2 if lines_in[idx - 1][-2:] == "\r\n" else len_stream - 1 if verbose: - console.print( - f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}" - ) + console.print(f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}") map_stream_len[curr_obj] = len_stream elif content == "endobj": curr_obj = None @@ -142,9 +124,7 @@ def update_lines( len_obj = m_length_ref.group(2) len_obj_gen = m_length_ref.group(3) if verbose: - console.print( - f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}" - ) + console.print(f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}") map_obj_length_ref[curr_obj] = len_obj else: m_length = RE_LENGTH.match(line) @@ -175,26 +155,18 @@ def update_lines( # Some checks if len(map_obj_offset) == 0: - raise RuntimeError( - "Invalid PDF file: the command didn't find any PDF objects." - ) + raise RuntimeError("Invalid PDF file: the command didn't find any PDF objects.") if offset_xref is None: - raise RuntimeError( - "Invalid PDF file: the command didn't find a xref-section" - ) + raise RuntimeError("Invalid PDF file: the command didn't find a xref-section") if line_startxref is None: - raise RuntimeError( - "Invalid PDF file: the command didn't find a startxref-section" - ) + raise RuntimeError("Invalid PDF file: the command didn't find a startxref-section") for curr_obj, stream_len in map_stream_len.items(): if curr_obj in map_obj_length_line: line = map_obj_length_line[curr_obj] m_length = RE_LENGTH.match(line) if m_length is None: - raise RuntimeError( - f"Invalid PDF file: line '{line}' does not contain a valid /Length." - ) + raise RuntimeError(f"Invalid PDF file: line '{line}' does not contain a valid /Length.") prev_length = m_length.group(2) len_digits = len(prev_length) len_format = "%%0%dd" % len_digits @@ -210,9 +182,7 @@ def update_lines( elif curr_obj in map_obj_length_ref: len_obj = map_obj_length_ref[curr_obj] if len_obj not in map_obj_line: - raise RuntimeError( - f"obj {curr_obj} has unknown length-obj {len_obj}" - ) + raise RuntimeError(f"obj {curr_obj} has unknown length-obj {len_obj}") len_obj_line = map_obj_line[len_obj] prev_length = lines_out[len_obj_line][:-1] len_digits = len(prev_length) @@ -226,14 +196,11 @@ def update_lines( ) if prev_length != updated_length: if verbose: - console.print( - f"line {line_no}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}" - ) + console.print(f"line {line_no}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}") lines_out[len_obj_line] = updated_length + "\n" else: raise RuntimeError( - f"obj {curr_obj} with stream-len {stream_len}" - f" has no object-length-line: {map_obj_length_line}" + f"obj {curr_obj} with stream-len {stream_len} has no object-length-line: {map_obj_length_line}" ) return lines_out @@ -260,7 +227,7 @@ def read_binary_file(file_path: Path, encoding: str) -> List[str]: # Split buffer into chunks based on LF, CR, or CRLF while True: - match = re.search(b"(\x0D\x0A|\x0A|\x0D)", buffer) + match = re.search(b"(\x0d\x0a|\x0a|\x0d)", buffer) if not match: break # No more line breaks found, process the remaining buffer diff --git a/pdfly/x2pdf.py b/pdfly/x2pdf.py index 15cf0e6..cfdf9c8 100644 --- a/pdfly/x2pdf.py +++ b/pdfly/x2pdf.py @@ -42,9 +42,7 @@ def main(in_filepaths: List[Path], out_filepath: Path) -> int: new_page = PdfReader(pdf_bytes).pages[0] writer.insert_page(new_page) except Exception: - console.print( - f"[red]Error: Could not convert '{filepath}' to a PDF." - ) + console.print(f"[red]Error: Could not convert '{filepath}' to a PDF.") console.print_exception(extra_lines=1, max_frames=1) exit_code += 1 writer.write(out_filepath) From 74f13fe81c927cf3a6b936209bc9a11a581b8363 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:40:51 +0100 Subject: [PATCH 4/6] Pleasing black --- pdfly/cat.py | 16 +++++++++-- pdfly/cli.py | 16 ++++++++--- pdfly/metadata.py | 46 ++++++++++++++++++++++-------- pdfly/pagemeta.py | 4 ++- pdfly/rm.py | 4 ++- pdfly/uncompress.py | 8 ++++-- pdfly/update_offsets.py | 62 +++++++++++++++++++++++++++++++---------- pdfly/x2pdf.py | 4 ++- 8 files changed, 122 insertions(+), 38 deletions(-) diff --git a/pdfly/cat.py b/pdfly/cat.py index 9811172..9f0870c 100644 --- a/pdfly/cat.py +++ b/pdfly/cat.py @@ -58,7 +58,9 @@ def main( verbose: bool, inverted_page_selection: bool = False, ) -> None: - filename_page_ranges = parse_filepaths_and_pagerange_args(filename, fn_pgrgs) + filename_page_ranges = parse_filepaths_and_pagerange_args( + filename, fn_pgrgs + ) if output: output_fh = open(output, "wb") else: @@ -77,7 +79,13 @@ def main( reader = PdfReader(in_fs[filepath]) num_pages = len(reader.pages) start, end, step = page_range.indices(num_pages) - if start < 0 or end < 0 or start >= num_pages or end > num_pages or start > end: + if ( + start < 0 + or end < 0 + or start >= num_pages + or end > num_pages + or start > end + ): print( f"WARNING: Page range {page_range} is out of bounds", file=sys.stderr, @@ -102,7 +110,9 @@ def main( # Not closing the in_fs because this script exits now. -def parse_filepaths_and_pagerange_args(filename: Path, fn_pgrgs: List[str]) -> List[Tuple[Path, PageRange]]: +def parse_filepaths_and_pagerange_args( + filename: Path, fn_pgrgs: List[str] +) -> List[Tuple[Path, PageRange]]: fn_pgrgs_l = list(fn_pgrgs) fn_pgrgs_l.insert(0, str(filename)) filename_page_ranges, invalid_filepaths = [], [] diff --git a/pdfly/cli.py b/pdfly/cli.py index 0e3d1d4..44b5ecd 100644 --- a/pdfly/cli.py +++ b/pdfly/cli.py @@ -33,7 +33,9 @@ def version_callback(value: bool) -> None: entry_point = typer.Typer( add_completion=False, - help=("pdfly is a pure-python cli application for manipulating PDF files."), + help=( + "pdfly is a pure-python cli application for manipulating PDF files." + ), rich_markup_mode="rich", # Allows to pretty-print commands documentation ) @@ -89,7 +91,9 @@ def cat( fn_pgrgs: List[str] = typer.Argument( # noqa ..., help="filenames and/or page ranges" ), - verbose: bool = typer.Option(False, help="show page ranges as they are being read"), + verbose: bool = typer.Option( + False, help="show page ranges as they are being read" + ), ) -> None: pdfly.cat.main(filename, fn_pgrgs, output, verbose) @@ -108,7 +112,9 @@ def rm( fn_pgrgs: List[str] = typer.Argument( # noqa ..., help="filenames and/or page ranges" ), - verbose: bool = typer.Option(False, help="show page ranges as they are being read"), + verbose: bool = typer.Option( + False, help="show page ranges as they are being read" + ), ) -> None: pdfly.rm.main(filename, fn_pgrgs, output, verbose) @@ -234,7 +240,9 @@ def update_offsets( "ISO-8859-1", help="Encoding used to read and write the files, e.g. UTF-8.", ), - verbose: bool = typer.Option(False, help="Show progress while processing."), + verbose: bool = typer.Option( + False, help="Show progress while processing." + ), ) -> None: pdfly.update_offsets.main(file_in, file_out, encoding, verbose) diff --git a/pdfly/metadata.py b/pdfly/metadata.py index 4c4956f..19ebf21 100644 --- a/pdfly/metadata.py +++ b/pdfly/metadata.py @@ -94,7 +94,11 @@ def main(pdf: Path, output: OutputOptions) -> None: creation_time=datetime.fromtimestamp(pdf_stat.st_ctime), modification_time=datetime.fromtimestamp(pdf_stat.st_mtime), access_time=datetime.fromtimestamp(pdf_stat.st_atime), - images=[len(image.data) for page in reader.pages for image in page.images], + images=[ + len(image.data) + for page in reader.pages + for image in page.images + ], ) if info is not None: meta.author = info.author @@ -113,7 +117,9 @@ def main(pdf: Path, output: OutputOptions) -> None: from rich.table import Table table = Table(title="PDF Data") - table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) + table.add_column( + "Attribute", justify="right", style="cyan", no_wrap=True + ) table.add_column("Value", style="white") if meta.title: @@ -143,13 +149,21 @@ def main(pdf: Path, output: OutputOptions) -> None: emb, unemb = page._get_fonts() embedded_fonts = embedded_fonts.union(set(emb)) unemedded_fonts = unemedded_fonts.union(set(unemb)) - table.add_row("Fonts (unembedded)", ", ".join(sorted(unemedded_fonts))) - table.add_row("Fonts (embedded)", ", ".join(sorted(embedded_fonts))) + table.add_row( + "Fonts (unembedded)", ", ".join(sorted(unemedded_fonts)) + ) + table.add_row( + "Fonts (embedded)", ", ".join(sorted(embedded_fonts)) + ) table.add_row("Attachments", meta.attachments) - table.add_row("Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)") + table.add_row( + "Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)" + ) enc_table = Table(title="Encryption information") - enc_table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) + enc_table.add_column( + "Attribute", justify="right", style="cyan", no_wrap=True + ) enc_table.add_column("Value", style="white") if meta.encryption: enc_table.add_row( @@ -159,18 +173,28 @@ def main(pdf: Path, output: OutputOptions) -> None: enc_table.add_row("V value", str(meta.encryption.v_value)) os_table = Table(title="Operating System Data") - os_table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) + os_table.add_column( + "Attribute", justify="right", style="cyan", no_wrap=True + ) os_table.add_column("Value", style="white") os_table.add_row("File Name", f"{pdf}") os_table.add_row("File Permissions", f"{meta.file_permissions}") os_table.add_row("File Size", f"{meta.file_size:,} bytes") - os_table.add_row("Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}") - os_table.add_row("Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}") - os_table.add_row("Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}") + os_table.add_row( + "Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}" + ) + os_table.add_row( + "Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}" + ) + os_table.add_row( + "Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}" + ) console = Console() console.print(os_table) console.print(table) if meta.encryption: console.print(enc_table) - console.print("Use the 'pagemeta' subcommand to get details about a single page") + console.print( + "Use the 'pagemeta' subcommand to get details about a single page" + ) diff --git a/pdfly/pagemeta.py b/pdfly/pagemeta.py index e95f825..7830fc2 100644 --- a/pdfly/pagemeta.py +++ b/pdfly/pagemeta.py @@ -38,7 +38,9 @@ def main(pdf: Path, page_index: int, output: OutputOptions) -> None: console = Console() table = Table(title=f"{pdf}, page index {page_index}") - table.add_column("Attribute", justify="right", style="cyan", no_wrap=True) + table.add_column( + "Attribute", justify="right", style="cyan", no_wrap=True + ) table.add_column("Value", style="white") table.add_row( diff --git a/pdfly/rm.py b/pdfly/rm.py index d622236..fc859e6 100644 --- a/pdfly/rm.py +++ b/pdfly/rm.py @@ -44,5 +44,7 @@ from pdfly.cat import main as cat_main -def main(filename: Path, fn_pgrgs: List[str], output: Path, verbose: bool) -> None: +def main( + filename: Path, fn_pgrgs: List[str], output: Path, verbose: bool +) -> None: cat_main(filename, fn_pgrgs, output, verbose, inverted_page_selection=True) diff --git a/pdfly/uncompress.py b/pdfly/uncompress.py index 72ec484..a543473 100644 --- a/pdfly/uncompress.py +++ b/pdfly/uncompress.py @@ -33,7 +33,9 @@ def main(pdf: Path, output: Path) -> None: uncomp_size = output.stat().st_size print(f"Original Size : {orig_size:,}") - print(f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)") + print( + f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)" + ) def decompress_content_stream(content: IndirectObject) -> None: @@ -45,4 +47,6 @@ def decompress_content_stream(content: IndirectObject) -> None: content.set_data(uncompressed_data) del content["/Filter"] except zlib.error as error: - print(f"Some content stream with /FlateDecode failed to be decompressed: {error}") + print( + f"Some content stream with /FlateDecode failed to be decompressed: {error}" + ) diff --git a/pdfly/update_offsets.py b/pdfly/update_offsets.py index 15e21ee..92b7626 100644 --- a/pdfly/update_offsets.py +++ b/pdfly/update_offsets.py @@ -40,10 +40,14 @@ RE_OBJ = re.compile(r"^([0-9]+) ([0-9]+) obj *") RE_CONTENT = re.compile(r"^([^\r\n]*)", re.DOTALL) RE_LENGTH_REF = re.compile(r"^(.*/Length )([0-9]+) ([0-9]+) R(.*)", re.DOTALL) -RE_LENGTH = re.compile(r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL) +RE_LENGTH = re.compile( + r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL +) -def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: bool) -> List[str]: +def update_lines( + lines_in: List[str], encoding: str, console: Console, verbose: bool +) -> List[str]: """ Iterates over the lines of a pdf-files and updates offsets. @@ -69,14 +73,18 @@ def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: offset_xref = None # offset of xref-section map_stream_len = {} # map from object-number to /Length of stream map_obj_length_line = {} # map from object-number to /Length-line - map_obj_length_ref = {} # map from object-number to /Length-reference (e.g. "3") + map_obj_length_ref = ( + {} + ) # map from object-number to /Length-reference (e.g. "3") map_obj_length_line_no = {} # map from object-number to line_no of length # of /Length-line for idx, line in enumerate(lines_in): line_no = idx + 1 m_content = RE_CONTENT.match(line) if m_content is None: - raise RuntimeError(f"Invalid PDF file: line {line_no} without line-break.") + raise RuntimeError( + f"Invalid PDF file: line {line_no} without line-break." + ) content = m_content.group(1) map_line_offset[line_no] = offset_out m_obj = RE_OBJ.match(line) @@ -107,14 +115,24 @@ def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: if verbose: console.print(f"line {line_no}: end stream") if curr_obj is None: - raise RuntimeError(f"Invalid PDF file: line {line_no}: endstream without object-start.") + raise RuntimeError( + f"Invalid PDF file: line {line_no}: endstream without object-start." + ) if len_stream is None: - raise RuntimeError(f"Invalid PDF file: line {line_no}: endstream without stream.") + raise RuntimeError( + f"Invalid PDF file: line {line_no}: endstream without stream." + ) if len_stream > 0: # Ignore the last EOL - len_stream = len_stream - 2 if lines_in[idx - 1][-2:] == "\r\n" else len_stream - 1 + len_stream = ( + len_stream - 2 + if lines_in[idx - 1][-2:] == "\r\n" + else len_stream - 1 + ) if verbose: - console.print(f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}") + console.print( + f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}" + ) map_stream_len[curr_obj] = len_stream elif content == "endobj": curr_obj = None @@ -124,7 +142,9 @@ def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: len_obj = m_length_ref.group(2) len_obj_gen = m_length_ref.group(3) if verbose: - console.print(f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}") + console.print( + f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}" + ) map_obj_length_ref[curr_obj] = len_obj else: m_length = RE_LENGTH.match(line) @@ -155,18 +175,26 @@ def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: # Some checks if len(map_obj_offset) == 0: - raise RuntimeError("Invalid PDF file: the command didn't find any PDF objects.") + raise RuntimeError( + "Invalid PDF file: the command didn't find any PDF objects." + ) if offset_xref is None: - raise RuntimeError("Invalid PDF file: the command didn't find a xref-section") + raise RuntimeError( + "Invalid PDF file: the command didn't find a xref-section" + ) if line_startxref is None: - raise RuntimeError("Invalid PDF file: the command didn't find a startxref-section") + raise RuntimeError( + "Invalid PDF file: the command didn't find a startxref-section" + ) for curr_obj, stream_len in map_stream_len.items(): if curr_obj in map_obj_length_line: line = map_obj_length_line[curr_obj] m_length = RE_LENGTH.match(line) if m_length is None: - raise RuntimeError(f"Invalid PDF file: line '{line}' does not contain a valid /Length.") + raise RuntimeError( + f"Invalid PDF file: line '{line}' does not contain a valid /Length." + ) prev_length = m_length.group(2) len_digits = len(prev_length) len_format = "%%0%dd" % len_digits @@ -182,7 +210,9 @@ def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: elif curr_obj in map_obj_length_ref: len_obj = map_obj_length_ref[curr_obj] if len_obj not in map_obj_line: - raise RuntimeError(f"obj {curr_obj} has unknown length-obj {len_obj}") + raise RuntimeError( + f"obj {curr_obj} has unknown length-obj {len_obj}" + ) len_obj_line = map_obj_line[len_obj] prev_length = lines_out[len_obj_line][:-1] len_digits = len(prev_length) @@ -196,7 +226,9 @@ def update_lines(lines_in: List[str], encoding: str, console: Console, verbose: ) if prev_length != updated_length: if verbose: - console.print(f"line {line_no}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}") + console.print( + f"line {line_no}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}" + ) lines_out[len_obj_line] = updated_length + "\n" else: raise RuntimeError( diff --git a/pdfly/x2pdf.py b/pdfly/x2pdf.py index cfdf9c8..15cf0e6 100644 --- a/pdfly/x2pdf.py +++ b/pdfly/x2pdf.py @@ -42,7 +42,9 @@ def main(in_filepaths: List[Path], out_filepath: Path) -> int: new_page = PdfReader(pdf_bytes).pages[0] writer.insert_page(new_page) except Exception: - console.print(f"[red]Error: Could not convert '{filepath}' to a PDF.") + console.print( + f"[red]Error: Could not convert '{filepath}' to a PDF." + ) console.print_exception(extra_lines=1, max_frames=1) exit_code += 1 writer.write(out_filepath) From bf3d65a49aacbf37803714feaa4bef60972ed531 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:45:18 +0100 Subject: [PATCH 5/6] Pleasing mypy --- pdfly/cat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfly/cat.py b/pdfly/cat.py index 9f0870c..324d823 100644 --- a/pdfly/cat.py +++ b/pdfly/cat.py @@ -118,7 +118,7 @@ def parse_filepaths_and_pagerange_args( filename_page_ranges, invalid_filepaths = [], [] for filepath, page_range in parse_filename_page_ranges(fn_pgrgs_l): # type: ignore if Path(filepath).is_file(): - filename_page_ranges.append((filepath, page_range)) + filename_page_ranges.append((Path(filepath), page_range)) else: invalid_filepaths.append(str(filepath)) if invalid_filepaths: From b1f94a6e5e1a21bcd3e3cf41ef660d3364f0bab3 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:46:42 +0100 Subject: [PATCH 6/6] Fix ruff command in CI pipeline --- .github/workflows/github-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index f302b58..d4cda99 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -73,7 +73,7 @@ jobs: - name: Test with ruff run: | echo `ruff --version` - ruff pdfly/ + ruff check pdfly/ package: name: Build & verify package