From 1a9990a9a6cdc5951a50a68ffedf0d4121f2aead Mon Sep 17 00:00:00 2001 From: benoit74 Date: Thu, 19 Dec 2024 16:34:08 +0000 Subject: [PATCH 1/4] Fix typing issues and activate pyright strict mode --- .pre-commit-config.yaml | 2 +- contrib/encode_video.py | 3 - pyproject.toml | 7 +- rules/generate_rules.py | 4 +- src/zimscraperlib/__init__.py | 3 - src/zimscraperlib/constants.py | 3 - src/zimscraperlib/download.py | 30 ++- src/zimscraperlib/filesystem.py | 5 - src/zimscraperlib/fix_ogvjs_dist.py | 6 - src/zimscraperlib/html.py | 8 +- src/zimscraperlib/i18n.py | 2 - src/zimscraperlib/image/__init__.py | 4 - src/zimscraperlib/image/conversion.py | 18 +- src/zimscraperlib/image/optimization.py | 58 ++--- src/zimscraperlib/image/presets.py | 5 - src/zimscraperlib/image/probing.py | 29 ++- src/zimscraperlib/image/transformation.py | 23 +- src/zimscraperlib/image/utils.py | 4 - src/zimscraperlib/inputs.py | 10 +- src/zimscraperlib/logging.py | 5 - src/zimscraperlib/misc.py | 2 - src/zimscraperlib/rewriting/css.py | 41 +++- src/zimscraperlib/rewriting/url_rewriting.py | 2 - src/zimscraperlib/types.py | 5 - src/zimscraperlib/typing.py | 8 +- src/zimscraperlib/uri.py | 2 - src/zimscraperlib/video/__init__.py | 6 +- src/zimscraperlib/video/config.py | 63 +++--- src/zimscraperlib/video/encoding.py | 13 +- src/zimscraperlib/video/presets.py | 15 +- src/zimscraperlib/video/probing.py | 7 +- src/zimscraperlib/zim/__init__.py | 3 - src/zimscraperlib/zim/_libkiwix.py | 6 +- src/zimscraperlib/zim/archive.py | 13 +- src/zimscraperlib/zim/creator.py | 51 +++-- src/zimscraperlib/zim/filesystem.py | 7 +- src/zimscraperlib/zim/indexing.py | 26 ++- src/zimscraperlib/zim/items.py | 30 +-- src/zimscraperlib/zim/metadata.py | 173 ++++++++------ src/zimscraperlib/zim/providers.py | 11 +- tests/conftest.py | 69 +++--- tests/download/test_download.py | 83 +++---- tests/filesystem/test_filesystem.py | 19 +- tests/html/conftest.py | 7 +- tests/html/test_html.py | 11 +- tests/image/test_image.py | 
225 ++++++++++++++----- tests/inputs/test_inputs.py | 44 ++-- tests/logging/conftest.py | 7 +- tests/logging/test_logging.py | 117 +++++----- tests/misc/test_misc.py | 2 +- tests/ogvjs/conftest.py | 9 +- tests/ogvjs/test_ogvjs.py | 16 +- tests/rewriting/test_html_rewriting.py | 14 +- tests/rewriting/test_rx_replacer.py | 2 +- tests/rewriting/test_url_rewriting.py | 9 +- tests/types/test_types.py | 12 +- tests/uri/test_uri.py | 3 +- tests/video/conftest.py | 5 +- tests/video/test_encoding.py | 6 +- tests/video/test_video.py | 88 +++++--- tests/zim/conftest.py | 17 +- tests/zim/test_archive.py | 23 +- tests/zim/test_fs.py | 40 ++-- tests/zim/test_indexing.py | 50 +++-- tests/zim/test_libkiwix.py | 9 +- tests/zim/test_metadata.py | 51 ++--- tests/zim/test_zim_creator.py | 135 ++++++----- 67 files changed, 955 insertions(+), 831 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8302a4ae..ebb0c8a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: hooks: - id: ruff - repo: https://github.com/RobertCraigie/pyright-python - rev: v1.1.385 + rev: v1.1.391 hooks: - id: pyright name: pyright (system) diff --git a/contrib/encode_video.py b/contrib/encode_video.py index d4bb8570..590727fc 100644 --- a/contrib/encode_video.py +++ b/contrib/encode_video.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import sys from pathlib import Path @@ -20,7 +18,6 @@ def encode_video(src_path: Path, dst_path: Path, preset: str): src_path=src_path, dst_path=dst_path, ffmpeg_args=preset_cls().to_ffmpeg_args(), - with_process=True, ) # pyright: ignore[reportGeneralTypeIssues] (returned type is variable, depending on `with_process` value) if not success: logger.error(f"conversion failed:\n{process.stdout}") diff --git a/pyproject.toml b/pyproject.toml index 979a2e06..757243be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ lint = [ "ruff==0.8.2", ] check = [ - "pyright==1.1.390", + 
"pyright==1.1.391", "pytest==8.3.4", ] test = [ @@ -286,12 +286,9 @@ include = ["contrib", "src", "tests", "tasks.py"] exclude = [".env/**", ".venv/**"] extraPaths = ["src"] pythonVersion = "3.12" -typeCheckingMode="basic" +typeCheckingMode="strict" disableBytesTypePromotions = true -[tool.pyright.overrides] -strict = true # Enable strict mode for specific files - [[tool.pyright.overrides.files]] files = [ "src/zimscraperlib/rewriting**/*.py", diff --git a/rules/generate_rules.py b/rules/generate_rules.py index cd9ca8e5..460e1b03 100644 --- a/rules/generate_rules.py +++ b/rules/generate_rules.py @@ -156,11 +156,11 @@ {% endfor %} ] ) -def {{ rule['name'] }}_case(request): +def {{ rule['name'] }}_case(request: pytest.FixtureRequest): yield request.param -def test_fuzzyrules_{{ rule['name'] }}({{ rule['name'] }}_case): +def test_fuzzyrules_{{ rule['name'] }}({{ rule['name'] }}_case: ContentForTests): assert ( ArticleUrlRewriter.apply_additional_rules({{ rule['name'] }}_case.input_str) == {{ rule['name'] }}_case.expected_str diff --git a/src/zimscraperlib/__init__.py b/src/zimscraperlib/__init__.py index 20c9e55b..3457a829 100644 --- a/src/zimscraperlib/__init__.py +++ b/src/zimscraperlib/__init__.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - import logging as stdlogging import os diff --git a/src/zimscraperlib/constants.py b/src/zimscraperlib/constants.py index c2a89ebd..eee0673e 100644 --- a/src/zimscraperlib/constants.py +++ b/src/zimscraperlib/constants.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import pathlib import re diff --git a/src/zimscraperlib/download.py b/src/zimscraperlib/download.py index 83ec167e..ca0a9623 100644 --- a/src/zimscraperlib/download.py +++ b/src/zimscraperlib/download.py @@ -1,18 +1,13 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import pathlib import subprocess from concurrent.futures import Future, ThreadPoolExecutor 
-from typing import ClassVar +from typing import Any, ClassVar import requests import requests.adapters import requests.structures import urllib3.util -import yt_dlp as youtube_dl +import yt_dlp as youtube_dl # pyright: ignore[reportMissingTypeStubs] from zimscraperlib import logger from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT @@ -31,24 +26,24 @@ def __init__(self, threads: int | None = 1) -> None: def __enter__(self): return self - def __exit__(self, *args): + def __exit__(self, *_: Any): self.shutdown() def shutdown(self) -> None: """shuts down the executor, awaiting completion""" self.executor.shutdown(wait=True) - def _run_youtube_dl(self, url: str, options: dict) -> None: + def _run_youtube_dl(self, url: str, options: dict[str, Any]) -> None: with youtube_dl.YoutubeDL(options) as ydl: - ydl.download([url]) + ydl.download([url]) # pyright: ignore[reportUnknownMemberType] def download( self, url: str, - options: dict | None, + options: dict[str, Any] | None, *, wait: bool | None = True, - ) -> bool | Future: + ) -> bool | Future[Any]: """Downloads video using initialized executor. 
url: URL or Video ID @@ -66,7 +61,7 @@ def download( return True -class YoutubeConfig(dict): +class YoutubeConfig(dict[str, str | bool | int | None]): options: ClassVar[dict[str, str | bool | int | None]] = {} defaults: ClassVar[dict[str, str | bool | int | None]] = { "writethumbnail": True, @@ -82,7 +77,7 @@ class YoutubeConfig(dict): "outtmpl": "video.%(ext)s", } - def __init__(self, **kwargs): + def __init__(self, **kwargs: str | bool | int | None): super().__init__(self, **type(self).defaults) self.update(self.options) self.update(kwargs) @@ -92,7 +87,7 @@ def get_options( cls, target_dir: pathlib.Path | None = None, filepath: pathlib.Path | None = None, - **options, + **options: str | bool | int | None, ): if "outtmpl" not in options: outtmpl = cls.options.get("outtmpl", cls.defaults["outtmpl"]) @@ -143,9 +138,10 @@ def save_large_file(url: str, fpath: pathlib.Path) -> None: ) -def _get_retry_adapter( +def get_retry_adapter( max_retries: int | None = 5, ) -> requests.adapters.BaseAdapter: + """A requests adapter to automatically retry on known HTTP status that can be""" retries = urllib3.util.retry.Retry( total=max_retries, # total number of retries connect=max_retries, # connection errors @@ -169,7 +165,7 @@ def _get_retry_adapter( def get_session(max_retries: int | None = 5) -> requests.Session: """Session to hold cookies and connection pool together""" session = requests.Session() - session.mount("http", _get_retry_adapter(max_retries)) # tied to http and https + session.mount("http", get_retry_adapter(max_retries)) # tied to http and https return session diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py index 6ffbe6a1..6dce4d3e 100644 --- a/src/zimscraperlib/filesystem.py +++ b/src/zimscraperlib/filesystem.py @@ -1,12 +1,7 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ Files manipulation tools Shortcuts to retrieve mime type using magic""" -from __future__ import annotations - import os import pathlib diff 
--git a/src/zimscraperlib/fix_ogvjs_dist.py b/src/zimscraperlib/fix_ogvjs_dist.py index 12115cbd..3cb64dfc 100755 --- a/src/zimscraperlib/fix_ogvjs_dist.py +++ b/src/zimscraperlib/fix_ogvjs_dist.py @@ -1,11 +1,5 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - - """ quick script to fix videojs-ogvjs so that it triggers on webm mimetype """ -from __future__ import annotations - import logging import pathlib import sys diff --git a/src/zimscraperlib/html.py b/src/zimscraperlib/html.py index d2d974ea..cdc9a79c 100644 --- a/src/zimscraperlib/html.py +++ b/src/zimscraperlib/html.py @@ -1,8 +1,4 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ Tools to work with HTML contents """ -from __future__ import annotations import pathlib from typing import BinaryIO, TextIO @@ -43,9 +39,7 @@ def find_language_in(content: str | BinaryIO | TextIO, mime_type: str) -> str: for key in keylist: node = soup.find(nodename) if node: - if not isinstance(node, element.Tag) or ( - isinstance(node, element.Tag) and not node.has_attr(key) - ): + if not isinstance(node, element.Tag) or not node.has_attr(key): continue if ( nodename == "meta" diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py index a5b9429c..79beae7f 100644 --- a/src/zimscraperlib/i18n.py +++ b/src/zimscraperlib/i18n.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import re import babel diff --git a/src/zimscraperlib/image/__init__.py b/src/zimscraperlib/image/__init__.py index c17912b2..5a7039ca 100644 --- a/src/zimscraperlib/image/__init__.py +++ b/src/zimscraperlib/image/__init__.py @@ -1,7 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim: ai ts=4 sts=4 et sw=4 nu - # flake8: noqa from .conversion import convert_image from .optimization import optimize_image diff --git a/src/zimscraperlib/image/conversion.py b/src/zimscraperlib/image/conversion.py index 7506f48c..a62cc411 100644 --- a/src/zimscraperlib/image/conversion.py +++ 
b/src/zimscraperlib/image/conversion.py @@ -1,12 +1,8 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import io import pathlib +from typing import Any -import cairosvg.svg +import cairosvg.svg # pyright: ignore[reportMissingTypeStubs] from PIL.Image import open as pilopen from zimscraperlib.constants import ALPHA_NOT_SUPPORTED @@ -54,7 +50,7 @@ def convert_svg2png( Output width and height might be specified if resize is needed. PNG background is transparent. """ - kwargs = {} + kwargs: dict[str, Any] = {} if isinstance(src, pathlib.Path): src = str(src) if isinstance(src, str): @@ -66,9 +62,13 @@ def convert_svg2png( if height: kwargs["output_height"] = height if isinstance(dst, pathlib.Path): - cairosvg.svg2png(write_to=str(dst), **kwargs) + cairosvg.svg2png( # pyright: ignore[reportUnknownMemberType] + write_to=str(dst), **kwargs + ) else: - result = cairosvg.svg2png(**kwargs) + result = cairosvg.svg2png( # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + **kwargs + ) if not isinstance(result, bytes): raise Exception( "Unexpected type returned by cairosvg.svg2png" diff --git a/src/zimscraperlib/image/optimization.py b/src/zimscraperlib/image/optimization.py index 5b9c4305..55e2f7f1 100644 --- a/src/zimscraperlib/image/optimization.py +++ b/src/zimscraperlib/image/optimization.py @@ -1,7 +1,3 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - - """ An image optimization module to optimize the following image formats: - JPEG (using optimize-images) @@ -22,19 +18,25 @@ can still run on default settings which give a bit less size than the original images but maintain a high quality. 
""" -from __future__ import annotations - import functools import io import os import pathlib import subprocess from collections.abc import Callable - -import piexif -from optimize_images.img_aux_processing import do_reduce_colors, rebuild_palette -from optimize_images.img_aux_processing import remove_transparency as remove_alpha -from optimize_images.img_dynamic_quality import jpeg_dynamic_quality +from typing import Any + +import piexif # pyright: ignore[reportMissingTypeStubs] +from optimize_images.img_aux_processing import ( # pyright: ignore[reportMissingTypeStubs] + do_reduce_colors, + rebuild_palette, +) +from optimize_images.img_aux_processing import ( # pyright: ignore[reportMissingTypeStubs] + remove_transparency as remove_alpha, +) +from optimize_images.img_dynamic_quality import ( # pyright: ignore[reportMissingTypeStubs] + jpeg_dynamic_quality, +) from PIL import Image from zimscraperlib.image.conversion import convert_image @@ -61,7 +63,7 @@ def optimize_png( reduce_colors: bool | None = False, fast_mode: bool | None = True, remove_transparency: bool | None = False, - **_, + **_: Any, ) -> pathlib.Path | io.BytesIO: """method to optimize PNG files using a pure python external optimizer @@ -87,10 +89,10 @@ def optimize_png( img = remove_alpha(img, background_color) if reduce_colors: - img, _, _ = do_reduce_colors(img, max_colors) + img, __, __ = do_reduce_colors(img, max_colors) if not fast_mode and img.mode == "P": - img, _ = rebuild_palette(img) + img, __ = rebuild_palette(img) if dst is None: dst = io.BytesIO() @@ -107,7 +109,7 @@ def optimize_jpeg( *, fast_mode: bool | None = True, keep_exif: bool | None = True, - **_, + **_: Any, ) -> pathlib.Path | io.BytesIO: """method to optimize JPEG files using a pure python external optimizer @@ -130,8 +132,14 @@ def optimize_jpeg( ) had_exif = False - if (not isinstance(src, pathlib.Path) and piexif.load(src.getvalue())["Exif"]) or ( - isinstance(src, pathlib.Path) and piexif.load(str(src))["Exif"] + if ( + 
not isinstance(src, pathlib.Path) + and piexif.load(src.getvalue())[ # pyright: ignore[reportUnknownMemberType] + "Exif" + ] + ) or ( + isinstance(src, pathlib.Path) + and piexif.load(str(src))["Exif"] # pyright: ignore[reportUnknownMemberType] ): had_exif = True @@ -141,7 +149,7 @@ def optimize_jpeg( if fast_mode: quality_setting = quality else: - quality_setting, _ = jpeg_dynamic_quality(img) + quality_setting, __ = jpeg_dynamic_quality(img) if dst is None: dst = io.BytesIO() @@ -158,7 +166,7 @@ def optimize_jpeg( dst.seek(0) if keep_exif and had_exif: - piexif.transplant( + piexif.transplant( # pyright: ignore[reportUnknownMemberType] exif_src=( str(src.resolve()) if isinstance(src, pathlib.Path) else src.getvalue() ), @@ -178,7 +186,7 @@ def optimize_webp( method: int | None = 6, *, lossless: bool | None = False, - **_, + **_: Any, ) -> pathlib.Path | io.BytesIO: """method to optimize WebP using Pillow options @@ -196,7 +204,7 @@ def optimize_webp( https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp""" ensure_matches(src, "WEBP") - params = { + params: dict[str, bool | int | None] = { "lossless": lossless, "quality": quality, "method": method, @@ -231,7 +239,7 @@ def optimize_gif( *, interlace: bool | None = True, no_extensions: bool | None = True, - **_, + **_: Any, ) -> pathlib.Path: """method to optimize GIFs using gifsicle >= 1.92 @@ -285,7 +293,7 @@ def optimize_image( *, delete_src: bool | None = False, convert: bool | str | None = False, - **options, + **options: Any, ): """Optimize image, automatically selecting correct optimizer @@ -320,10 +328,10 @@ def optimize_image( src.unlink() -def get_optimization_method(fmt: str) -> Callable: +def get_optimization_method(fmt: str) -> Callable[..., Any]: """Return the proper optimization method to call for a given image format""" - def raise_error(*_, orig_format): + def raise_error(*_, orig_format: str): raise NotImplementedError( f"Image format '{orig_format}' cannot yet be optimized" 
) diff --git a/src/zimscraperlib/image/presets.py b/src/zimscraperlib/image/presets.py index 6d339fa2..7be415f2 100644 --- a/src/zimscraperlib/image/presets.py +++ b/src/zimscraperlib/image/presets.py @@ -1,8 +1,3 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - from typing import ClassVar """ presets for ImageOptimizer in zimscraperlib.image.optimization module """ diff --git a/src/zimscraperlib/image/probing.py b/src/zimscraperlib/image/probing.py index 213425dd..c84e8232 100644 --- a/src/zimscraperlib/image/probing.py +++ b/src/zimscraperlib/image/probing.py @@ -1,14 +1,9 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import colorsys import io import pathlib import re -import colorthief +import colorthief # pyright: ignore[reportMissingTypeStubs] import PIL.Image from zimscraperlib.filesystem import get_content_mimetype, get_file_mimetype @@ -25,9 +20,7 @@ def rgb_to_hex(r: int, g: int, b: int) -> str: def solarize(r: int, g: int, b: int) -> tuple[int, int, int]: # calculate solarized color for main - h, l, s = colorsys.rgb_to_hls( # noqa: E741 - float(r) / 256, float(g) / 256, float(b) / 256 - ) + h, _, s = colorsys.rgb_to_hls(float(r) / 256, float(g) / 256, float(b) / 256) r2, g2, b2 = (int(x * 256) for x in colorsys.hls_to_rgb(h, 0.95, s)) return r2, g2, b2 @@ -35,17 +28,23 @@ def solarize(r: int, g: int, b: int) -> tuple[int, int, int]: if use_palette: # extract two main colors from palette, solarizing second as background - palette = ct.get_palette(color_count=2, quality=1) + palette = ct.get_palette( # pyright: ignore[reportUnknownVariableType] + color_count=2, quality=1 + ) # using the first two colors of the palette? 
- mr, mg, mb = palette[0] - sr, sg, sb = solarize(*palette[1]) + mr, mg, mb = palette[0] # pyright: ignore[reportUnknownVariableType] + sr, sg, sb = solarize(*palette[1]) # pyright: ignore[reportUnknownArgumentType] else: # extract main color from image and solarize it as background - mr, mg, mb = ct.get_color(quality=1) - sr, sg, sb = solarize(mr, mg, mb) + mr, mg, mb = ct.get_color( # pyright: ignore[reportUnknownVariableType] + quality=1 + ) + sr, sg, sb = solarize(mr, mg, mb) # pyright: ignore[reportUnknownArgumentType] - return rgb_to_hex(mr, mg, mb), rgb_to_hex(sr, sg, sb) + return rgb_to_hex( + mr, mg, mb # pyright: ignore[reportUnknownArgumentType] + ), rgb_to_hex(sr, sg, sb) def is_hex_color(text: str) -> bool: diff --git a/src/zimscraperlib/image/transformation.py b/src/zimscraperlib/image/transformation.py index 8732d099..750df41d 100644 --- a/src/zimscraperlib/image/transformation.py +++ b/src/zimscraperlib/image/transformation.py @@ -1,13 +1,8 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import io import pathlib from PIL.Image import open as pilopen -from resizeimage import resizeimage +from resizeimage import resizeimage # pyright: ignore[reportMissingTypeStubs] from zimscraperlib.constants import ALPHA_NOT_SUPPORTED from zimscraperlib.image.utils import save_image @@ -36,21 +31,27 @@ def resize_image( if allow_upscaling: height_width_ratio = float(image.size[1]) / float(image.size[0]) if image.size[0] < width: - image = image.resize( # noqa: PLW2901 + image = image.resize( # noqa: PLW2901 # pyright: ignore[reportUnknownMemberType] (width, int(width * height_width_ratio)) ) if height and image.size[1] < height: - image = image.resize( # noqa: PLW2901 + image = image.resize( # noqa: PLW2901 # pyright: ignore[reportUnknownMemberType] (int(height / height_width_ratio), height) ) # resize using the requested method if method == "width": - resized = resizeimage.resize(method, image, width) + resized = 
resizeimage.resize( # pyright: ignore[reportUnknownMemberType] + method, image, width + ) elif method == "height": - resized = resizeimage.resize(method, image, height) + resized = resizeimage.resize( # pyright: ignore[reportUnknownMemberType] + method, image, height + ) else: - resized = resizeimage.resize(method, image, [width, height]) + resized = resizeimage.resize( # pyright: ignore[reportUnknownMemberType] + method, image, [width, height] + ) # remove alpha layer if not supported and added during resizing if resized.mode == "RGBA" and image_format in ALPHA_NOT_SUPPORTED: diff --git a/src/zimscraperlib/image/utils.py b/src/zimscraperlib/image/utils.py index 306b40b8..99b48757 100644 --- a/src/zimscraperlib/image/utils.py +++ b/src/zimscraperlib/image/utils.py @@ -1,7 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu -from __future__ import annotations - import io import pathlib from typing import IO, Any diff --git a/src/zimscraperlib/inputs.py b/src/zimscraperlib/inputs.py index 23fee6b8..2f76c065 100644 --- a/src/zimscraperlib/inputs.py +++ b/src/zimscraperlib/inputs.py @@ -1,12 +1,8 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import pathlib import shutil import tempfile from collections.abc import Iterable +from typing import TypeVar from zimscraperlib import logger from zimscraperlib.constants import DEFAULT_USER_AGENT @@ -18,6 +14,8 @@ ) from zimscraperlib.download import stream_file +T = TypeVar("T") + def handle_user_provided_file( source: pathlib.Path | str | None = None, @@ -138,6 +136,6 @@ def compute_tags( } -def unique_values(items: list) -> list: +def unique_values(items: list[T]) -> list[T]: """Return unique values in input list while preserving list order""" return list(dict.fromkeys(items)) diff --git a/src/zimscraperlib/logging.py b/src/zimscraperlib/logging.py index 68937594..f30544d3 100644 --- a/src/zimscraperlib/logging.py +++ b/src/zimscraperlib/logging.py @@ -1,8 +1,3 
@@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import io import logging import pathlib diff --git a/src/zimscraperlib/misc.py b/src/zimscraperlib/misc.py index 9d9b25f2..ef8be836 100644 --- a/src/zimscraperlib/misc.py +++ b/src/zimscraperlib/misc.py @@ -1,7 +1,5 @@ """ Miscelaneous utils""" -from __future__ import annotations - from typing import TypeVar T = TypeVar("T") diff --git a/src/zimscraperlib/rewriting/css.py b/src/zimscraperlib/rewriting/css.py index db553f28..9f4215f2 100644 --- a/src/zimscraperlib/rewriting/css.py +++ b/src/zimscraperlib/rewriting/css.py @@ -14,14 +14,14 @@ from functools import partial from typing import Any -from tinycss2 import ( +from tinycss2 import ( # pyright: ignore[reportMissingTypeStubs] ast, parse_declaration_list, # pyright: ignore[reportUnknownVariableType] parse_stylesheet, # pyright: ignore[reportUnknownVariableType] parse_stylesheet_bytes, # pyright: ignore[reportUnknownVariableType] serialize, # pyright: ignore[reportUnknownVariableType] ) -from tinycss2.serializer import ( +from tinycss2.serializer import ( # pyright: ignore[reportMissingTypeStubs] serialize_url, # pyright: ignore[reportUnknownVariableType] ) @@ -186,14 +186,26 @@ def _process_node(self, node: ast.Node): ) elif isinstance(node, ast.FunctionBlock): if node.lower_name == "url": # pyright: ignore[reportUnknownMemberType] - url_node: ast.Node = node.arguments[0] + url_node: ast.Node = ( # pyright: ignore[reportUnknownVariableType] + node.arguments[0] # pyright: ignore[reportUnknownMemberType] + ) new_url = self.url_rewriter( - getattr(url_node, "value", ""), + getattr( + url_node, # pyright: ignore[reportUnknownArgumentType] + "value", + "", + ), self.base_href, ).rewriten_url - setattr(url_node, "value", str(new_url)) # noqa: B010 setattr( # noqa: B010 - url_node, "representation", f'"{serialize_url(str(new_url))}"' + url_node, # pyright: ignore[reportUnknownArgumentType] + "value", + str(new_url), + ) 
+ setattr( # noqa: B010 + url_node, # pyright: ignore[reportUnknownArgumentType] + "representation", + f'"{serialize_url(str(new_url))}"', ) else: @@ -201,12 +213,21 @@ def _process_node(self, node: ast.Node): getattr(node, "arguments", []), ) elif isinstance(node, ast.AtRule): - self._process_list(node.prelude) - self._process_list(node.content) + self._process_list( + node.prelude # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType] + ) + self._process_list( + node.content # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType] + ) elif isinstance(node, ast.Declaration): - self._process_list(node.value) + self._process_list( + node.value # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType] + ) elif isinstance(node, ast.URLToken): - new_url = self.url_rewriter(node.value, self.base_href).rewriten_url + new_url = self.url_rewriter( + node.value, # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType] + self.base_href, + ).rewriten_url node.value = new_url node.representation = f"url({serialize_url(new_url)})" diff --git a/src/zimscraperlib/rewriting/url_rewriting.py b/src/zimscraperlib/rewriting/url_rewriting.py index 4c76e19b..2db1915f 100644 --- a/src/zimscraperlib/rewriting/url_rewriting.py +++ b/src/zimscraperlib/rewriting/url_rewriting.py @@ -38,8 +38,6 @@ and not url-encoded. """ -from __future__ import annotations - import re from dataclasses import dataclass from pathlib import PurePosixPath diff --git a/src/zimscraperlib/types.py b/src/zimscraperlib/types.py index 35de13ac..5e9f23fe 100644 --- a/src/zimscraperlib/types.py +++ b/src/zimscraperlib/types.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ File extensions to MIME-Type mapping All libzim *articles* contains the mime-type of their content, for the libzim @@ -15,8 +12,6 @@ Should your scraper need additional mapping, use mimetypes.add_type() and it will be automatically used. 
""" -from __future__ import annotations - import mimetypes import pathlib diff --git a/src/zimscraperlib/typing.py b/src/zimscraperlib/typing.py index ab14ebff..05657eeb 100644 --- a/src/zimscraperlib/typing.py +++ b/src/zimscraperlib/typing.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from collections.abc import Callable from dataclasses import dataclass from typing import Any, Protocol, TypeVar, runtime_checkable @@ -10,7 +8,7 @@ @dataclass class Callback: - func: Callable + func: Callable[..., Any] args: tuple[Any, ...] | None = None kwargs: dict[str, Any] | None = None @@ -24,8 +22,8 @@ def get_args(self) -> tuple[Any, ...]: def get_kwargs(self) -> dict[str, Any]: return self.kwargs or {} - def call_with(self, *args, **kwargs): - self.func.__call__(*args, **kwargs) + def call_with(self, *args: Any, **kwargs: Any): + self.func(*args, **kwargs) def call(self): self.call_with(*self.get_args(), **self.get_kwargs()) diff --git a/src/zimscraperlib/uri.py b/src/zimscraperlib/uri.py index 6e60a186..0bfe5637 100644 --- a/src/zimscraperlib/uri.py +++ b/src/zimscraperlib/uri.py @@ -1,7 +1,5 @@ """ URI handling module""" -from __future__ import annotations - import urllib.parse from zimscraperlib.misc import first diff --git a/src/zimscraperlib/video/__init__.py b/src/zimscraperlib/video/__init__.py index 959c3318..d4a2a744 100644 --- a/src/zimscraperlib/video/__init__.py +++ b/src/zimscraperlib/video/__init__.py @@ -1,9 +1,7 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim: ai ts=4 sts=4 et sw=4 nu - from .config import Config # flake8: noqa from .encoding import reencode from .probing import get_media_info + +__all__ = ["Config", "reencode", "get_media_info"] diff --git a/src/zimscraperlib/video/config.py b/src/zimscraperlib/video/config.py index 16658a8f..e59a1daa 100644 --- a/src/zimscraperlib/video/config.py +++ b/src/zimscraperlib/video/config.py @@ -1,20 +1,13 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu +from typing import Any, 
ClassVar -from __future__ import annotations -from typing import ClassVar - - -class Config(dict): +class Config(dict[str, str | None]): VERSION = 1 ext = "dat" mimetype = "application/data" - options: ClassVar[dict[str, str | bool | int | None]] = {} - defaults: ClassVar[dict[str, str | bool | int | None]] = { - "-max_muxing_queue_size": "9999" - } - mapping: ClassVar[dict[str, str | bool | int | None]] = { + options: ClassVar[dict[str, str | None]] = {} + defaults: ClassVar[dict[str, str | None]] = {"-max_muxing_queue_size": "9999"} + mapping: ClassVar[dict[str, str]] = { "video_codec": "-codec:v", "audio_codec": "-codec:a", "max_video_bitrate": "-maxrate", @@ -25,21 +18,21 @@ class Config(dict): "target_audio_bitrate": "-b:a", } - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): super().__init__(self, **type(self).defaults) self.update(self.options) self.update(kwargs) - def update_from(self, **kwargs): + def update_from(self, **kwargs: Any): """Updates Config object based on shortcut params as given in build_from()""" for key, value in kwargs.items(): setattr(self, key, value) - def to_ffmpeg_args(self): + def to_ffmpeg_args(self) -> list[str]: """Convert the options dict to list of ffmpeg arguments""" - args = [] + args: list[str] = [] for k, v in self.items(): if v: args += [k, v] @@ -54,7 +47,7 @@ def video_codec(self): return self.get(self.mapping["video_codec"]) @video_codec.setter - def video_codec(self, value): + def video_codec(self, value: str): self[self.mapping["video_codec"]] = value @property @@ -62,7 +55,7 @@ def audio_codec(self): return self.get(self.mapping["audio_codec"]) @audio_codec.setter - def audio_codec(self, value): + def audio_codec(self, value: str): self[self.mapping["audio_codec"]] = value @property @@ -70,7 +63,7 @@ def max_video_bitrate(self): return self.get(self.mapping["max_video_bitrate"]) @max_video_bitrate.setter - def max_video_bitrate(self, value): + def max_video_bitrate(self, value: str): 
self[self.mapping["max_video_bitrate"]] = value @property @@ -78,7 +71,7 @@ def min_video_bitrate(self): return self.get(self.mapping["min_video_bitrate"]) @min_video_bitrate.setter - def min_video_bitrate(self, value): + def min_video_bitrate(self, value: str): self[self.mapping["min_video_bitrate"]] = value @property @@ -86,7 +79,7 @@ def target_video_bitrate(self): return self.get(self.mapping["target_video_bitrate"]) @target_video_bitrate.setter - def target_video_bitrate(self, value): + def target_video_bitrate(self, value: str): self[self.mapping["target_video_bitrate"]] = value @property @@ -94,7 +87,7 @@ def target_audio_bitrate(self): return self.get(self.mapping["target_audio_bitrate"]) @target_audio_bitrate.setter - def target_audio_bitrate(self, value): + def target_audio_bitrate(self, value: str): self[self.mapping["target_audio_bitrate"]] = value @property @@ -102,7 +95,7 @@ def audio_sampling_rate(self): return self.get(self.mapping["audio_sampling_rate"]) @audio_sampling_rate.setter - def audio_sampling_rate(self, value): + def audio_sampling_rate(self, value: str): self[self.mapping["audio_sampling_rate"]] = value @property @@ -110,16 +103,21 @@ def buffersize(self): return self.get(self.mapping["buffersize"]) @buffersize.setter - def buffersize(self, value): + def buffersize(self, value: str): self[self.mapping["buffersize"]] = value @property - def video_scale(self): + def video_scale(self) -> str | None: # remove "scale='" and "'" and return the value in between - return self.get("-vf", [])[7:-1] if self.get("-vf") else None + if vf := self.get("-vf", []): + # test type to please type checker + if not isinstance(vf, str): # pragma: no cover + raise Exception("Incorrect vf value") + return vf[7:-1] + return None @video_scale.setter - def video_scale(self, value): + def video_scale(self, value: str): self["-vf"] = f"scale='{value}'" @property @@ -129,14 +127,9 @@ def quantizer_scale_range(self): return (int(qmin), int(qmax)) if qmin is not None 
and qmax is not None else None @quantizer_scale_range.setter - def quantizer_scale_range(self, value): + def quantizer_scale_range(self, value: tuple[int, int]): qmin, qmax = value - if ( - isinstance(qmin, int) - and isinstance(qmax, int) - and -1 <= qmin <= 69 # noqa: PLR2004 - and -1 <= qmax <= 1024 # noqa: PLR2004 - ): + if -1 <= qmin <= 69 and -1 <= qmax <= 1024: # noqa: PLR2004 self["-qmin"] = str(qmin) self["-qmax"] = str(qmax) else: @@ -145,7 +138,7 @@ def quantizer_scale_range(self, value): ) @classmethod - def build_from(cls, **params): + def build_from(cls, **params: Any): """build a Config easily via shortcut params video_codec: codec for output audio stream. more info diff --git a/src/zimscraperlib/video/encoding.py b/src/zimscraperlib/video/encoding.py index bedbcd3d..56c98e64 100644 --- a/src/zimscraperlib/video/encoding.py +++ b/src/zimscraperlib/video/encoding.py @@ -1,8 +1,3 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import pathlib import shutil import subprocess @@ -44,9 +39,8 @@ def reencode( threads: int | None = 1, *, delete_src: bool = False, - with_process: bool = False, failsafe: bool = True, -) -> tuple[bool, subprocess.CompletedProcess[str]] | bool: +) -> tuple[bool, subprocess.CompletedProcess[str]]: """Runs ffmpeg with given ffmpeg_args Arguments - @@ -55,7 +49,6 @@ def reencode( ffmpeg_args - A list of ffmpeg arguments threads - Number of encoding threads used by ffmpeg delete_src - Delete source file after convertion - with_process - Optionally return the output from ffmpeg (stderr and stdout) failsafe - Run in failsafe mode """ @@ -84,6 +77,4 @@ def reencode( if delete_src: src_path.unlink() shutil.copy(tmp_path, dst_path) - if with_process: - return ffmpeg.returncode == 0, ffmpeg - return ffmpeg.returncode == 0 + return ffmpeg.returncode == 0, ffmpeg diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index 81dab174..cc524ab7 100644 --- 
a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -1,8 +1,3 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - from typing import ClassVar from zimscraperlib.video.config import Config @@ -22,7 +17,7 @@ class VoiceMp3Low(Config): ext = "mp3" mimetype = "audio/mp3" - options: ClassVar[dict[str, str | bool | int | None]] = { + options: ClassVar[dict[str, str | None]] = { "-vn": "", # remove video stream "-codec:a": "mp3", # audio codec "-ar": "44100", # audio sampling rate @@ -42,7 +37,7 @@ class VideoWebmLow(Config): ext = "webm" mimetype = f"{preset_type}/webm" - options: ClassVar[dict[str, str | bool | int | None]] = { + options: ClassVar[dict[str, str | None]] = { "-codec:v": "libvpx-vp9", # video codec "-b:v": "140k", # Adjust quantizer within min/max to target this bitrate "-qmin": "30", # Reduce the bitrate on very still videos @@ -70,7 +65,7 @@ class VideoMp4Low(Config): ext = "mp4" mimetype = f"{preset_type}/mp4" - options: ClassVar[dict[str, str | bool | int | None]] = { + options: ClassVar[dict[str, str | None]] = { "-codec:v": "h264", # video codec "-b:v": "300k", # target video bitrate "-maxrate": "300k", # max video bitrate @@ -95,7 +90,7 @@ class VideoWebmHigh(Config): ext = "webm" mimetype = f"{preset_type}/webm" - options: ClassVar[dict[str, str | bool | int | None]] = { + options: ClassVar[dict[str, str | None]] = { "-codec:v": "libvpx-vp9", # video codec "-b:v": "340k", # Adjust quantizer within min/max to target this bitrate "-qmin": "26", # Reduce the bitrate on very still videos @@ -119,7 +114,7 @@ class VideoMp4High(Config): ext = "mp4" mimetype = f"{preset_type}/mp4" - options: ClassVar[dict[str, str | bool | int | None]] = { + options: ClassVar[dict[str, str | None]] = { "-codec:v": "h264", # video codec "-codec:a": "aac", # audio codec "-crf": "20", # constant quality, lower value gives better qual and larger size diff --git a/src/zimscraperlib/video/probing.py 
b/src/zimscraperlib/video/probing.py index 5f6b217f..06b5d057 100644 --- a/src/zimscraperlib/video/probing.py +++ b/src/zimscraperlib/video/probing.py @@ -1,11 +1,8 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - - +import pathlib import subprocess -def get_media_info(src_path): +def get_media_info(src_path: str | pathlib.Path): """dict of file's details from ffprobe codecs: list of codecs in use diff --git a/src/zimscraperlib/zim/__init__.py b/src/zimscraperlib/zim/__init__.py index 9d82c1ea..d3460797 100644 --- a/src/zimscraperlib/zim/__init__.py +++ b/src/zimscraperlib/zim/__init__.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ ZIM file creation tools zim.creator: create files by manually adding each article diff --git a/src/zimscraperlib/zim/_libkiwix.py b/src/zimscraperlib/zim/_libkiwix.py index 5e24e159..e2df485f 100644 --- a/src/zimscraperlib/zim/_libkiwix.py +++ b/src/zimscraperlib/zim/_libkiwix.py @@ -12,8 +12,6 @@ https://github.com/kiwix/libkiwix/blob/master/src/tools/otherTools.cpp """ -from __future__ import annotations - import io from typing import NamedTuple @@ -79,7 +77,7 @@ def parseMimetypeCounter( counterData: str, ) -> CounterMap: """Mapping of MIME types with count for each from ZIM Counter metadata string""" - counters = {} + counters: CounterMap = {} ss = io.StringIO(counterData) eof = False while not eof: @@ -94,7 +92,7 @@ def parseMimetypeCounter( def convertTags(tags_str: str) -> list[str]: """List of tags expanded with libkiwix's additional hints for pic/vid/det/index""" tags = tags_str.split(";") - tagsList = [] + tagsList: list[str] = [] picSeen = vidSeen = detSeen = indexSeen = False for tag in tags: # not upstream diff --git a/src/zimscraperlib/zim/archive.py b/src/zimscraperlib/zim/archive.py index 1f4dda89..4cd0a593 100644 --- a/src/zimscraperlib/zim/archive.py +++ b/src/zimscraperlib/zim/archive.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ ZIM 
Archive helper Convenient subclass of libzim.reader.Archive with: @@ -9,9 +6,8 @@ - direct access to search results and number of results - public Entry access by Id""" -from __future__ import annotations - from collections.abc import Iterable +from types import TracebackType import libzim.reader # pyright: ignore[reportMissingModuleSource] import libzim.search # pyright: ignore[reportMissingModuleSource] @@ -24,7 +20,12 @@ class Archive(libzim.reader.Archive): def __enter__(self): return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_val: BaseException | None = None, + exc_tb: TracebackType | None = None, + ): pass @property diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index 99fe6dee..d339933d 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ ZIM Creator helper Convenient subclass of libzim.writer.Creator with: @@ -17,13 +14,12 @@ - content stored on object - can be used to store a filepath and content read from it (not stored) """ -from __future__ import annotations - import io import logging import pathlib import re import weakref +from types import TracebackType import libzim.writer # pyright: ignore[reportMissingModuleSource] import PIL.Image @@ -43,9 +39,10 @@ from zimscraperlib.zim.metadata import ( DEFAULT_DEV_ZIM_METADATA, MANDATORY_ZIM_METADATA_KEYS, + AnyMetadata, IllustrationBasedMetadata, LanguageMetadata, - Metadata, + MetadataBase, StandardMetadataList, ) @@ -113,7 +110,7 @@ def __init__( ignore_duplicates: bool | None = False, ): super().__init__(filename=filename) - self._metadata: dict[str, Metadata] = {} + self._metadata: dict[str, AnyMetadata] = {} self.__indexing_configured = False self.can_finish = True @@ -122,8 +119,6 @@ def __init__( if compression: self.config_compression( getattr(libzim.writer.Compression, 
compression.lower()) - if isinstance(compression, str) - else compression ) self.workaround_nocancel = workaround_nocancel @@ -146,10 +141,6 @@ def _log_metadata(self): """Log in DEBUG level all metadata key and value""" for name, metadata in sorted(self._metadata.items()): - if metadata is None: - logger.debug(f"Metadata: {name} is None") - continue - if not hasattr(metadata, "value"): logger.debug( f"Metadata: {name} is improper metadata type: " @@ -237,7 +228,9 @@ def start(self): return self - def add_metadata(self, value: Metadata): + def add_metadata( # pyright: ignore[reportIncompatibleMethodOverride] + self, value: AnyMetadata + ): """Really add the metadata to the ZIM, after ZIM creation has started. You would probably prefer to use config_metadata methods to check metadata @@ -249,8 +242,8 @@ def add_metadata(self, value: Metadata): def config_metadata( self, - std_metadata: StandardMetadataList | list[Metadata], - extra_metadata: list[Metadata] | None = None, + std_metadata: StandardMetadataList | list[AnyMetadata], + extra_metadata: list[AnyMetadata] | None = None, *, fail_on_missing_prefix_in_extras: bool = True, ): @@ -296,7 +289,7 @@ def config_metadata( def config_dev_metadata( self, - extra_metadata: Metadata | list[Metadata] | None = None, + extra_metadata: AnyMetadata | list[AnyMetadata] | None = None, ): """Calls minimal set of mandatory metadata with default values for dev @@ -306,7 +299,7 @@ def config_dev_metadata( std_metadata=DEFAULT_DEV_ZIM_METADATA, extra_metadata=( [extra_metadata] - if isinstance(extra_metadata, Metadata) + if isinstance(extra_metadata, MetadataBase) else extra_metadata ), fail_on_missing_prefix_in_extras=False, @@ -359,7 +352,9 @@ def add_item_for( if is_front is None: is_front = mimetype in FRONT_ARTICLE_MIMETYPES - hints = {libzim.writer.Hint.FRONT_ARTICLE: is_front} + hints: dict[libzim.writer.Hint, int] = { + libzim.writer.Hint.FRONT_ARTICLE: is_front + } if should_compress is not None: 
hints[libzim.writer.Hint.COMPRESS] = should_compress @@ -383,7 +378,7 @@ def add_item_for( ) return path - def add_item( + def add_item( # pyright: ignore[reportIncompatibleMethodOverride] self, item: libzim.writer.Item, duplicate_ok: bool | None = None, @@ -429,7 +424,7 @@ def add_redirect( title is optional. when set, the redirect itself can be found on suggestions (indexed) if considered FRONT_ARTICLE""" - hints = {} + hints: dict[libzim.writer.Hint, int] = {} if is_front is not None: hints[libzim.writer.Hint.FRONT_ARTICLE] = bool(is_front) @@ -446,7 +441,12 @@ def add_redirect( self.can_finish = False # pragma: no cover raise - def finish(self, exc_type=None, exc_val=None, exc_tb=None): # noqa: ARG002 + def finish( + self, + _: type[BaseException] | None = None, + __: BaseException | None = None, + ___: TracebackType | None = None, + ): """Triggers finalization of ZIM creation and create final ZIM file.""" if not getattr(self, "can_finish", False): return @@ -459,5 +459,10 @@ def __enter__(self): self.start() return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_val: BaseException | None = None, + exc_tb: TracebackType | None = None, + ): self.finish(exc_type, exc_val, exc_tb) diff --git a/src/zimscraperlib/zim/filesystem.py b/src/zimscraperlib/zim/filesystem.py index baa39c90..969eed26 100644 --- a/src/zimscraperlib/zim/filesystem.py +++ b/src/zimscraperlib/zim/filesystem.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ zimwriterfs-like tools to convert a build folder into a ZIM make_zim_file behaves in a similar way to zimwriterfs and expects the same options: @@ -26,8 +23,6 @@ Meaning you should exit right after an exception in your code (during zim creation) Use workaround_nocancel=False to disable the workaround. 
""" -from __future__ import annotations - import datetime import pathlib import re @@ -131,7 +126,7 @@ def make_zim_file( date: datetime.date | None = None, language: str = "eng", creator: str = "-", - publisher="-", + publisher: str = "-", tags: Sequence[str] | None = None, source: str | None = None, flavour: str | None = None, diff --git a/src/zimscraperlib/zim/indexing.py b/src/zimscraperlib/zim/indexing.py index 807bfcf8..bbc2b271 100644 --- a/src/zimscraperlib/zim/indexing.py +++ b/src/zimscraperlib/zim/indexing.py @@ -1,16 +1,15 @@ """ Special item with customized index data and helper classes """ -from __future__ import annotations - import io import pathlib import libzim.writer # pyright: ignore[reportMissingModuleSource] try: - import pymupdf + import pymupdf # pyright: ignore[reportMissingTypeStubs] except ImportError: # pragma: no cover - import fitz as pymupdf # pymupdf main module was named fitz before 1.24.3 + # pymupdf main module was named fitz before 1.24.3 + import fitz as pymupdf # pyright: ignore[reportMissingTypeStubs] from zimscraperlib import logger @@ -78,7 +77,9 @@ def get_pdf_index_data( """ # do not display all pymupdf errors, we will filter them afterwards - pymupdf.TOOLS.mupdf_display_errors(False) + pymupdf.TOOLS.mupdf_display_errors( # pyright: ignore[reportUnknownMemberType] + False + ) if content: doc = pymupdf.open(stream=content) @@ -86,18 +87,23 @@ def get_pdf_index_data( doc = pymupdf.open(stream=fileobj) else: doc = pymupdf.open(filename=filepath) - metadata = doc.metadata + metadata = ( # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + doc.metadata + ) title = "" if metadata: # pragma: no branch (always metadata in test PDFs) - parts = [] + parts: list[str] = [] for key in ["title", "author", "subject"]: - if metadata.get(key): - parts.append(metadata[key]) + if metadata.get(key): # pyright: ignore[reportUnknownMemberType] + parts.append( + metadata[key] # pyright: ignore[reportUnknownArgumentType] + ) if 
parts: # pragma: no branch (always metadata in test PDFs) title = " - ".join(parts) content = "\n".join( - page.get_text() for page in doc # pyright: ignore[reportAttributeAccessIssue] + page.get_text() # pyright: ignore[reportUnknownArgumentType, reportUnknownMemberType, reportAttributeAccessIssue] + for page in doc ) # build list of messages and filter messages which are known to not be relevant diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py index 05221f47..c46c8f93 100644 --- a/src/zimscraperlib/zim/items.py +++ b/src/zimscraperlib/zim/items.py @@ -1,9 +1,4 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - - """ libzim Item helpers """ -from __future__ import annotations import io import pathlib @@ -34,7 +29,7 @@ def __init__( path: str | None = None, title: str | None = None, mimetype: str | None = None, - hints: dict | None = None, + hints: dict[libzim.writer.Hint, int] | None = None, **kwargs: Any, ): super().__init__() @@ -62,7 +57,7 @@ def get_title(self) -> str: def get_mimetype(self) -> str: return getattr(self, "mimetype", "") - def get_hints(self) -> dict: + def get_hints(self) -> dict[libzim.writer.Hint, int]: return getattr(self, "hints", {}) @@ -97,7 +92,7 @@ def __init__( path: str | None = None, title: str | None = None, mimetype: str | None = None, - hints: dict | None = None, + hints: dict[libzim.writer.Hint, int] | None = None, index_data: IndexData | None = None, *, auto_index: bool = True, @@ -192,7 +187,9 @@ def _get_auto_index(self): mimetype = get_file_mimetype(filepath) if mimetype == "application/pdf": index_data = get_pdf_index_data(filepath=filepath) - self.get_indexdata = lambda: index_data + self.get_indexdata = ( # pyright:ignore [reportIncompatibleVariableOverride] + lambda: index_data + ) else: return @@ -211,7 +208,12 @@ class URLItem(StaticItem): Use `tmp_dir` to point location of that temp file.""" @staticmethod - def download_for_size(url, on_disk, tmp_dir=None): + def download_for_size( 
+ url: urllib.parse.ParseResult, + tmp_dir: pathlib.Path | None = None, + *, + on_disk: bool, + ): """Download URL to a temp file and return its tempfile and size""" fpath = stream = None if on_disk: @@ -232,7 +234,7 @@ def __init__( path: str | None = None, title: str | None = None, mimetype: str | None = None, - hints: dict | None = None, + hints: dict[libzim.writer.Hint, int] | None = None, use_disk: bool | None = None, **kwargs: Any, ): @@ -242,7 +244,7 @@ def __init__( path=path, title=title, mimetype=mimetype, hints=hints, **kwargs ) self.url = urllib.parse.urlparse(url) - use_disk = getattr(self, "use_disk", False) + use_disk_set: bool = getattr(self, "use_disk", False) # fetch headers to retrieve size and type try: @@ -271,7 +273,7 @@ def __init__( except Exception: # we couldn't retrieve size so we have to download resource to target, self.size = self.download_for_size( - self.url, on_disk=use_disk, tmp_dir=getattr(self, "tmp_dir", None) + self.url, on_disk=use_disk_set, tmp_dir=getattr(self, "tmp_dir", None) ) # downloaded to disk and using a file path from now on if use_disk: @@ -286,7 +288,7 @@ def get_path(self) -> str: def get_title(self) -> str: return getattr(self, "title", "") - def get_mimetype(self) -> str | None: + def get_mimetype(self) -> str: return getattr( self, "mimetype", diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py index fdc9652a..99eff296 100644 --- a/src/zimscraperlib/zim/metadata.py +++ b/src/zimscraperlib/zim/metadata.py @@ -1,12 +1,11 @@ -from __future__ import annotations - import base64 import datetime import io +from abc import ABC, abstractmethod from collections.abc import Iterable from dataclasses import asdict, dataclass, fields from itertools import filterfalse -from typing import Any +from typing import Any, TypeVar import regex @@ -27,68 +26,21 @@ # whether to apply openZIM recommendations (see https://wiki.openzim.org/wiki/Metadata) APPLY_RECOMMENDATIONS: bool = True +# TypeVar 
without any constraint +T = TypeVar("T") -def clean_str(value: str) -> str: - """Clean a string value for unwanted control characters and strip white chars""" - return UNWANTED_CONTROL_CHARACTERS_REGEX.sub("", value).strip(" \r\n\t") - - -def nb_grapheme_for(value: str) -> int: - """Number of graphemes (visually perceived characters) in a given string""" - return len(regex.findall(r"\X", value)) - - -def mandatory(cls): - """Marks a Metadata mandatory: must be set to please Creator and cannot be empty""" - cls.is_required = True - cls.empty_allowed = False - return cls +class MetadataBase[T](ABC): + """Base class for metadata -def allow_empty(cls): - """Whether input can be blank""" - cls.empty_allowed = True - return cls - - -def allow_duplicates(cls): - """Whether list input can accept duplicate values""" - cls.duplicates_allowed = True - return cls - - -def deduplicate(cls): - """Whether duplicates in list inputs should be reduced""" - cls.duplicates_allowed = True - cls.require_deduplication = True - return cls + Both generic (to accomodate any value type implemented in child classes) and + abstract (because it has no idea how to compute the cleaned_value and libzim_value + for any value type) + """ - -def only_lang_codes(cls): - """Whether list input should be checked to only accept ISO-639-1 codes""" - cls.oz_only_iso636_3_allowed = True - return cls - - -def x_protected(cls): - """Whether metadata name should be checked for collision with reserved names - - when applying recommendations""" - cls.oz_x_protected = True - return cls - - -def x_prefixed(cls): - """Whether metadata names should be automatically X-Prefixed""" - cls.oz_x_protected = False - cls.oz_x_prefixed = True - return cls - - -class Metadata: # name of the metadata (not its value) meta_name: str - value: bytes + value: T # MIME type of the value meta_mimetype: str @@ -191,10 +143,89 @@ def name(self) -> str: def get_encoded(value: str) -> bytes: return value.encode() + def validate(self) -> 
None: + _ = self.name + _ = self.libzim_value + + @abstractmethod + def get_cleaned_value(self, value: Any) -> T: ... # pragma: no cover + @property def libzim_value(self) -> bytes: return self.get_libzim_value() + @abstractmethod + def get_libzim_value(self) -> bytes: ... # pragma: no cover + + +# Alias for convenience when function accept any metadata +AnyMetadata = MetadataBase[Any] + +# TypeVar bounded to subclasses of GenericMetadata, used by class decorators so that +# they properly accommodate to the class they are used on while still knowing they have +# access to all attributes of the MetadataBase class +U = TypeVar("U", bound=AnyMetadata) + + +def clean_str(value: str) -> str: + """Clean a string value for unwanted control characters and strip white chars""" + return UNWANTED_CONTROL_CHARACTERS_REGEX.sub("", value).strip(" \r\n\t") + + +def nb_grapheme_for(value: str) -> int: + """Number of graphemes (visually perceived characters) in a given string""" + return len(regex.findall(r"\X", value)) + + +def mandatory(cls: type[U]): + """Marks a Metadata mandatory: must be set to please Creator and cannot be empty""" + cls.is_required = True + cls.empty_allowed = False + return cls + + +def allow_empty(cls: type[U]): + """Whether input can be blank""" + cls.empty_allowed = True + return cls + + +def allow_duplicates(cls: type[U]): + """Whether list input can accept duplicate values""" + cls.duplicates_allowed = True + return cls + + +def deduplicate(cls: type[U]): + """Whether duplicates in list inputs should be reduced""" + cls.duplicates_allowed = True + cls.require_deduplication = True + return cls + + +def only_lang_codes(cls: type[U]): + """Whether list input should be checked to only accept ISO-639-1 codes""" + cls.oz_only_iso636_3_allowed = True + return cls + + +def x_protected(cls: type[U]): + """Whether metadata name should be checked for collision with reserved names + + when applying recommendations""" + cls.oz_x_protected = True + return cls + + 
+def x_prefixed(cls: type[U]): + """Whether metadata names should be automatically X-Prefixed""" + cls.oz_x_protected = False + cls.oz_x_prefixed = True + return cls + + +class Metadata(MetadataBase[bytes]): + def get_binary_from( self, value: bytes | SupportsRead[bytes] | SupportsSeekableRead[bytes] | io.BytesIO, @@ -210,7 +241,7 @@ def get_binary_from( last_pos = value.tell() bvalue = value.read() if isinstance(value, SupportsSeekableRead) and value.seekable(): - value.seek(last_pos) + value.seek(last_pos) # pyright: ignore[reportPossiblyUnboundVariable] if not self.empty_allowed and not value: raise ValueError("Missing value (empty not allowed)") return bvalue @@ -222,12 +253,8 @@ def get_cleaned_value(self, value: bytes | io.IOBase | io.BytesIO) -> bytes: def get_libzim_value(self) -> bytes: return self.value - def validate(self) -> None: - _ = self.name - _ = self.libzim_value - -class TextBasedMetadata(Metadata): +class TextBasedMetadata(MetadataBase[str]): """Expects a Text (str) input. Will be cleaned-up and UTF-8 encoded""" value: str @@ -257,7 +284,7 @@ def get_libzim_value(self) -> bytes: return self.get_encoded(self.value) -class TextListBasedMetadata(Metadata): +class TextListBasedMetadata(MetadataBase[list[str]]): """Expects a Text List (list[str]) input. Each item will be cleaned-up. List will be joined (see `join_list_with`) and UTF-8 encoded""" @@ -295,7 +322,7 @@ def get_libzim_value(self) -> bytes: return self.get_encoded(self.join_list_with.join(self.value)) -class DateBasedMetadata(Metadata): +class DateBasedMetadata(MetadataBase[datetime.date]): """Expects a Date (date | datetime) input. 
Will be UTF-8 encoded as YYYY-MM-DD""" value: datetime.date @@ -456,7 +483,7 @@ class StandardMetadataList: License: LicenseMetadata | None = None Relation: RelationMetadata | None = None - def values(self) -> list[Metadata]: + def values(self) -> list[AnyMetadata]: return list(filter(bool, asdict(self).values())) @classmethod @@ -464,11 +491,13 @@ def get_reserved_names(cls) -> list[str]: """list of mandatory metadata as per the spec. computed from metadata using @mandatory decorator""" - names = [] + names: list[str] = [] for field in fields(cls): - meta_type = globals().get(str(field.type)) - if getattr(meta_type, "is_required", False): - names.append(getattr(meta_type, "meta_name", "")) + if not isinstance(field.type, type): + continue + # if field type is a type, it means that it is required (otherwise field + # type is a string when None is allowed) + names.append(getattr(field.type, "meta_name", "")) return names diff --git a/src/zimscraperlib/zim/providers.py b/src/zimscraperlib/zim/providers.py index 4a7ddec8..f1733852 100644 --- a/src/zimscraperlib/zim/providers.py +++ b/src/zimscraperlib/zim/providers.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - """ libzim Providers accepting a `ref` arg to keep it away from garbage collection Use case is to pass it the Item instance that created the Provider so that the @@ -9,8 +6,6 @@ (and thus Provider instanced twice) - to release whatever needs to be once we know data won't be fetched anymore """ -from __future__ import annotations - import io import pathlib from collections.abc import Generator @@ -18,7 +13,7 @@ import libzim.writer # pyright: ignore[reportMissingModuleSource] import requests -from zimscraperlib.download import _get_retry_adapter, stream_file +from zimscraperlib.download import get_retry_adapter, stream_file class FileProvider(libzim.writer.FileProvider): @@ -78,12 +73,12 @@ def __init__(self, url: str, size: int | None = None, ref: object | None = None) self.ref = 
ref session = requests.Session() - session.mount("http", _get_retry_adapter()) + session.mount("http", get_retry_adapter()) self.resp = session.get(url, stream=True) self.resp.raise_for_status() @staticmethod - def get_size_of(url) -> int | None: + def get_size_of(url: str) -> int | None: _, headers = stream_file(url, byte_stream=io.BytesIO(), only_first_block=True) try: return int(headers["Content-Length"]) diff --git a/tests/conftest.py b/tests/conftest.py index 15e93ecf..58e8daac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,9 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import pathlib import pytest -def pytest_addoption(parser): +def pytest_addoption(parser: pytest.Parser): parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" ) @@ -18,14 +15,14 @@ def pytest_addoption(parser): ) -def pytest_configure(config): +def pytest_configure(config: pytest.Config): config.addinivalue_line("markers", "slow: mark test as slow to run") config.addinivalue_line( "markers", "installed: mark test as testing installed features" ) -def pytest_collection_modifyitems(config, items): +def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]): skip_slow = pytest.mark.skip(reason="need --runslow option to run") skip_installed = pytest.mark.skip(reason="need --runinstalled option to run") @@ -37,103 +34,103 @@ def pytest_collection_modifyitems(config, items): @pytest.fixture(scope="module") -def valid_http_url(): +def valid_http_url() -> str: return "http://google.com/favicon.ico" @pytest.fixture(scope="module") -def valid_https_url(): +def valid_https_url() -> str: return "https://www.google.com/favicon.ico" @pytest.fixture(scope="module") -def invalid_url(): +def invalid_url() -> str: return "http://nodomain.notld/nofile.noext" @pytest.fixture(scope="module") -def http_error_url(): +def http_error_url() -> str: return "https://github.com/satyamtg/404_error" 
@pytest.fixture(scope="module") -def timeout_url(): +def timeout_url() -> str: # Should always fail with a connection timeout (nothing listening on that port) # taken from request's own tests return "http://10.255.255.1" @pytest.fixture(scope="module") -def png_image_url(): +def png_image_url() -> str: return "https://commons.wikimedia.org/static/images/project-logos/commonswiki.png" @pytest.fixture(scope="module") -def gzip_html_url(): +def gzip_html_url() -> str: return "https://en.wikipedia.org/wiki/Main_Page" @pytest.fixture(scope="module") -def gzip_nonhtml_url(): +def gzip_nonhtml_url() -> str: return "http://mirror.download.kiwix.org/robots.txt" -def file_src(fname): +def file_src(fname: str) -> pathlib.Path: return pathlib.Path(__file__).parent.joinpath("files", fname) @pytest.fixture(scope="module") -def png_image(): +def png_image() -> pathlib.Path: return file_src("commons48.png") @pytest.fixture(scope="module") -def png_image2(): +def png_image2() -> pathlib.Path: return file_src("commons.png") @pytest.fixture(scope="module") -def jpg_image(): +def jpg_image() -> pathlib.Path: return file_src("pluto.jpg") @pytest.fixture(scope="module") -def jpg_exif_image(): +def jpg_exif_image() -> pathlib.Path: return file_src("blue.jpg") @pytest.fixture(scope="module") -def square_png_image(): +def square_png_image() -> pathlib.Path: return file_src("square.png") @pytest.fixture(scope="module") -def square_jpg_image(): +def square_jpg_image() -> pathlib.Path: return file_src("square.jpg") @pytest.fixture(scope="module") -def font(): +def font() -> pathlib.Path: return file_src("DroidSans.ttf") @pytest.fixture(scope="module") -def svg_image(): +def svg_image() -> pathlib.Path: return file_src("star.svg") @pytest.fixture(scope="module") -def gif_image(): +def gif_image() -> pathlib.Path: return file_src("mail.gif") @pytest.fixture(scope="module") -def webp_image(): +def webp_image() -> pathlib.Path: return file_src("ninja.webp") @pytest.fixture(scope="module") -def 
encrypted_pdf_file(): +def encrypted_pdf_file() -> pathlib.Path: """Return an encrypted PDF encrypted.pdf is a PDF encrypted with only a owner password (restricting edit/print) @@ -144,17 +141,17 @@ def encrypted_pdf_file(): @pytest.fixture(scope="module") -def encrypted_pdf_content(): +def encrypted_pdf_content() -> pathlib.Path: return file_src("encrypted.txt") @pytest.fixture(scope="module") -def big_pdf_file(): +def big_pdf_file() -> pathlib.Path: return file_src("milderm.pdf") @pytest.fixture(scope="module") -def big_pdf_content(): +def big_pdf_content() -> pathlib.Path: return file_src("milderm.txt") @@ -164,10 +161,10 @@ def valid_user_agent(): @pytest.fixture(scope="session") -def small_zim_file(tmpdir_factory): +def small_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path: from zimscraperlib.download import stream_file - dst = pathlib.Path(tmpdir_factory.mktemp("data").join("small.zim")) + dst = pathlib.Path(tmpdir_factory.mktemp("data") / "small.zim") stream_file( "https://github.com/openzim/zim-testing-suite/raw/v0.3/data/nons/small.zim", dst, @@ -176,10 +173,10 @@ def small_zim_file(tmpdir_factory): @pytest.fixture(scope="session") -def ns_zim_file(tmpdir_factory): +def ns_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path: from zimscraperlib.download import stream_file - dst = pathlib.Path(tmpdir_factory.mktemp("data").join("ns.zim")) + dst = pathlib.Path(tmpdir_factory.mktemp("data") / "ns.zim") stream_file( "https://github.com/openzim/zim-testing-suite/raw/v0.4/data/withns/" "wikibooks_be_all_nopic_2017-02.zim", @@ -189,10 +186,10 @@ def ns_zim_file(tmpdir_factory): @pytest.fixture(scope="session") -def real_zim_file(tmpdir_factory): +def real_zim_file(tmpdir_factory: pytest.TempdirFactory) -> pathlib.Path: from zimscraperlib.download import stream_file - dst = pathlib.Path(tmpdir_factory.mktemp("data").join("small.zim")) + dst = pathlib.Path(tmpdir_factory.mktemp("data") / "small.zim") stream_file( 
"https://github.com/openzim/zim-testing-suite/raw/v0.3/data/withns/" "wikipedia_en_climate_change_nopic_2020-01.zim", @@ -202,7 +199,7 @@ def real_zim_file(tmpdir_factory): @pytest.fixture(scope="session") -def undecodable_byte_stream(): +def undecodable_byte_stream() -> bytes: """bytes that is not recognized by some libmagic and raises UnicodeDecodeError""" return ( b"\x03\x04\x14\x00\x06\x00\x08\x00\x00\x00!\x00\xd9\x85nc\x81\x01\x00" diff --git a/tests/download/test_download.py b/tests/download/test_download.py index b450a7a7..8f138ef3 100644 --- a/tests/download/test_download.py +++ b/tests/download/test_download.py @@ -1,18 +1,14 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import concurrent.futures import io import pathlib import re from typing import ClassVar +from unittest.mock import Mock import pytest import requests import requests.structures -from yt_dlp import DownloadError +from yt_dlp import DownloadError # pyright: ignore[reportMissingTypeStubs] from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT from zimscraperlib.download import ( @@ -24,7 +20,7 @@ ) -def assert_downloaded_file(url, file): +def assert_downloaded_file(url: str, file: pathlib.Path): assert file.exists() # our google test urls dont support HEAD req = requests.get(url, timeout=DEFAULT_WEB_REQUESTS_TIMEOUT) @@ -32,12 +28,12 @@ def assert_downloaded_file(url, file): assert file.stat().st_size == len(req.content) -def assert_headers(returned_headers): +def assert_headers(returned_headers: requests.structures.CaseInsensitiveDict[str]): assert isinstance(returned_headers, requests.structures.CaseInsensitiveDict) assert returned_headers["Content-Type"] == "image/x-icon" -def get_dest_file(tmp_path): +def get_dest_file(tmp_path: pathlib.Path): return tmp_path.joinpath("favicon.ico") @@ -46,22 +42,22 @@ def test_missing_dest(): stream_file(url="http://some_url", byte_stream=io.BytesIO()) -def test_invalid_url(tmp_path, 
invalid_url): +def test_invalid_url(tmp_path: pathlib.Path, invalid_url: str): dest_file = tmp_path / "favicon.ico" with pytest.raises(requests.exceptions.ConnectionError): stream_file(url=invalid_url, fpath=dest_file) -def test_no_output_supplied(valid_http_url): +def test_no_output_supplied(valid_http_url: str): with pytest.raises( ValueError, match="Either file path or a bytesIO object is needed" ): stream_file(url=valid_http_url) -def test_first_block_download_default_session(valid_http_url): +def test_first_block_download_default_session(valid_http_url: str): byte_stream = io.BytesIO() - size, ret = stream_file( + _, ret = stream_file( url=valid_http_url, byte_stream=byte_stream, only_first_block=True ) assert_headers(ret) @@ -71,7 +67,7 @@ def test_first_block_download_default_session(valid_http_url): assert len(byte_stream.read()) <= expected -def test_filehandler(tmp_path, valid_http_url): +def test_filehandler(tmp_path: pathlib.Path, valid_http_url: str): dest_file = pathlib.Path(tmp_path / "favicon.ico") def notseekable(): @@ -79,7 +75,7 @@ def notseekable(): with open(dest_file, "wb") as byte_stream: assert byte_stream.seekable() - size, ret = stream_file( + _, ret = stream_file( url=valid_http_url, byte_stream=byte_stream, only_first_block=True ) assert_headers(ret) @@ -87,14 +83,14 @@ def notseekable(): byte_stream.seekable = notseekable assert not byte_stream.seekable() - size, ret = stream_file( + _, ret = stream_file( url=valid_http_url, byte_stream=byte_stream, only_first_block=True ) assert_headers(ret) assert byte_stream.tell() > 0 -def test_first_block_download_custom_session(mocker, valid_http_url): +def test_first_block_download_custom_session(mocker: Mock, valid_http_url: str): byte_stream = io.BytesIO() custom_session = mocker.Mock(spec=requests.Session) @@ -118,7 +114,7 @@ def test_first_block_download_custom_session(mocker, valid_http_url): headers=None, timeout=DEFAULT_WEB_REQUESTS_TIMEOUT, ) - requests.Session.assert_not_called() # 
pyright: ignore[reportAttributeAccessIssue] + requests.Session.assert_not_called() # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType] @pytest.mark.slow @@ -134,25 +130,25 @@ def test_user_agent(): @pytest.mark.slow -def test_save_http(tmp_path, valid_http_url): +def test_save_http(tmp_path: pathlib.Path, valid_http_url: str): dest_file = tmp_path / "favicon.ico" - size, ret = stream_file(url=valid_http_url, fpath=dest_file) + _, ret = stream_file(url=valid_http_url, fpath=dest_file) assert_headers(ret) assert_downloaded_file(valid_http_url, dest_file) @pytest.mark.slow -def test_save_https(tmp_path, valid_https_url): +def test_save_https(tmp_path: pathlib.Path, valid_https_url: str): dest_file = tmp_path / "favicon.ico" - size, ret = stream_file(url=valid_https_url, fpath=dest_file) + _, ret = stream_file(url=valid_https_url, fpath=dest_file) assert_headers(ret) assert_downloaded_file(valid_https_url, dest_file) @pytest.mark.slow -def test_stream_to_bytes(valid_https_url): +def test_stream_to_bytes(valid_https_url: str): byte_stream = io.BytesIO() - size, ret = stream_file(url=valid_https_url, byte_stream=byte_stream) + _, ret = stream_file(url=valid_https_url, byte_stream=byte_stream) assert_headers(ret) assert ( byte_stream.read() @@ -161,39 +157,39 @@ def test_stream_to_bytes(valid_https_url): @pytest.mark.slow -def test_unseekable_stream(valid_https_url): +def test_unseekable_stream(valid_https_url: str): def notseekable(): return False byte_stream = io.BytesIO() byte_stream.seekable = notseekable - size, ret = stream_file(url=valid_https_url, byte_stream=byte_stream) + _, ret = stream_file(url=valid_https_url, byte_stream=byte_stream) assert_headers(ret) @pytest.mark.slow -def test_save_parent_folder_missing(tmp_path, valid_http_url): +def test_save_parent_folder_missing(tmp_path: pathlib.Path, valid_http_url: str): dest_file = tmp_path / "some-folder" / "favicon.ico" with pytest.raises(IOError): stream_file(url=valid_http_url, 
fpath=dest_file) @pytest.mark.slow -def test_save_http_error(tmp_path, http_error_url): +def test_save_http_error(tmp_path: pathlib.Path, http_error_url: str): dest_file = tmp_path / "favicon.ico" with pytest.raises(requests.exceptions.HTTPError): stream_file(url=http_error_url, fpath=dest_file) @pytest.mark.slow -def test_large_download_http(tmp_path, valid_http_url): +def test_large_download_http(tmp_path: pathlib.Path, valid_http_url: str): dest_file = tmp_path / "favicon.ico" save_large_file(valid_http_url, dest_file) assert_downloaded_file(valid_http_url, dest_file) @pytest.mark.slow -def test_large_download_https(tmp_path, valid_https_url): +def test_large_download_https(tmp_path: pathlib.Path, valid_https_url: str): dest_file = tmp_path / "favicon.ico" save_large_file(valid_https_url, dest_file) assert_downloaded_file(valid_https_url, dest_file) @@ -207,7 +203,7 @@ def test_large_download_https(tmp_path, valid_https_url): ("https://tube.jeena.net/w/tyekuoPZqb7BtkyNPwVHJL", "tyekuoPZqb7BtkyNPwVHJL"), ], ) -def test_youtube_download_serial(url, video_id, tmp_path): +def test_youtube_download_serial(url: str, video_id: str, tmp_path: pathlib.Path): yt_downloader = YoutubeDownloader(threads=1) options = BestMp4.get_options( target_dir=tmp_path, @@ -219,20 +215,21 @@ def test_youtube_download_serial(url, video_id, tmp_path): @pytest.mark.slow -def test_youtube_download_nowait(tmp_path): +def test_youtube_download_nowait(tmp_path: pathlib.Path): with YoutubeDownloader(threads=1) as yt_downloader: future = yt_downloader.download( "https://tube.jeena.net/w/tyekuoPZqb7BtkyNPwVHJL", BestMp4.get_options(target_dir=tmp_path), wait=False, ) - assert future.running() # pyright: ignore[reportAttributeAccessIssue] + assert isinstance(future, concurrent.futures.Future) + assert future.running() assert not yt_downloader.executor._shutdown done, not_done = concurrent.futures.wait( - [future], # pyright: ignore[reportArgumentType] + [future], 
return_when=concurrent.futures.ALL_COMPLETED, ) - assert future.exception() is None # pyright: ignore[reportAttributeAccessIssue] + assert future.exception() is None assert len(done) == 1 assert len(not_done) == 0 @@ -246,7 +243,7 @@ def test_youtube_download_error(): @pytest.mark.slow -def test_youtube_download_contextmanager(tmp_path): +def test_youtube_download_contextmanager(tmp_path: pathlib.Path): with YoutubeDownloader(threads=1) as yt_downloader: yt_downloader.download( "https://tube.jeena.net/w/tyekuoPZqb7BtkyNPwVHJL", @@ -280,22 +277,24 @@ def test_get_options_wrong_outtmpl_type(): WrongOuttmplType.get_options() -def test_get_options_target_dir(target_dir): +def test_get_options_target_dir(target_dir: pathlib.Path): options = BestWebm.get_options(target_dir=target_dir) assert options["outtmpl"] == "adir1/video.%(ext)s" -def test_get_options_filepath(filepath): +def test_get_options_filepath(filepath: pathlib.Path): options = BestWebm.get_options(filepath=filepath) assert options["outtmpl"] == "adir2/afile" -def test_get_options_target_dir_filepath(target_dir, filepath): +def test_get_options_target_dir_filepath( + target_dir: pathlib.Path, filepath: pathlib.Path +): options = BestWebm.get_options(target_dir=target_dir, filepath=filepath) assert options["outtmpl"] == "adir1/adir2/afile" -def test_get_options_override_outtmpl_no_other_vars(custom_outtmpl): +def test_get_options_override_outtmpl_no_other_vars(custom_outtmpl: str): original = BestWebm.get_options() overriden = BestWebm.get_options(outtmpl=custom_outtmpl) assert "outtmpl" in original @@ -307,7 +306,9 @@ def test_get_options_override_outtmpl_no_other_vars(custom_outtmpl): assert overriden[key] == custom_outtmpl -def test_get_options_override_outtmpl_other_vars(target_dir, filepath, custom_outtmpl): +def test_get_options_override_outtmpl_other_vars( + target_dir: pathlib.Path, filepath: pathlib.Path, custom_outtmpl: str +): original = BestWebm.get_options(target_dir=target_dir, 
filepath=filepath) overriden = BestWebm.get_options( target_dir=target_dir, diff --git a/tests/filesystem/test_filesystem.py b/tests/filesystem/test_filesystem.py index daf2bec8..1d9e783e 100644 --- a/tests/filesystem/test_filesystem.py +++ b/tests/filesystem/test_filesystem.py @@ -1,7 +1,8 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu +import pathlib +from typing import Any import magic +import pytest from zimscraperlib.filesystem import ( delete_callback, @@ -10,12 +11,12 @@ ) -def test_file_mimetype(png_image, jpg_image): +def test_file_mimetype(png_image: pathlib.Path, jpg_image: pathlib.Path): assert get_file_mimetype(png_image) == "image/png" assert get_file_mimetype(jpg_image) == "image/jpeg" -def test_content_mimetype(png_image, jpg_image): +def test_content_mimetype(png_image: pathlib.Path, jpg_image: pathlib.Path): with open(png_image, "rb") as fh: assert get_content_mimetype(fh.read(64)) == "image/png" @@ -23,19 +24,21 @@ def test_content_mimetype(png_image, jpg_image): assert get_content_mimetype(fh.read(64)) == "image/jpeg" -def test_content_mimetype_fallback(monkeypatch, undecodable_byte_stream): +def test_content_mimetype_fallback( + monkeypatch: pytest.MonkeyPatch, undecodable_byte_stream: bytes +): # use raw function first to test actual code assert get_content_mimetype(undecodable_byte_stream) == "application/octet-stream" # mock then so we keep coverage on systems where magic works - def raising_magic(*args, **kwargs): # noqa: ARG001 + def raising_magic(*_: Any, **__: Any): raise UnicodeDecodeError("nocodec", b"", 0, 1, "noreason") monkeypatch.setattr(magic, "from_buffer", raising_magic) assert get_content_mimetype(undecodable_byte_stream) == "application/octet-stream" -def test_mime_overrides(svg_image): +def test_mime_overrides(svg_image: pathlib.Path): mime_map = [(svg_image, "image/svg+xml")] for fpath, expected_mime in mime_map: assert get_file_mimetype(fpath) == expected_mime @@ -43,7 +46,7 @@ def 
test_mime_overrides(svg_image): assert get_content_mimetype(fh.read(64)) == expected_mime -def test_delete_callback(tmp_path): +def test_delete_callback(tmp_path: pathlib.Path): fpath = tmp_path.joinpath("my-file") with open(fpath, "w") as fh: fh.write("content") diff --git a/tests/html/conftest.py b/tests/html/conftest.py index 9716e17f..20b055d8 100644 --- a/tests/html/conftest.py +++ b/tests/html/conftest.py @@ -1,11 +1,8 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import pytest @pytest.fixture(scope="function") -def html_page(): +def html_page() -> str: """sample HTML content with title""" return """ @@ -26,7 +23,7 @@ def html_page(): @pytest.fixture(scope="function") -def html_page_without_title(): +def html_page_without_title() -> str: """sample HTML content without title""" return """ diff --git a/tests/html/test_html.py b/tests/html/test_html.py index 011981a8..ec102fb4 100644 --- a/tests/html/test_html.py +++ b/tests/html/test_html.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu +import pathlib import pytest @@ -11,7 +10,9 @@ ) -def test_find_title(tmp_path, html_page, html_page_without_title): +def test_find_title( + tmp_path: pathlib.Path, html_page: str, html_page_without_title: str +): # find title in example HTML assert ( find_title_in(html_page, "text/html") @@ -38,7 +39,7 @@ def test_find_title(tmp_path, html_page, html_page_without_title): assert find_title_in_file(tmp_path / "nope", "text/html") == "" -def test_find_language(tmp_path, html_page): +def test_find_language(tmp_path: pathlib.Path, html_page: str): # find language in example HTML assert find_language_in(html_page, "text/html") == "en-US" # make sure non-HTML returns no language @@ -91,5 +92,5 @@ def test_find_language(tmp_path, html_page): ), ], ) -def test_find_language_order(html_string, expected_language): +def test_find_language_order(html_string: str, expected_language: str): assert find_language_in(html_string, "text/html") == 
expected_language diff --git a/tests/image/test_image.py b/tests/image/test_image.py index 507057bd..c39b367d 100644 --- a/tests/image/test_image.py +++ b/tests/image/test_image.py @@ -1,19 +1,17 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import inspect import io import os import pathlib import re import shutil +from typing import Any -import piexif +import piexif # pyright: ignore[reportMissingTypeStubs] import pytest from PIL import Image -from resizeimage.imageexceptions import ImageSizeError +from resizeimage.imageexceptions import ( # pyright: ignore[reportMissingTypeStubs] + ImageSizeError, +) from zimscraperlib.image import presets from zimscraperlib.image.conversion import ( @@ -55,13 +53,13 @@ ALL_PRESETS = [(n, p) for n, p in inspect.getmembers(presets) if inspect.isclass(p)] -def get_image_size(fpath): +def get_image_size(fpath: pathlib.Path | io.BytesIO) -> tuple[int, int]: return Image.open(fpath).size def get_src_dst( tmp_path: pathlib.Path, - fmt, + fmt: str, png_image: pathlib.Path | None = None, jpg_image: pathlib.Path | None = None, gif_image: pathlib.Path | None = None, @@ -96,7 +94,7 @@ def get_src_dst( ("000000", False), ], ) -def test_is_hex_color(value, valid): +def test_is_hex_color(value: str, *, valid: bool): if valid: assert is_hex_color(value) else: @@ -108,19 +106,19 @@ def test_colors_noimage(): get_colors(pathlib.Path("nofile.here")) -def test_colors_png_nopalette(png_image): +def test_colors_png_nopalette(png_image: pathlib.Path): assert get_colors(png_image, use_palette=False) == ("#04659B", "#E7F6FF") -def test_colors_jpg_nopalette(jpg_image): +def test_colors_jpg_nopalette(jpg_image: pathlib.Path): assert get_colors(jpg_image, use_palette=False) == ("#C1BBB3", "#F4F3F1") -def test_colors_png_palette(png_image): +def test_colors_png_palette(png_image: pathlib.Path): assert get_colors(png_image, use_palette=True) == ("#9E0404", "#E7F6FF") -def 
test_colors_jpg_palette(jpg_image): +def test_colors_jpg_palette(jpg_image: pathlib.Path): assert get_colors(jpg_image, use_palette=True) == ("#221C1B", "#F4F3F1") @@ -131,7 +129,14 @@ def test_colors_jpg_palette(jpg_image): ("jpg", "JPEG", {"quality": 50}), ], ) -def test_save_image(png_image, jpg_image, tmp_path, src_fmt, dst_fmt, params): +def test_save_image( + png_image: pathlib.Path, + jpg_image: pathlib.Path, + tmp_path: pathlib.Path, + src_fmt: str, + dst_fmt: str, + params: dict[str, Any] | None, +): src, dst = get_src_dst(tmp_path, src_fmt, png_image=png_image, jpg_image=jpg_image) img = Image.open(src) save_image(img, dst, fmt=dst_fmt, **(params or {})) @@ -142,7 +147,9 @@ def test_save_image(png_image, jpg_image, tmp_path, src_fmt, dst_fmt, params): "fmt", ["png", "jpg"], ) -def test_resize_thumbnail(png_image, jpg_image, tmp_path, fmt): +def test_resize_thumbnail( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 100, 50 @@ -156,8 +163,10 @@ def test_resize_thumbnail(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_bytestream(png_image, jpg_image, tmp_path, fmt): - src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) +def test_resize_bytestream( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): + src, _ = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) # copy image content into a bytes stream img = io.BytesIO() @@ -176,7 +185,9 @@ def test_resize_bytestream(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_width(png_image, jpg_image, tmp_path, fmt): +def test_resize_width( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 100, 50 @@ -189,7 
+200,9 @@ def test_resize_width(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_height(png_image, jpg_image, tmp_path, fmt): +def test_resize_height( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 100, 50 @@ -202,7 +215,9 @@ def test_resize_height(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_crop(png_image, jpg_image, tmp_path, fmt): +def test_resize_crop( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 5, 50 @@ -216,7 +231,9 @@ def test_resize_crop(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_cover(png_image, jpg_image, tmp_path, fmt): +def test_resize_cover( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 5, 50 @@ -230,7 +247,9 @@ def test_resize_cover(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_contain(png_image, jpg_image, tmp_path, fmt): +def test_resize_contain( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 5, 50 @@ -244,7 +263,9 @@ def test_resize_contain(png_image, jpg_image, tmp_path, fmt): "fmt", ["png", "jpg"], ) -def test_resize_upscale(png_image, jpg_image, tmp_path, fmt): +def test_resize_upscale( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 500, 1000 @@ -258,7 +279,9 @@ def test_resize_upscale(png_image, jpg_image, tmp_path, fmt): "fmt", 
["png", "jpg"], ) -def test_resize_small_image_error(png_image, jpg_image, tmp_path, fmt): +def test_resize_small_image_error( + png_image: pathlib.Path, jpg_image: pathlib.Path, tmp_path: pathlib.Path, fmt: str +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image, jpg_image=jpg_image) width, height = 500, 1000 @@ -278,7 +301,12 @@ def test_resize_small_image_error(png_image, jpg_image, tmp_path, fmt): [("png", "JPEG", "RGB"), ("png", "BMP", None), ("jpg", "JPEG", "CMYK")], ) def test_change_image_format( - png_image, jpg_image, tmp_path, src_fmt, dst_fmt, colorspace + png_image: pathlib.Path, + jpg_image: pathlib.Path, + tmp_path: pathlib.Path, + src_fmt: str, + dst_fmt: str, + colorspace: str | None, ): src, _ = get_src_dst(tmp_path, src_fmt, png_image=png_image, jpg_image=jpg_image) dst = tmp_path / f"out.{dst_fmt.lower()}" @@ -289,7 +317,7 @@ def test_change_image_format( assert dst_image.format == dst_fmt -def test_change_image_format_defaults(png_image, tmp_path): +def test_change_image_format_defaults(png_image: pathlib.Path, tmp_path: pathlib.Path): # PNG to JPEG (loosing alpha) dst = tmp_path.joinpath(f"{png_image.stem}.jpg") convert_image(png_image, dst) @@ -377,7 +405,13 @@ def test_convert_svg_path_src_io_dst(svg_image: pathlib.Path): "fmt,exp_size", [("png", 128), ("jpg", 128)], ) -def test_create_favicon(png_image2, jpg_image, tmp_path, fmt, exp_size): +def test_create_favicon( + png_image2: pathlib.Path, + jpg_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, + exp_size: int, +): src, dst = get_src_dst(tmp_path, fmt, png_image=png_image2, jpg_image=jpg_image) dst = dst.parent.joinpath("favicon.ico") create_favicon(src, dst) @@ -391,7 +425,12 @@ def test_create_favicon(png_image2, jpg_image, tmp_path, fmt, exp_size): "fmt", ["png", "jpg"], ) -def test_create_favicon_square(square_png_image, square_jpg_image, tmp_path, fmt): +def test_create_favicon_square( + square_png_image: pathlib.Path, + square_jpg_image: pathlib.Path, + 
tmp_path: pathlib.Path, + fmt: str, +): src, dst = get_src_dst( tmp_path, fmt, png_image=square_png_image, jpg_image=square_jpg_image ) @@ -407,7 +446,12 @@ def test_create_favicon_square(square_png_image, square_jpg_image, tmp_path, fmt "fmt", ["png", "jpg"], ) -def test_wrong_extension(square_png_image, square_jpg_image, tmp_path, fmt): +def test_wrong_extension( + square_png_image: pathlib.Path, + square_jpg_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, +): src, dst = get_src_dst( tmp_path, fmt, png_image=square_png_image, jpg_image=square_jpg_image ) @@ -420,7 +464,12 @@ def test_wrong_extension(square_png_image, square_jpg_image, tmp_path, fmt): ["png", "jpg", "gif", "webp"], ) def test_optimize_image_default( - png_image2, jpg_image, gif_image, webp_image, tmp_path, fmt + png_image2: pathlib.Path, + jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, ): src, dst = get_src_dst( tmp_path, @@ -434,7 +483,7 @@ def test_optimize_image_default( assert os.path.getsize(dst) < os.path.getsize(src) -def test_optimize_image_del_src(png_image, tmp_path): +def test_optimize_image_del_src(png_image: pathlib.Path, tmp_path: pathlib.Path): shutil.copy(png_image, tmp_path) src = tmp_path / png_image.name dst = tmp_path / "out.png" @@ -444,7 +493,7 @@ def test_optimize_image_del_src(png_image, tmp_path): assert not src.exists() -def test_optimize_image_allow_convert(png_image, tmp_path): +def test_optimize_image_allow_convert(png_image: pathlib.Path, tmp_path: pathlib.Path): shutil.copy(png_image, tmp_path) src = tmp_path / png_image.name dst = tmp_path / "out.webp" @@ -453,7 +502,7 @@ def test_optimize_image_allow_convert(png_image, tmp_path): assert dst.exists() and os.path.getsize(dst) > 0 -def test_optimize_image_bad_dst(png_image, tmp_path): +def test_optimize_image_bad_dst(png_image: pathlib.Path, tmp_path: pathlib.Path): shutil.copy(png_image, tmp_path) src = tmp_path / png_image.name dst = 
tmp_path / "out.raster" @@ -535,15 +584,25 @@ def test_optimize_image_bad_dst(png_image, tmp_path): ], ) def test_preset( - preset, - expected_version, - options, - fmt, - png_image, - jpg_image, - gif_image, - webp_image, - tmp_path, + preset: ( + WebpLow + | WebpMedium + | WebpHigh + | JpegLow + | JpegMedium + | JpegHigh + | PngLow + | PngMedium + | PngHigh + ), + expected_version: int, + options: dict[str, str | bool | int | None], + fmt: str, + png_image: pathlib.Path, + jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + tmp_path: pathlib.Path, ): assert preset.VERSION == expected_version assert preset.options == options @@ -555,7 +614,12 @@ def test_preset( gif_image=gif_image, webp_image=webp_image, ) - optimize_image(src, dst, delete_src=False, **preset.options) + optimize_image( + src, + dst, + delete_src=False, + **preset.options, # pyright: ignore[reportArgumentType] + ) assert os.path.getsize(dst) < os.path.getsize(src) if fmt in ["jpg", "webp", "png"]: @@ -582,19 +646,23 @@ def test_preset_has_mime_and_ext(): assert preset().mimetype.startswith("image/") -def test_remove_png_transparency(png_image, tmp_path): +def test_remove_png_transparency(png_image: pathlib.Path, tmp_path: pathlib.Path): dst = tmp_path / "out.png" optimize_png(src=png_image, dst=dst, remove_transparency=True) assert os.path.getsize(dst) == 2352 -def test_jpeg_exif_preserve(jpg_exif_image, tmp_path): +def test_jpeg_exif_preserve(jpg_exif_image: pathlib.Path, tmp_path: pathlib.Path): # in filesystem dst = tmp_path / "out.jpg" optimize_jpeg(src=jpg_exif_image, dst=dst) - assert piexif.load(str(dst))["Exif"] and ( - piexif.load(str(dst))["Exif"] - == piexif.load(str(jpg_exif_image.resolve()))["Exif"] + assert piexif.load(str(dst))[ # pyright: ignore[reportUnknownMemberType] + "Exif" + ] and ( + piexif.load(str(dst))["Exif"] # pyright: ignore[reportUnknownMemberType] + == piexif.load( # pyright: ignore[reportUnknownMemberType] + 
str(jpg_exif_image.resolve()) + )["Exif"] ) # in memory @@ -602,19 +670,24 @@ def test_jpeg_exif_preserve(jpg_exif_image, tmp_path): src_bytes = fl.read() optimized_img = optimize_jpeg(src=io.BytesIO(src_bytes)) assert isinstance(optimized_img, io.BytesIO) - assert piexif.load(optimized_img.getvalue())["Exif"] and ( - piexif.load(src_bytes)["Exif"] == piexif.load(optimized_img.getvalue())["Exif"] + assert piexif.load( # pyright: ignore[reportUnknownMemberType] + optimized_img.getvalue() + )["Exif"] and ( + piexif.load(src_bytes)["Exif"] # pyright: ignore[reportUnknownMemberType] + == piexif.load( # pyright: ignore[reportUnknownMemberType] + optimized_img.getvalue() + )["Exif"] ) -def test_dynamic_jpeg_quality(jpg_image, tmp_path): +def test_dynamic_jpeg_quality(jpg_image: pathlib.Path, tmp_path: pathlib.Path): # check optimization without fast mode dst = tmp_path / "out.jpg" optimize_jpeg(src=jpg_image, dst=dst, fast_mode=False) assert os.path.getsize(dst) < os.path.getsize(jpg_image) -def test_ensure_matches(webp_image): +def test_ensure_matches(webp_image: pathlib.Path): with pytest.raises(ValueError, match=re.escape("is not of format")): ensure_matches(webp_image, "PNG") @@ -624,7 +697,14 @@ def test_ensure_matches(webp_image): [("png", "PNG"), ("jpg", "JPEG"), ("gif", "GIF"), ("webp", "WEBP"), ("svg", "SVG")], ) def test_format_for_real_images_suffix( - png_image, jpg_image, gif_image, webp_image, svg_image, tmp_path, fmt, expected + png_image: pathlib.Path, + jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + svg_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, + expected: str, ): src, _ = get_src_dst( tmp_path, @@ -643,7 +723,14 @@ def test_format_for_real_images_suffix( [("png", "PNG"), ("jpg", "JPEG"), ("gif", "GIF"), ("webp", "WEBP"), ("svg", "SVG")], ) def test_format_for_real_images_content_path( - png_image, jpg_image, gif_image, webp_image, svg_image, tmp_path, fmt, expected + png_image: pathlib.Path, + 
jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + svg_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, + expected: str, ): src, _ = get_src_dst( tmp_path, @@ -662,7 +749,14 @@ def test_format_for_real_images_content_path( [("png", "PNG"), ("jpg", "JPEG"), ("gif", "GIF"), ("webp", "WEBP"), ("svg", "SVG")], ) def test_format_for_real_images_content_bytes( - png_image, jpg_image, gif_image, webp_image, svg_image, tmp_path, fmt, expected + png_image: pathlib.Path, + jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + svg_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, + expected: str, ): src, _ = get_src_dst( tmp_path, @@ -687,7 +781,7 @@ def test_format_for_real_images_content_bytes( ("image.raster", None), ], ) -def test_format_for_from_suffix(src, expected): +def test_format_for_from_suffix(src: str, expected: str): assert format_for(src=pathlib.Path(src), from_suffix=True) == expected @@ -699,14 +793,19 @@ def test_format_for_cannot_use_suffix_with_byte_array(): assert format_for(src=io.BytesIO(), from_suffix=True) -def test_wrong_extension_optim(tmp_path, png_image): +def test_wrong_extension_optim(tmp_path: pathlib.Path, png_image: pathlib.Path): dst = tmp_path.joinpath("image.jpg") shutil.copy(png_image, dst) with pytest.raises(ValueError, match=re.escape("is not of format JPEG")): optimize_jpeg(dst, dst) -def test_is_valid_image(png_image, png_image2, jpg_image, font): +def test_is_valid_image( + png_image: pathlib.Path, + png_image2: pathlib.Path, + jpg_image: pathlib.Path, + font: pathlib.Path, +): assert is_valid_image(png_image, "PNG") assert not is_valid_image(png_image, "JPEG") assert is_valid_image(jpg_image, "JPEG") @@ -720,13 +819,15 @@ def test_is_valid_image(png_image, png_image2, jpg_image, font): assert is_valid_image(io.BytesIO(fh.read()), "PNG", (48, 48)) -def test_optimize_gif_no_optimize_level(gif_image, tmp_path): +def test_optimize_gif_no_optimize_level( + 
gif_image: pathlib.Path, tmp_path: pathlib.Path +): optimize_gif(gif_image, tmp_path / "out.gif", delete_src=False, optimize_level=None) -def test_optimize_gif_no_no_extensions(gif_image, tmp_path): +def test_optimize_gif_no_no_extensions(gif_image: pathlib.Path, tmp_path: pathlib.Path): optimize_gif(gif_image, tmp_path / "out.gif", delete_src=False, no_extensions=None) -def test_optimize_gif_no_interlace(gif_image, tmp_path): +def test_optimize_gif_no_interlace(gif_image: pathlib.Path, tmp_path: pathlib.Path): optimize_gif(gif_image, tmp_path / "out.gif", delete_src=False, interlace=None) diff --git a/tests/inputs/test_inputs.py b/tests/inputs/test_inputs.py index 016b3371..652d599c 100644 --- a/tests/inputs/test_inputs.py +++ b/tests/inputs/test_inputs.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import pathlib +from typing import Any import pytest @@ -36,7 +32,7 @@ def test_missing_local(): handle_user_provided_file(source="/some/incorrect/path.txt") -def test_local_copy(png_image): +def test_local_copy(png_image: pathlib.Path): fpath = handle_user_provided_file(source=str(png_image)) assert fpath is not None assert fpath.exists() @@ -44,21 +40,21 @@ def test_local_copy(png_image): assert fpath.stat().st_size == png_image.stat().st_size -def test_local_nocopy(png_image): +def test_local_nocopy(png_image: pathlib.Path): fpath = handle_user_provided_file(source=str(png_image), nocopy=True) assert fpath is not None assert fpath.exists() assert str(fpath) == str(png_image) -def test_remote(valid_http_url): +def test_remote(valid_http_url: str): fpath = handle_user_provided_file(source=valid_http_url) assert fpath is not None assert fpath.exists() assert fpath.suffix == pathlib.Path(valid_http_url).suffix -def test_local_dest(tmp_path, png_image): +def test_local_dest(tmp_path: pathlib.Path, png_image: pathlib.Path): dest = tmp_path / png_image.name fpath = 
handle_user_provided_file(source=str(png_image), dest=dest) assert fpath is not None @@ -66,7 +62,7 @@ def test_local_dest(tmp_path, png_image): assert fpath == dest -def test_remote_dest(tmp_path, valid_http_url): +def test_remote_dest(tmp_path: pathlib.Path, valid_http_url: str): dest = tmp_path / pathlib.Path(valid_http_url).name fpath = handle_user_provided_file(source=valid_http_url, dest=dest) assert fpath is not None @@ -74,22 +70,24 @@ def test_remote_dest(tmp_path, valid_http_url): assert fpath == dest -def test_local_indir(tmp_path, png_image): +def test_local_indir(tmp_path: pathlib.Path, png_image: pathlib.Path): fpath = handle_user_provided_file(source=str(png_image), in_dir=tmp_path) assert fpath is not None assert fpath.exists() assert fpath.parent == tmp_path -def test_remote_indir(tmp_path, valid_http_url): +def test_remote_indir(tmp_path: pathlib.Path, valid_http_url: str): fpath = handle_user_provided_file(source=valid_http_url, in_dir=tmp_path) assert fpath is not None assert fpath.exists() assert fpath.parent == tmp_path -def test_remote_default_user_agent(valid_http_url, monkeypatch): - def mock_stream_file(**kwargs): +def test_remote_default_user_agent( + valid_http_url: str, monkeypatch: pytest.MonkeyPatch +): + def mock_stream_file(**kwargs: Any): headers = kwargs.get("headers") assert headers is not None user_agent = headers.get("User-Agent") @@ -98,7 +96,7 @@ def mock_stream_file(**kwargs): assert user_agent.endswith(f"({CONTACT})") monkeypatch.setattr( - zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue] + zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType] "stream_file", mock_stream_file, raising=True, @@ -106,8 +104,10 @@ def mock_stream_file(**kwargs): handle_user_provided_file(source=valid_http_url) -def test_remote_provided_user_agent(valid_http_url, valid_user_agent, monkeypatch): - def mock_stream_file(**kwargs): +def 
test_remote_provided_user_agent( + valid_http_url: str, valid_user_agent: str, monkeypatch: pytest.MonkeyPatch +): + def mock_stream_file(**kwargs: Any): headers = kwargs.get("headers") assert headers is not None user_agent = headers.get("User-Agent") @@ -115,7 +115,7 @@ def mock_stream_file(**kwargs): assert user_agent == valid_user_agent monkeypatch.setattr( - zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue] + zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType] "stream_file", mock_stream_file, raising=True, @@ -123,12 +123,14 @@ def mock_stream_file(**kwargs): handle_user_provided_file(source=valid_http_url, user_agent=valid_user_agent) -def test_remote_provided_none_user_agent(valid_http_url, monkeypatch): - def mock_stream_file(**kwargs): +def test_remote_provided_none_user_agent( + valid_http_url: str, monkeypatch: pytest.MonkeyPatch +): + def mock_stream_file(**kwargs: Any): assert kwargs.get("headers") is None monkeypatch.setattr( - zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue] + zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType] "stream_file", mock_stream_file, raising=True, diff --git a/tests/logging/conftest.py b/tests/logging/conftest.py index c91f00a3..f63c936a 100644 --- a/tests/logging/conftest.py +++ b/tests/logging/conftest.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import io import uuid @@ -8,10 +5,10 @@ @pytest.fixture(scope="function") -def random_id(): +def random_id() -> str: return uuid.uuid4().hex @pytest.fixture(scope="function") -def console(): +def console() -> io.StringIO: return io.StringIO() diff --git a/tests/logging/test_logging.py b/tests/logging/test_logging.py index 235e66b4..2e5fe2ca 100644 --- a/tests/logging/test_logging.py +++ b/tests/logging/test_logging.py @@ -1,12 +1,17 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 
sts=4 et sw=4 nu - +import io import logging +import pathlib from zimscraperlib.logging import getLogger, nicer_args_join -def assert_message_console(logger, console, level, expected): +def assert_message_console( + logger: logging.Logger, + console: io.StringIO, + level: str, + *, + expected: bool, +): msg = f"a {level} message" getattr(logger, level)(msg) if expected: @@ -15,7 +20,9 @@ def assert_message_console(logger, console, level, expected): assert msg not in console.getvalue() -def assert_message_file(logger, fpath, level, expected): +def assert_message_file( + logger: logging.Logger, fpath: pathlib.Path, level: str, *, expected: bool +): msg = f"a {level} message" getattr(logger, level)(msg) with open(fpath) as file: @@ -34,7 +41,7 @@ def test_args_join(): assert f' {args[1]} "{args[2]}"' in nicer -def test_default(random_id): +def test_default(random_id: str): logger = getLogger(name=random_id) logger.debug("a debug") logger.info("an info") @@ -43,39 +50,39 @@ def test_default(random_id): logger.critical("a critical") -def test_debug_level(random_id, console): +def test_debug_level(random_id: str, console: io.StringIO): logger = getLogger(name=random_id, console=console, level=logging.DEBUG) - assert_message_console(logger, console, "debug", True) + assert_message_console(logger, console, "debug", expected=True) -def test_info_level(random_id, console): +def test_info_level(random_id: str, console: io.StringIO): logger = getLogger(name=random_id, console=console, level=logging.INFO) - assert_message_console(logger, console, "debug", False) - assert_message_console(logger, console, "info", True) + assert_message_console(logger, console, "debug", expected=False) + assert_message_console(logger, console, "info", expected=True) -def test_warning_level(random_id, console): +def test_warning_level(random_id: str, console: io.StringIO): logger = getLogger(name=random_id, console=console, level=logging.WARNING) - assert_message_console(logger, console, "debug", 
False) - assert_message_console(logger, console, "info", False) - assert_message_console(logger, console, "warning", True) + assert_message_console(logger, console, "debug", expected=False) + assert_message_console(logger, console, "info", expected=False) + assert_message_console(logger, console, "warning", expected=True) -def test_error_level(random_id, console): +def test_error_level(random_id: str, console: io.StringIO): logger = getLogger(name=random_id, console=console, level=logging.ERROR) - assert_message_console(logger, console, "debug", False) - assert_message_console(logger, console, "info", False) - assert_message_console(logger, console, "warning", False) - assert_message_console(logger, console, "error", True) + assert_message_console(logger, console, "debug", expected=False) + assert_message_console(logger, console, "info", expected=False) + assert_message_console(logger, console, "warning", expected=False) + assert_message_console(logger, console, "error", expected=True) -def test_critical_level(random_id, console): +def test_critical_level(random_id: str, console: io.StringIO): logger = getLogger(name=random_id, console=console, level=logging.CRITICAL) - assert_message_console(logger, console, "debug", False) - assert_message_console(logger, console, "info", False) - assert_message_console(logger, console, "warning", False) - assert_message_console(logger, console, "error", False) - assert_message_console(logger, console, "critical", True) + assert_message_console(logger, console, "debug", expected=False) + assert_message_console(logger, console, "info", expected=False) + assert_message_console(logger, console, "warning", expected=False) + assert_message_console(logger, console, "error", expected=False) + assert_message_console(logger, console, "critical", expected=True) def test_format(): @@ -84,7 +91,7 @@ def test_format(): pass -def test_file_logger(random_id, tmp_path): +def test_file_logger(random_id: str, tmp_path: pathlib.Path): log_file = 
tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file) logger.debug("a debug") @@ -94,62 +101,62 @@ def test_file_logger(random_id, tmp_path): logger.critical("a critical") -def test_debug_level_file(random_id, tmp_path): +def test_debug_level_file(random_id: str, tmp_path: pathlib.Path): log_file = tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file, file_level=logging.DEBUG) - assert_message_file(logger, log_file, "debug", True) + assert_message_file(logger, log_file, "debug", expected=True) -def test_info_level_file(random_id, tmp_path): +def test_info_level_file(random_id: str, tmp_path: pathlib.Path): log_file = tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file, file_level=logging.INFO) - assert_message_file(logger, log_file, "debug", False) - assert_message_file(logger, log_file, "info", True) + assert_message_file(logger, log_file, "debug", expected=False) + assert_message_file(logger, log_file, "info", expected=True) -def test_warning_level_file(random_id, tmp_path): +def test_warning_level_file(random_id: str, tmp_path: pathlib.Path): log_file = tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file, file_level=logging.WARNING) - assert_message_file(logger, log_file, "debug", False) - assert_message_file(logger, log_file, "info", False) - assert_message_file(logger, log_file, "warning", True) + assert_message_file(logger, log_file, "debug", expected=False) + assert_message_file(logger, log_file, "info", expected=False) + assert_message_file(logger, log_file, "warning", expected=True) -def test_error_level_file(random_id, tmp_path): +def test_error_level_file(random_id: str, tmp_path: pathlib.Path): log_file = tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file, file_level=logging.ERROR) - assert_message_file(logger, log_file, "debug", False) - assert_message_file(logger, log_file, "info", False) - assert_message_file(logger, log_file, "warning", False) - 
assert_message_file(logger, log_file, "error", True) + assert_message_file(logger, log_file, "debug", expected=False) + assert_message_file(logger, log_file, "info", expected=False) + assert_message_file(logger, log_file, "warning", expected=False) + assert_message_file(logger, log_file, "error", expected=True) -def test_critical_level_file(random_id, tmp_path): +def test_critical_level_file(random_id: str, tmp_path: pathlib.Path): log_file = tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file, file_level=logging.CRITICAL) - assert_message_file(logger, log_file, "debug", False) - assert_message_file(logger, log_file, "info", False) - assert_message_file(logger, log_file, "warning", False) - assert_message_file(logger, log_file, "error", False) - assert_message_file(logger, log_file, "critical", True) + assert_message_file(logger, log_file, "debug", expected=False) + assert_message_file(logger, log_file, "info", expected=False) + assert_message_file(logger, log_file, "warning", expected=False) + assert_message_file(logger, log_file, "error", expected=False) + assert_message_file(logger, log_file, "critical", expected=True) -def test_level_fallback(random_id, tmp_path): +def test_level_fallback(random_id: str, tmp_path: pathlib.Path): log_file = tmp_path / "test.log" logger = getLogger(name=random_id, file=log_file, level=logging.CRITICAL) - assert_message_file(logger, log_file, "debug", False) - assert_message_file(logger, log_file, "info", False) - assert_message_file(logger, log_file, "warning", False) - assert_message_file(logger, log_file, "error", False) - assert_message_file(logger, log_file, "critical", True) + assert_message_file(logger, log_file, "debug", expected=False) + assert_message_file(logger, log_file, "info", expected=False) + assert_message_file(logger, log_file, "warning", expected=False) + assert_message_file(logger, log_file, "error", expected=False) + assert_message_file(logger, log_file, "critical", expected=True) -def 
test_no_output(random_id): +def test_no_output(random_id: str): logger = getLogger(name=random_id, console=None, file=None) logger.error("error") -def test_additional_deps(random_id): +def test_additional_deps(random_id: str): assert logging.getLogger("something").level == logging.NOTSET getLogger(name=random_id, additional_deps=["something"], console=None, file=None) assert logging.getLogger("something").level == logging.WARNING diff --git a/tests/misc/test_misc.py b/tests/misc/test_misc.py index 329a3383..42adf092 100644 --- a/tests/misc/test_misc.py +++ b/tests/misc/test_misc.py @@ -12,5 +12,5 @@ ([None, None, "a", None, "b"], "a"), ], ) -def test_first(args, expected): +def test_first(args: list[str | None], expected: str): assert first(*args) == expected diff --git a/tests/ogvjs/conftest.py b/tests/ogvjs/conftest.py index 20d75054..c2342fd9 100644 --- a/tests/ogvjs/conftest.py +++ b/tests/ogvjs/conftest.py @@ -1,11 +1,8 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import pytest @pytest.fixture(scope="module") -def videojs_url(): +def videojs_url() -> str: return ( "https://github.com/videojs/video.js/releases/download/v7.6.4/" "video-js-7.6.4.zip" @@ -13,10 +10,10 @@ def videojs_url(): @pytest.fixture(scope="module") -def ogvjs_url(): +def ogvjs_url() -> str: return "https://github.com/brion/ogv.js/releases/download/1.6.1/ogvjs-1.6.1.zip" @pytest.fixture(scope="module") -def videojs_ogvjs_url(): +def videojs_ogvjs_url() -> str: return "https://github.com/hartman/videojs-ogvjs/archive/v1.3.1.zip" diff --git a/tests/ogvjs/test_ogvjs.py b/tests/ogvjs/test_ogvjs.py index f53e6350..2071c222 100644 --- a/tests/ogvjs/test_ogvjs.py +++ b/tests/ogvjs/test_ogvjs.py @@ -1,6 +1,4 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - +import pathlib import shutil import subprocess import zipfile @@ -11,7 +9,9 @@ from zimscraperlib.fix_ogvjs_dist import run -def prepare_ogvjs_folder(tmp_path, videojs_url, ogvjs_url, videojs_ogvjs_url): +def 
prepare_ogvjs_folder( + tmp_path: pathlib.Path, videojs_url: str, ogvjs_url: str, videojs_ogvjs_url: str +): videojs_zip = tmp_path / "video-js-7.6.4.zip" if not videojs_zip.exists(): save_large_file(videojs_url, videojs_zip) @@ -65,7 +65,9 @@ def test_ogvjs_from_code_missing_params(): @pytest.mark.slow @pytest.mark.installed -def test_ogvjs_installed_script_ok(tmp_path, videojs_url, ogvjs_url, videojs_ogvjs_url): +def test_ogvjs_installed_script_ok( + tmp_path: pathlib.Path, videojs_url: str, ogvjs_url: str, videojs_ogvjs_url: str +): # run from installed script to check real conditions prepare_ogvjs_folder(tmp_path, videojs_url, ogvjs_url, videojs_ogvjs_url) @@ -83,7 +85,9 @@ def test_ogvjs_installed_script_ok(tmp_path, videojs_url, ogvjs_url, videojs_ogv @pytest.mark.slow -def test_ogvjs_from_code_ok(tmp_path, videojs_url, ogvjs_url, videojs_ogvjs_url): +def test_ogvjs_from_code_ok( + tmp_path: pathlib.Path, videojs_url: str, ogvjs_url: str, videojs_ogvjs_url: str +): # run from code to mesure coverage easily prepare_ogvjs_folder(tmp_path, videojs_url, ogvjs_url, videojs_ogvjs_url) diff --git a/tests/rewriting/test_html_rewriting.py b/tests/rewriting/test_html_rewriting.py index 1fddbbc1..48c6db16 100644 --- a/tests/rewriting/test_html_rewriting.py +++ b/tests/rewriting/test_html_rewriting.py @@ -476,7 +476,7 @@ def test_head_insert(): ) def test_js_module_detected_script(js_src: str, expected_js_module_path: str): - js_modules = [] + js_modules: list[ZimPath] = [] def custom_notify(zim_path: ZimPath): js_modules.append(zim_path) @@ -511,7 +511,7 @@ def custom_notify(zim_path: ZimPath): ) def test_js_module_detected_module_preload(js_src: str, expected_js_module_path: str): - js_modules = [] + js_modules: list[ZimPath] = [] def custom_notify(zim_path: ZimPath): js_modules.append(zim_path) @@ -539,7 +539,7 @@ def custom_notify(zim_path: ZimPath): ) def test_no_js_module_detected(script_src: str): - js_modules = [] + js_modules: list[ZimPath] = [] def 
custom_notify(zim_path: ZimPath): js_modules.append(zim_path) @@ -558,7 +558,7 @@ def custom_notify(zim_path: ZimPath): def test_js_module_base_href_src(): - js_modules = [] + js_modules: list[ZimPath] = [] def custom_notify(zim_path: ZimPath): js_modules.append(zim_path) @@ -582,7 +582,7 @@ def custom_notify(zim_path: ZimPath): def test_js_module_base_href_inline(): - js_modules = [] + js_modules: list[ZimPath] = [] def custom_notify(zim_path: ZimPath): js_modules.append(zim_path) @@ -732,7 +732,7 @@ def test_extract_base_href(html_content: str, expected_base_href: str): ), ] ) -def rewrite_base_href_content(request): +def rewrite_base_href_content(request: pytest.FixtureRequest): yield request.param @@ -1070,7 +1070,7 @@ def test_bad_html_drop_rules_argument_name(): with pytest.raises(TypeError, match="Parameter .* is unsupported in function"): @bad_rules.drop_attribute() - def bad_signature(foo: str) -> bool: + def bad_signature(foo: str) -> bool: # pyright: ignore[reportUnusedFunction] return foo == "bar" diff --git a/tests/rewriting/test_rx_replacer.py b/tests/rewriting/test_rx_replacer.py index a2e24efe..13e35441 100644 --- a/tests/rewriting/test_rx_replacer.py +++ b/tests/rewriting/test_rx_replacer.py @@ -99,7 +99,7 @@ def test_rx_rewriter(text: str, expected: str): def test_rx_rewriter_no_rules(): rewriter = RxRewriter() - rewriter._compile_rules( + rewriter._compile_rules( # pyright: ignore[reportPrivateUsage] [ (re.compile(""), replace("pla", "123")), ] diff --git a/tests/rewriting/test_url_rewriting.py b/tests/rewriting/test_url_rewriting.py index 54775134..657197bf 100644 --- a/tests/rewriting/test_url_rewriting.py +++ b/tests/rewriting/test_url_rewriting.py @@ -102,19 +102,22 @@ class TestNormalize: ), ], ) - def test_normalize(self, url, zim_path): + def test_normalize(self, url: str, zim_path: str): assert ( ArticleUrlRewriter.normalize(HttpUrl(url)).value == ZimPath(zim_path).value ) +_empty_zimpath_set: set[ZimPath] = set() + + class 
TestArticleUrlRewriter: @pytest.mark.parametrize( "original_content_url, expected_missing_zim_paths", [ ( "foo.html", - set(), + _empty_zimpath_set, ), ( "bar.html", @@ -128,7 +131,7 @@ def test_missing_zim_paths( expected_missing_zim_paths: set[ZimPath], ): http_article_url = HttpUrl("https://kiwix.org/a/article/document.html") - missing_zim_paths = set() + missing_zim_paths: set[ZimPath] = set() rewriter = ArticleUrlRewriter( article_url=http_article_url, existing_zim_paths={ZimPath("kiwix.org/a/article/foo.html")}, diff --git a/tests/types/test_types.py b/tests/types/test_types.py index 2f26e97f..8337d73e 100644 --- a/tests/types/test_types.py +++ b/tests/types/test_types.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - import pytest from zimscraperlib.types import ARTICLE_MIME, FONT_MIMES, get_mime_for_name @@ -41,8 +38,13 @@ def test_constants(): ("assets/test.css", "text/css", None, None), ], ) -def test_mime_for_name(filename, fallback, expected_mime, no_ext_to): - kwargs = {} +def test_mime_for_name( + filename: str, + fallback: str | None, + expected_mime: str | tuple[str], + no_ext_to: str | None, +): + kwargs: dict[str, str] = {} if fallback is not None: kwargs.update({"fallback": fallback}) if no_ext_to is not None: diff --git a/tests/uri/test_uri.py b/tests/uri/test_uri.py index 91493e96..802fca82 100644 --- a/tests/uri/test_uri.py +++ b/tests/uri/test_uri.py @@ -1,4 +1,5 @@ import urllib.parse +from typing import Any import pytest @@ -50,5 +51,5 @@ ), ], ) -def test_rebuild_uri(uri, changes, expected): +def test_rebuild_uri(uri: str, changes: dict[str, Any], expected: str): assert rebuild_uri(urllib.parse.urlparse(uri), **changes).geturl() == expected diff --git a/tests/video/conftest.py b/tests/video/conftest.py index d2b45de9..cb2f742f 100644 --- a/tests/video/conftest.py +++ b/tests/video/conftest.py @@ -1,13 +1,10 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import pathlib import pytest 
@pytest.fixture(scope="function") -def test_files(): +def test_files() -> dict[str, pathlib.Path]: files_dir = pathlib.Path(__file__).parent.parent.joinpath("files") return { "mp4": files_dir.joinpath("video.mp4"), diff --git a/tests/video/test_encoding.py b/tests/video/test_encoding.py index ae9bf08a..755d549c 100644 --- a/tests/video/test_encoding.py +++ b/tests/video/test_encoding.py @@ -1,12 +1,12 @@ -from __future__ import annotations - import re from copy import deepcopy from pathlib import Path import pytest -from zimscraperlib.video.encoding import _build_ffmpeg_args +from zimscraperlib.video.encoding import ( + _build_ffmpeg_args, # pyright: ignore[reportPrivateUsage] +) from zimscraperlib.video.presets import VideoWebmLow diff --git a/tests/video/test_video.py b/tests/video/test_video.py index 61271264..684d66d6 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -1,11 +1,9 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu - import inspect import pathlib import shutil import subprocess import tempfile +from typing import Any import pytest @@ -25,7 +23,14 @@ ] -def copy_media_and_reencode(temp_dir, src, dest, ffmpeg_args, test_files, **kwargs): +def copy_media_and_reencode( + temp_dir: pathlib.Path, + src: str, + dest: str, + ffmpeg_args: list[str], + test_files: dict[str, pathlib.Path], + **kwargs: Any, +): src_path = temp_dir.joinpath(src) dest_path = temp_dir.joinpath(dest) shutil.copy2(test_files[src_path.suffix[1:]], src_path) @@ -53,6 +58,14 @@ def test_config_update(): assert getattr(config, k) == v +@pytest.mark.parametrize( + "config, expected_scale", + [(VoiceMp3Low(), None), (VideoWebmLow(), "480:trunc(ow/a/2)*2")], +) +def test_config_video_scale(config: Config, expected_scale: str | None): + assert config.video_scale == expected_scale + + def test_config_build_from(): config = Config.build_from( video_codec="h264", @@ -84,9 +97,9 @@ def test_config_build_from(): assert idx != -1 assert args[idx + 1] == 
str(getattr(config, attr)) video_scale = config.video_scale - qmin, qmax = ( - config.quantizer_scale_range # pyright: ignore[reportGeneralTypeIssues] - ) + scale_range = config.quantizer_scale_range + assert scale_range + qmin, qmax = scale_range assert args.index("-qmin") != -1 and args[args.index("-qmin") + 1] == str(qmin) assert args.index("-qmax") != -1 and args[args.index("-qmax") + 1] == str(qmax) assert ( @@ -128,7 +141,12 @@ def test_config_build_from(): ), ], ) -def test_get_media_info(media_format, media, expected, test_files): +def test_get_media_info( + media_format: str, + media: str, + expected: dict[str, Any], + test_files: dict[str, pathlib.Path], +): with tempfile.TemporaryDirectory() as t: src = pathlib.Path(t).joinpath(media) shutil.copy2(test_files[media_format], src) @@ -359,7 +377,13 @@ def test_preset_voice_mp3_low(): ), ], ) -def test_reencode_media(src, dest, ffmpeg_args, expected, test_files): +def test_reencode_media( + src: str, + dest: str, + ffmpeg_args: list[str], + expected: dict[str, Any], + test_files: dict[str, pathlib.Path], +): with tempfile.TemporaryDirectory() as t: temp_dir = pathlib.Path(t) copy_media_and_reencode(temp_dir, src, dest, ffmpeg_args, test_files) @@ -386,7 +410,14 @@ def test_reencode_media(src, dest, ffmpeg_args, expected, test_files): ), ], ) -def test_reencode_delete_src(src, dest, ffmpeg_args, delete_src, test_files): +def test_reencode_delete_src( + src: str, + dest: str, + ffmpeg_args: list[str], + test_files: dict[str, pathlib.Path], + *, + delete_src: bool, +): with tempfile.TemporaryDirectory() as t: temp_dir = pathlib.Path(t) src_path = temp_dir.joinpath(src) @@ -402,42 +433,32 @@ def test_reencode_delete_src(src, dest, ffmpeg_args, delete_src, test_files): @pytest.mark.slow @pytest.mark.parametrize( - "src,dest,ffmpeg_args,return_output", + "src,dest,ffmpeg_args", [ ( "video.mp4", "video.webm", VideoWebmLow().to_ffmpeg_args(), - True, - ), - ( - "video.mp4", - "audio.mp3", - 
VoiceMp3Low().to_ffmpeg_args(), - False, ), ], ) def test_reencode_return_ffmpeg_output( - src, dest, ffmpeg_args, return_output, test_files + src: str, + dest: str, + ffmpeg_args: list[str], + test_files: dict[str, pathlib.Path], ): with tempfile.TemporaryDirectory() as t: temp_dir = pathlib.Path(t) - ret = copy_media_and_reencode( + success, process = copy_media_and_reencode( temp_dir, src, dest, ffmpeg_args, test_files, - with_process=return_output, ) - if return_output: - assert not isinstance(ret, bool) - success, process = ret - assert success - assert len(process.stdout) > 0 - else: - assert ret + assert success + assert len(process.stdout) > 0 @pytest.mark.slow @@ -458,7 +479,14 @@ def test_reencode_return_ffmpeg_output( ), ], ) -def test_reencode_failsafe(src, dest, ffmpeg_args, failsafe, test_files): +def test_reencode_failsafe( + src: str, + dest: str, + ffmpeg_args: list[str], + test_files: dict[str, pathlib.Path], + *, + failsafe: bool, +): with tempfile.TemporaryDirectory() as t: temp_dir = pathlib.Path(t) if not failsafe: @@ -476,5 +504,5 @@ def test_reencode_failsafe(src, dest, ffmpeg_args, failsafe, test_files): else: success = copy_media_and_reencode( temp_dir, src, dest, ffmpeg_args, test_files, failsafe=failsafe - ) + )[0] assert not success diff --git a/tests/zim/conftest.py b/tests/zim/conftest.py index c2ff66ae..1a0fc42d 100644 --- a/tests/zim/conftest.py +++ b/tests/zim/conftest.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python3 -# vim: ai ts=4 sts=4 et sw=4 nu +import pathlib +from collections.abc import Generator +from typing import Any import pytest @@ -7,7 +8,7 @@ @pytest.fixture(scope="function") -def html_str(): +def html_str() -> str: """sample HTML content with various links""" return """ @@ -27,7 +28,7 @@ def html_str(): @pytest.fixture(scope="function") -def html_str_cn(): +def html_str_cn() -> str: """sample HTML content with chinese characters""" return """ @@ -47,7 +48,7 @@ def html_str_cn(): @pytest.fixture(scope="function") -def 
html_file(tmp_path, html_str): +def html_file(tmp_path: pathlib.Path, html_str: str) -> pathlib.Path: fpath = tmp_path / "test.html" with open(fpath, "w") as fh: fh.write(html_str) @@ -55,7 +56,7 @@ def html_file(tmp_path, html_str): @pytest.fixture(scope="function") -def build_data(tmp_path, png_image): +def build_data(tmp_path: pathlib.Path, png_image: pathlib.Path) -> dict[str, Any]: fpath = tmp_path / "test.zim" redirects_file = tmp_path / "redirects.tsv" with open(redirects_file, "w") as fh: @@ -106,7 +107,7 @@ def counters(): @pytest.fixture -def ignore_metadata_conventions(): +def ignore_metadata_conventions() -> Generator[None]: zimscraperlib.zim.metadata.APPLY_RECOMMENDATIONS = False - yield + yield None zimscraperlib.zim.metadata.APPLY_RECOMMENDATIONS = True diff --git a/tests/zim/test_archive.py b/tests/zim/test_archive.py index 9ec27309..fbeed262 100644 --- a/tests/zim/test_archive.py +++ b/tests/zim/test_archive.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu +import pathlib import pytest @@ -7,7 +6,7 @@ from zimscraperlib.zim._libkiwix import convertTags -def test_metadata(small_zim_file): +def test_metadata(small_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: assert zim.metadata == { "Counter": "image/png=1;text/html=1", @@ -22,7 +21,7 @@ def test_metadata(small_zim_file): } -def test_entry_by_id(small_zim_file): +def test_entry_by_id(small_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: for id_ in range(zim.all_entry_count): assert zim.get_entry_by_id(id_) @@ -30,26 +29,26 @@ def test_entry_by_id(small_zim_file): zim.get_entry_by_id(zim.all_entry_count + 1) -def test_get_item(small_zim_file): +def test_get_item(small_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: assert zim.get_item("main.html").mimetype == "text/html" assert len(zim.get_content("main.html")) == 207 -def test_suggestions(small_zim_file): +def test_suggestions(small_zim_file: pathlib.Path): with 
Archive(small_zim_file) as zim: assert zim.get_suggestions_count("test") == 1 assert list(zim.get_suggestions("test")) == ["main.html"] -def test_suggestions_end_index(small_zim_file): +def test_suggestions_end_index(small_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: assert zim.get_suggestions_count("test") == 1 assert len(list(zim.get_suggestions("test", end=0))) == 0 assert list(zim.get_suggestions("test", end=1)) == ["main.html"] -def test_search_no_fti(small_zim_file): +def test_search_no_fti(small_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: with pytest.raises( RuntimeError, match="Cannot create Search without FT Xapian index" @@ -62,14 +61,14 @@ def test_search_no_fti(small_zim_file): @pytest.mark.slow -def test_search(real_zim_file): +def test_search(real_zim_file: pathlib.Path): with Archive(real_zim_file) as zim: assert zim.get_search_results_count("test") > 0 assert "A/Diesel_emissions_scandal" in list(zim.get_search_results("test")) @pytest.mark.slow -def test_search_end_index(real_zim_file): +def test_search_end_index(real_zim_file: pathlib.Path): with Archive(real_zim_file) as zim: assert list(zim.get_search_results("test", end=0)) == [] assert "A/Diesel_emissions_scandal" in list( @@ -77,12 +76,12 @@ def test_search_end_index(real_zim_file): ) -def test_counters(small_zim_file): +def test_counters(small_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: assert zim.counters == {"image/png": 1, "text/html": 1} -def test_get_tags(small_zim_file, real_zim_file): +def test_get_tags(small_zim_file: pathlib.Path, real_zim_file: pathlib.Path): with Archive(small_zim_file) as zim: assert zim.get_tags() == ["_ftindex:no"] assert zim.get_tags(libkiwix=True) == [ diff --git a/tests/zim/test_fs.py b/tests/zim/test_fs.py index 2ba5cfc1..864d221a 100644 --- a/tests/zim/test_fs.py +++ b/tests/zim/test_fs.py @@ -1,10 +1,8 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - import pathlib import shutil import 
subprocess import sys +from typing import Any import pytest @@ -21,7 +19,7 @@ ) -def test_fileitem(tmp_path, png_image): +def test_fileitem(tmp_path: pathlib.Path, png_image: pathlib.Path): fpath = tmp_path / png_image.name shutil.copyfile(png_image, fpath) @@ -32,7 +30,9 @@ def test_fileitem(tmp_path, png_image): assert article.get_mimetype() == "image/png" -def test_redirects_file(tmp_path, png_image, build_data): +def test_redirects_file( + tmp_path: pathlib.Path, png_image: pathlib.Path, build_data: dict[str, Any] +): build_data["build_dir"].mkdir() shutil.copyfile(png_image, build_data["build_dir"] / png_image.name) build_data["redirects_file"] = tmp_path / "toto.tsv" @@ -55,14 +55,14 @@ def test_redirects_file(tmp_path, png_image, build_data): ) -def test_make_zim_file_fail_nobuildir(build_data): +def test_make_zim_file_fail_nobuildir(build_data: dict[str, Any]): # ensure we fail on missing build dir with pytest.raises(IOError): make_zim_file(**build_data) assert not build_data["fpath"].exists() -def test_make_zim_file_fail_noillustration(build_data): +def test_make_zim_file_fail_noillustration(build_data: dict[str, Any]): # ensure we fail on missing illustration build_data["build_dir"].mkdir() with pytest.raises(IOError): @@ -75,7 +75,11 @@ def test_make_zim_file_fail_noillustration(build_data): [(True, True), (True, False), (False, True), (False, False)], ) def test_make_zim_file_working( - build_data, png_image, with_redirects, with_redirects_file + build_data: dict[str, Any], + png_image: pathlib.Path, + *, + with_redirects: bool, + with_redirects_file: bool, ): build_data["build_dir"].mkdir() @@ -116,7 +120,9 @@ def test_make_zim_file_working( assert "welcome" in list(reader.get_suggestions("coucou")) -def test_make_zim_file_exceptions_while_building(tmp_path, png_image, build_data): +def test_make_zim_file_exceptions_while_building( + tmp_path: pathlib.Path, png_image: pathlib.Path, build_data: dict[str, Any] +): build_data["build_dir"].mkdir() 
shutil.copyfile(png_image, build_data["build_dir"] / png_image.name) build_data["redirects_file"] = tmp_path / "toto.tsv" @@ -126,7 +132,9 @@ def test_make_zim_file_exceptions_while_building(tmp_path, png_image, build_data assert build_data["fpath"].exists() -def test_make_zim_file_no_file_on_error(tmp_path, png_image, build_data): +def test_make_zim_file_no_file_on_error( + tmp_path: pathlib.Path, png_image: pathlib.Path, build_data: dict[str, Any] +): build_data["build_dir"].mkdir() shutil.copyfile(png_image, build_data["build_dir"] / png_image.name) build_data["redirects_file"] = tmp_path / "toto.tsv" @@ -172,36 +180,36 @@ def valid_zim_filename(): return "myfile.zim" -def test_validate_folder_writable_not_exists(tmp_path): +def test_validate_folder_writable_not_exists(tmp_path: pathlib.Path): with pytest.raises(MissingFolderError): validate_folder_writable(tmp_path / "foo") -def test_validate_folder_writable_not_dir(tmp_path): +def test_validate_folder_writable_not_dir(tmp_path: pathlib.Path): with pytest.raises(NotADirectoryFolderError): (tmp_path / "foo.txt").touch() validate_folder_writable(tmp_path / "foo.txt") -def test_validate_folder_writable_not_writable(tmp_path): +def test_validate_folder_writable_not_writable(tmp_path: pathlib.Path): with pytest.raises(NotWritableFolderError): (tmp_path / "foo").mkdir(mode=111) validate_folder_writable(tmp_path / "foo") -def test_validate_folder_writable_ok(tmp_path): +def test_validate_folder_writable_ok(tmp_path: pathlib.Path): validate_folder_writable(tmp_path) -def test_validate_file_creatable_ok(tmp_path, valid_zim_filename): +def test_validate_file_creatable_ok(tmp_path: pathlib.Path, valid_zim_filename: str): validate_file_creatable(tmp_path, valid_zim_filename) -def test_validate_file_creatable_bad_name(tmp_path): +def test_validate_file_creatable_bad_name(tmp_path: pathlib.Path): with pytest.raises(IncorrectFilenameError): validate_file_creatable(tmp_path, "t\0t\0.zim") diff --git 
a/tests/zim/test_indexing.py b/tests/zim/test_indexing.py index 9cc036c5..6f5013f8 100644 --- a/tests/zim/test_indexing.py +++ b/tests/zim/test_indexing.py @@ -9,7 +9,7 @@ from zimscraperlib.zim.items import StaticItem -def test_indexing_html_with_libzim(tmp_path, html_file): +def test_indexing_html_with_libzim(tmp_path: pathlib.Path, html_file: pathlib.Path): """Two HTML entries automatically indexed by libzim""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -31,7 +31,7 @@ def test_indexing_html_with_libzim(tmp_path, html_file): assert reader.get_search_results_count("foo") == 1 -def test_indexing_disabled(tmp_path, html_file): +def test_indexing_disabled(tmp_path: pathlib.Path, html_file: pathlib.Path): """One HTML entry is disabled from libzim indexing""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -56,7 +56,7 @@ def test_indexing_disabled(tmp_path, html_file): assert reader.get_search_results_count("foo") == 1 -def test_indexing_custom(tmp_path, html_file): +def test_indexing_custom(tmp_path: pathlib.Path, html_file: pathlib.Path): """One HTML entry has custom indexing data""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -82,7 +82,7 @@ def test_indexing_custom(tmp_path, html_file): assert "welcome" in list(reader.get_suggestions("blu")) -def test_indexing_item_is_front(tmp_path, png_image): +def test_indexing_item_is_front(tmp_path: pathlib.Path, png_image: pathlib.Path): """Create a ZIM with a single item with customized title and content for indexing""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -111,7 +111,7 @@ def test_indexing_item_is_front(tmp_path, png_image): assert reader.get_search_results_count("food") == 0 -def test_indexing_item_not_front(tmp_path, png_image): +def test_indexing_item_not_front(tmp_path: pathlib.Path, png_image: pathlib.Path): fpath = tmp_path / "test.zim" main_path = "welcome" with Creator(fpath, main_path).config_dev_metadata() as creator: @@ -174,7 +174,9 @@ def _assert_png_zim(fpath: 
pathlib.Path): assert "welcome" not in list(reader.get_suggestions("feed")) -def test_indexing_item_pdf_filepath(tmp_path, encrypted_pdf_file): +def test_indexing_item_pdf_filepath( + tmp_path: pathlib.Path, encrypted_pdf_file: pathlib.Path +): """A PDF item can be automatically indexed""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -189,7 +191,9 @@ def test_indexing_item_pdf_filepath(tmp_path, encrypted_pdf_file): _assert_pdf_zim(fpath) -def test_indexing_item_pdf_fileobj(tmp_path, encrypted_pdf_file): +def test_indexing_item_pdf_fileobj( + tmp_path: pathlib.Path, encrypted_pdf_file: pathlib.Path +): """A PDF item can be automatically indexed""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -206,7 +210,9 @@ def test_indexing_item_pdf_fileobj(tmp_path, encrypted_pdf_file): _assert_pdf_zim(fpath) -def test_indexing_item_pdf_content(tmp_path, encrypted_pdf_file): +def test_indexing_item_pdf_content( + tmp_path: pathlib.Path, encrypted_pdf_file: pathlib.Path +): """A PDF item can be automatically indexed""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -222,7 +228,7 @@ def test_indexing_item_pdf_content(tmp_path, encrypted_pdf_file): _assert_pdf_zim(fpath) -def test_indexing_item_png_filepath(tmp_path, png_image): +def test_indexing_item_png_filepath(tmp_path: pathlib.Path, png_image: pathlib.Path): """A PNG item cannot automatically be indexed but it works properly""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -237,7 +243,7 @@ def test_indexing_item_png_filepath(tmp_path, png_image): ) -def test_indexing_item_png_fileobj(tmp_path, png_image): +def test_indexing_item_png_fileobj(tmp_path: pathlib.Path, png_image: pathlib.Path): """A PNG item cannot automatically be indexed but it works properly""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -255,7 +261,7 @@ def test_indexing_item_png_fileobj(tmp_path, png_image): _assert_png_zim(fpath) -def test_indexing_item_png_content(tmp_path, png_image): +def 
test_indexing_item_png_content(tmp_path: pathlib.Path, png_image: pathlib.Path): """A PNG item cannot automatically be indexed but it works properly""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -283,13 +289,13 @@ def test_indexing_item_png_content(tmp_path, png_image): ], ) def test_get_pdf_index_data( - pdf_no, - expected_title, - expected_word_count, - encrypted_pdf_file, - encrypted_pdf_content, - big_pdf_file, - big_pdf_content, + pdf_no: int, + expected_title: str, + expected_word_count: int, + encrypted_pdf_file: pathlib.Path, + encrypted_pdf_content: pathlib.Path, + big_pdf_file: pathlib.Path, + big_pdf_content: pathlib.Path, ): index_data = get_pdf_index_data( @@ -307,7 +313,9 @@ def test_get_pdf_index_data( assert index_data.get_keywords() == "" -def test_indexing_item_pdf_custom_title(tmp_path, encrypted_pdf_file): +def test_indexing_item_pdf_custom_title( + tmp_path: pathlib.Path, encrypted_pdf_file: pathlib.Path +): """Test case with a custom title is passed, it is not overwritten by PDF title""" fpath = tmp_path / "test.zim" main_path = "welcome" @@ -349,7 +357,9 @@ def test_indexing_item_pdf_custom_title(tmp_path, encrypted_pdf_file): ), # wordcount is passed so it is not automatically computed ], ) -def test_index_data_wordcount(content, wordcount, expected_wordcount): +def test_index_data_wordcount( + content: str, wordcount: int | None, expected_wordcount: int +): assert ( IndexData(title="foo", content=content, wordcount=wordcount).get_wordcount() == expected_wordcount diff --git a/tests/zim/test_libkiwix.py b/tests/zim/test_libkiwix.py index de4e6fbd..15096aa0 100644 --- a/tests/zim/test_libkiwix.py +++ b/tests/zim/test_libkiwix.py @@ -1,13 +1,10 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - import io import pytest -from zimscraperlib.zim._libkiwix import getline, parseMimetypeCounter +from zimscraperlib.zim._libkiwix import CounterMap, getline, parseMimetypeCounter -empty = {} +empty: CounterMap = {} def 
test_geline_nodelim(): @@ -77,6 +74,6 @@ def test_getline(): ("text/html=50;;foo", {"text/html": 50}), ], ) -def test_counter_parsing(counter_str, counter_map): +def test_counter_parsing(counter_str: str, counter_map: CounterMap): # https://github.com/kiwix/libkiwix/blob/master/test/counterParsing.cpp assert parseMimetypeCounter(counter_str) == counter_map diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py index b05b1f3e..03d7b6bf 100644 --- a/tests/zim/test_metadata.py +++ b/tests/zim/test_metadata.py @@ -1,11 +1,10 @@ -from __future__ import annotations - import base64 import dataclasses import datetime import io import pathlib import re +from collections.abc import Iterable from types import NoneType from typing import BinaryIO, NamedTuple @@ -90,7 +89,7 @@ def test_validate_language_invalid(value: list[str] | str, exception: type, erro pytest.param(["wikipedia", "wikipedia"], True, id="two_identical"), ], ) -def test_validate_tags_valid(tags, is_valid): +def test_validate_tags_valid(tags: Iterable[str] | str, *, is_valid: bool): if is_valid: metadata.TagsMetadata(tags) else: @@ -142,7 +141,7 @@ def test_validate_too_long_title_check_enabled(): def test_validate_too_long_title_check_disabled( - ignore_metadata_conventions, # noqa: ARG001 + ignore_metadata_conventions: NoneType, # noqa: ARG001 ): assert metadata.TitleMetadata("T" * 31) @@ -161,7 +160,7 @@ def test_validate_too_long_description_check_enabled(): def test_validate_too_long_description_check_disabled( - ignore_metadata_conventions, # noqa: ARG001 + ignore_metadata_conventions: NoneType, # noqa: ARG001 ): assert metadata.DescriptionMetadata("T" * 81) @@ -180,7 +179,7 @@ def test_validate_too_long_longdescription_check_enabled(): def test_validate_too_long_longdescription_check_disabled( - ignore_metadata_conventions, # noqa: ARG001 + ignore_metadata_conventions: NoneType, # noqa: ARG001 ): assert metadata.LongDescriptionMetadata("T" * 4001) @@ -202,9 +201,9 @@ def 
test_validate_date_invalid_datee(): @pytest.mark.parametrize("value", [("9999-99-99"), ("2023/02/29"), ("1969-13-31")]) -def test_validate_date_invalid_datetype(value): +def test_validate_date_invalid_datetype(value: str): with pytest.raises(InvalidType, match="violates type hint"): - metadata.DateMetadata(value) + metadata.DateMetadata(value) # pyright: ignore[reportArgumentType] def test_validate_illustration_invalid_image(): @@ -214,7 +213,7 @@ def test_validate_illustration_invalid_image(): metadata.IllustrationMetadata(b"PN", size=48) -def test_validate_illustration_wrong_sizes(png_image2): +def test_validate_illustration_wrong_sizes(png_image2: pathlib.Path): with open(png_image2, "rb") as fh: png_data = fh.read() with pytest.raises( @@ -332,7 +331,7 @@ def test_nonreserved_custom(metadata_name: str, metadata_type: type): def test_mandatory_value(metadata_init: MetadataInitConfig): - if not metadata_init.a_type.is_required: + if not getattr(metadata_init.a_type, "is_required", False): pytest.skip("Only testing mandatory ones") with pytest.raises(ValueError, match="Missing"): if issubclass(metadata_init.a_type, metadata.TextListBasedMetadata): @@ -344,8 +343,8 @@ def test_mandatory_value(metadata_init: MetadataInitConfig): elif issubclass(metadata_init.a_type, metadata.DateBasedMetadata): pytest.skip("Cannot set an empty Date") elif issubclass(metadata_init.a_type, metadata.DefaultIllustrationMetadata): - metadata_init.a_type(b"") - metadata_init.a_type(b" ") + metadata_init.a_type(b"") # pyright:ignore[reportUnknownMemberType] + metadata_init.a_type(b" ") # pyright:ignore[reportUnknownMemberType] elif get_classvar_value_type(metadata_init.a_type) is bytes: if metadata_init.nb_args == 1: metadata_init.a_type(b"") # pyright: ignore[reportCallIssue] @@ -433,22 +432,18 @@ def test_libzim_bytes_value(metadata_init: MetadataInitConfig, png_image: pathli def test_libzim_io_bytesio_value( metadata_init: MetadataInitConfig, png_image: pathlib.Path ): - if 
isinstance(metadata_init.a_type, metadata.IllustrationBasedMetadata): + if metadata_init.a_type == metadata.DefaultIllustrationMetadata: with open(png_image, "rb") as fh: png_data = fh.read() - if metadata_init.nb_args == 1: - assert ( - metadata_init.a_type(value=io.BytesIO(png_data)).libzim_value - == png_data - ) - else: - assert ( - metadata_init.a_type( - value=io.BytesIO(png_data), size=48 - ).libzim_value - == png_data - ) - if get_classvar_value_type(metadata_init.a_type) in ( + assert metadata_init.a_type(value=io.BytesIO(png_data)).libzim_value == png_data + elif metadata_init.a_type == metadata.IllustrationMetadata: + with open(png_image, "rb") as fh: + png_data = fh.read() + assert ( + metadata_init.a_type(value=io.BytesIO(png_data), size=48).libzim_value + == png_data + ) + elif get_classvar_value_type(metadata_init.a_type) in ( bytes, BinaryIO, io.BytesIO, @@ -475,7 +470,7 @@ def test_std_metadata_values(): values = test_value.values() assert len(values) == 9 - expected_values: list[metadata.Metadata] = [ + expected_values: list[metadata.AnyMetadata] = [ metadata.LicenseMetadata("Creative Commons CC0"), metadata.NameMetadata("Test Name"), metadata.TitleMetadata("Test Title"), @@ -571,7 +566,7 @@ def test_default_dev_zim_metadata(): assert len(metadata.DEFAULT_DEV_ZIM_METADATA.values()) == 8 -def test_get_binary_from(png_image): +def test_get_binary_from(png_image: pathlib.Path): with open(png_image, "rb") as fh: png_data = fh.read() # bytes input diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index 36380374..7cc1d4c2 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -1,8 +1,3 @@ -#!/usr/bin/env python -# vim: ai ts=4 sts=4 et sw=4 nu - -from __future__ import annotations - import datetime import io import logging @@ -13,7 +8,8 @@ import sys import tempfile import time -from typing import NamedTuple +from types import NoneType +from typing import Any, NamedTuple from unittest.mock import call, 
patch import pytest @@ -26,6 +22,7 @@ from zimscraperlib.zim import Archive, Creator, StaticItem, URLItem from zimscraperlib.zim.metadata import ( DEFAULT_DEV_ZIM_METADATA, + AnyMetadata, CreatorMetadata, CustomMetadata, CustomTextMetadata, @@ -71,7 +68,13 @@ def get_contentprovider(self): return FileLikeProvider(self.fileobj) -def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn: str): +def test_zim_creator( + tmp_path: pathlib.Path, + png_image: pathlib.Path, + html_file: pathlib.Path, + html_str: str, + html_str_cn: str, +): fpath = tmp_path / "test.zim" main_path = "welcome" tags = ";".join(["toto", "tata"]) @@ -144,7 +147,7 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str: str, html_str_cn: assert bytes(reader.get_illustration_item().content) == png_data -def test_create_without_workaround(tmp_path): +def test_create_without_workaround(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" with Creator(fpath, "welcome", workaround_nocancel=False).config_dev_metadata(): @@ -153,7 +156,7 @@ def test_create_without_workaround(tmp_path): assert fpath.exists() -def test_noindexlanguage(tmp_path): +def test_noindexlanguage(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" creator = Creator(fpath, "welcome").config_dev_metadata(LanguageMetadata("bam")) creator.config_indexing(False) @@ -169,7 +172,7 @@ def test_noindexlanguage(tmp_path): assert not reader.has_fulltext_index -def test_add_item_for(tmp_path): +def test_add_item_for(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" # test without mimetype with Creator(fpath, "welcome").config_dev_metadata() as creator: @@ -181,7 +184,7 @@ def test_add_item_for(tmp_path): creator.add_item_for(path="welcome", title="hello") -def test_additem_bad_content(tmp_path): +def test_additem_bad_content(tmp_path: pathlib.Path): with Creator(tmp_path / "test.zim", "welcome").config_dev_metadata() as creator: with pytest.raises(RuntimeError, match="Unexpected type for content"): si 
= StaticItem(path="welcome", content="hello") @@ -189,7 +192,7 @@ def test_additem_bad_content(tmp_path): creator.add_item(si) -def test_add_item_for_delete(tmp_path, html_file): +def test_add_item_for_delete(tmp_path: pathlib.Path, html_file: pathlib.Path): fpath = tmp_path / "test.zim" local_path = pathlib.Path(tmp_path / "somefile.html") @@ -205,7 +208,7 @@ def test_add_item_for_delete(tmp_path, html_file): assert reader.get_item("index") -def test_add_item_for_delete_fail(tmp_path, png_image): +def test_add_item_for_delete_fail(tmp_path: pathlib.Path, png_image: pathlib.Path): fpath = tmp_path / "test.zim" local_path = pathlib.Path(tmp_path / "somefile.png") @@ -226,7 +229,7 @@ def test_add_item_for_delete_fail(tmp_path, png_image): assert reader.get_item("index") -def test_add_item_empty_content(tmp_path): +def test_add_item_empty_content(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" # test with incorrect content type with Creator(fpath, "welcome").config_dev_metadata() as creator: @@ -237,7 +240,7 @@ def test_add_item_empty_content(tmp_path): ) -def test_compression(tmp_path): +def test_compression(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" with Creator( tmp_path / "test.zim", "welcome", compression="zstd" @@ -250,7 +253,7 @@ def test_compression(tmp_path): creator.add_item(StaticItem(path="welcome", content="hello")) -def test_double_finish(tmp_path): +def test_double_finish(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" with Creator(fpath, "welcome").config_dev_metadata() as creator: creator.add_item(StaticItem(path="welcome", content="hello")) @@ -259,13 +262,13 @@ def test_double_finish(tmp_path): creator.finish() -def test_cannot_finish(tmp_path): +def test_cannot_finish(tmp_path: pathlib.Path): creator = Creator(tmp_path / "test.zim", "") creator.can_finish = False creator.finish() -def test_sourcefile_removal(tmp_path, html_file): +def test_sourcefile_removal(tmp_path: pathlib.Path, html_file: pathlib.Path): fpath = 
tmp_path / "test.zim" with Creator(fpath, "").config_dev_metadata() as creator: # using a temp dir so file still have a meaningful name @@ -279,9 +282,9 @@ def test_sourcefile_removal(tmp_path, html_file): assert not src_path.exists() -def test_sourcefile_removal_std(tmp_path, html_file): +def test_sourcefile_removal_std(tmp_path: pathlib.Path, html_file: pathlib.Path): fpath = tmp_path / "test.zim" - paths = [] + paths: list[pathlib.Path] = [] with Creator(fpath, "").config_dev_metadata() as creator: for idx in range(0, 4): # copy html to folder @@ -299,7 +302,7 @@ def test_sourcefile_removal_std(tmp_path, html_file): assert not path.exists() -def test_sourcefile_noremoval(tmp_path, html_file): +def test_sourcefile_noremoval(tmp_path: pathlib.Path, html_file: pathlib.Path): # copy html to folder src_path = tmp_path / "source.html" shutil.copyfile(html_file, src_path) @@ -311,13 +314,13 @@ def test_sourcefile_noremoval(tmp_path, html_file): assert src_path.exists() -def test_urlitem_badurl(tmp_path): +def test_urlitem_badurl(tmp_path: pathlib.Path): with Creator(tmp_path / "test.zim", "").config_dev_metadata() as creator: with pytest.raises(IOError, match="Unable to access URL"): creator.add_item(URLItem(url="httpo://hello:helloe:hello/")) -def test_urlitem_html(tmp_path, gzip_html_url): +def test_urlitem_html(tmp_path: pathlib.Path, gzip_html_url: str): file_path = tmp_path / "file.html" save_large_file(gzip_html_url, file_path) with open(file_path, "rb") as fh: @@ -331,7 +334,7 @@ def test_urlitem_html(tmp_path, gzip_html_url): assert bytes(zim.get_item("wiki/Main_Page").content) == file_bytes -def test_urlitem_nonhtmlgzip(tmp_path, gzip_nonhtml_url): +def test_urlitem_nonhtmlgzip(tmp_path: pathlib.Path, gzip_nonhtml_url: str): file_path = tmp_path / "file.txt" save_large_file(gzip_nonhtml_url, file_path) with open(file_path, "rb") as fh: @@ -348,7 +351,7 @@ def test_urlitem_nonhtmlgzip(tmp_path, gzip_nonhtml_url): assert bytes(zim.get_item("robots.txt").content) 
== file_bytes -def test_urlitem_binary(tmp_path, png_image_url): +def test_urlitem_binary(tmp_path: pathlib.Path, png_image_url: str): file_path = tmp_path / "file.png" save_large_file(png_image_url, file_path) with open(file_path, "rb") as fh: @@ -365,7 +368,7 @@ def test_urlitem_binary(tmp_path, png_image_url): ) -def test_urlitem_staticcontent(tmp_path, gzip_nonhtml_url): +def test_urlitem_staticcontent(tmp_path: pathlib.Path, gzip_nonhtml_url: str): fpath = tmp_path / "test.zim" with Creator(fpath, "").config_dev_metadata() as creator: creator.add_item(URLItem(url=gzip_nonhtml_url, content="hello")) @@ -374,7 +377,7 @@ def test_urlitem_staticcontent(tmp_path, gzip_nonhtml_url): assert bytes(zim.get_item("robots.txt").content) == b"hello" -def test_filelikeprovider_nosize(tmp_path, png_image_url): +def test_filelikeprovider_nosize(tmp_path: pathlib.Path, png_image_url: str): fileobj = io.BytesIO() stream_file(png_image_url, byte_stream=fileobj) @@ -386,7 +389,7 @@ def test_filelikeprovider_nosize(tmp_path, png_image_url): assert bytes(zim.get_item("one.png").content) == fileobj.getvalue() -def test_urlprovider(tmp_path, png_image_url): +def test_urlprovider(tmp_path: pathlib.Path, png_image_url: str): file_path = tmp_path / "file.png" save_large_file(png_image_url, file_path) with open(file_path, "rb") as fh: @@ -400,7 +403,9 @@ def test_urlprovider(tmp_path, png_image_url): assert bytes(zim.get_item("one.png").content) == file_bytes -def test_urlprovider_nolength(tmp_path, png_image_url, png_image): +def test_urlprovider_nolength( + tmp_path: pathlib.Path, png_image_url: str, png_image: pathlib.Path +): # save url's content locally using external tool png_image = tmp_path / "original.png" save_large_file(png_image_url, png_image) @@ -470,7 +475,7 @@ def do_GET(self): assert bytes(zim.get_item("B").content) == png_image_bytes -def test_item_callback(tmp_path, html_file): +def test_item_callback(tmp_path: pathlib.Path, html_file: pathlib.Path): fpath = tmp_path / 
"test.zim" class Store: @@ -488,7 +493,7 @@ def cb(): assert Store.called is True -def test_item_callbacks(tmp_path): +def test_item_callbacks(tmp_path: pathlib.Path): fpath = tmp_path / "test.zim" class Store: @@ -533,7 +538,7 @@ def callable(self) -> bool: assert Store.called == 5 -def test_compess_hints(tmp_path, html_file): +def test_compess_hints(tmp_path: pathlib.Path, html_file: pathlib.Path): with Creator(tmp_path / "test.zim", "").config_dev_metadata() as creator: creator.add_item_for( path=html_file.name, @@ -543,11 +548,11 @@ def test_compess_hints(tmp_path, html_file): ) -def test_callback_and_remove(tmp_path, html_file): +def test_callback_and_remove(tmp_path: pathlib.Path, html_file: pathlib.Path): class Store: called = 0 - def cb(*args): # noqa: ARG001 + def cb(*_: Any): Store.called += 1 # duplicate test file as we'll want to remove twice @@ -573,7 +578,7 @@ def cb(*args): # noqa: ARG001 assert Store.called == 2 -def test_duplicates(tmp_path): +def test_duplicates(tmp_path: pathlib.Path): with Creator(tmp_path / "test.zim", "").config_dev_metadata() as creator: creator.add_item_for(path="A", content="A") creator.add_item_for(path="C", content="C") @@ -584,7 +589,7 @@ def test_duplicates(tmp_path): creator.add_redirect(path="B", target_path="C") -def test_ignore_duplicates(tmp_path): +def test_ignore_duplicates(tmp_path: pathlib.Path): with Creator( tmp_path / "test.zim", "", ignore_duplicates=True ).config_dev_metadata() as creator: @@ -594,7 +599,7 @@ def test_ignore_duplicates(tmp_path): creator.add_redirect(path="B", target_path="C") -def test_without_metadata(tmp_path): +def test_without_metadata(tmp_path: pathlib.Path): with pytest.raises(ValueError, match="Mandatory metadata are not all set."): Creator(tmp_path, "").start() @@ -618,13 +623,15 @@ def test_without_metadata(tmp_path): ) @patch("zimscraperlib.zim.creator.logger", autospec=True) def test_start_logs_metadata_log_contents( - mocked_logger, - png_image, - tags, - tmp_path, - 
ignore_metadata_conventions, # noqa: ARG001 + mocked_logger: logging.Logger, + png_image: pathlib.Path, + tags: str | list[str], + tmp_path: pathlib.Path, + ignore_metadata_conventions: NoneType, # noqa: ARG001 ): - mocked_logger.isEnabledFor.side_effect = lambda level: level == logging.DEBUG + mocked_logger.isEnabledFor.side_effect = ( # pyright: ignore[reportFunctionMemberAccess] + lambda level: level == logging.DEBUG # pyright: ignore[reportUnknownLambdaType] + ) fpath = tmp_path / "test_config.zim" with open(png_image, "rb") as fh: png_data = fh.read() @@ -658,7 +665,7 @@ class NotPrintable(str): def __str__(self): raise ValueError("Not printable I said") - creator._metadata.update( + creator._metadata.update( # pyright: ignore[reportPrivateUsage] { "Illustration_96x96@1": Metadata( value=b"%PDF-1.5\n%\xe2\xe3\xcf\xd3", name="Illustration_96x96@1" @@ -676,12 +683,14 @@ def __str__(self): } # intentionaly bad, to handle case where user does bad things ) # intentionaly bad, to handle case where user does bad things - creator._metadata["Relation"] = None # pyright: ignore[reportArgumentType] - creator._metadata["BadRawValue"] = "Value" # pyright: ignore[reportArgumentType] - - creator._log_metadata() - # /!\ this must be alpha sorted - mocked_logger.debug.assert_has_calls( + creator._metadata[ # pyright: ignore[reportArgumentType, reportPrivateUsage] + "Relation" + ] = None + creator._metadata[ # pyright: ignore[reportArgumentType, reportPrivateUsage] + "BadRawValue" + ] = "Value" + creator._log_metadata() # pyright: ignore[reportPrivateUsage] + mocked_logger.debug.assert_has_calls( # pyright: ignore[reportFunctionMemberAccess] [ call("Metadata: BadRawValue is improper metadata type: str: Value"), call("Metadata: Chars = šɔɛ"), @@ -707,7 +716,7 @@ def __str__(self): ), call("Metadata: Name = wikipedia_fr_football"), call("Metadata: Publisher = Wikipedia user Foobar"), - call("Metadata: Relation is None"), + call("Metadata: Relation is improper metadata type: 
NoneType: None"), call("Metadata: Scraper = mwoffliner 1.2.3"), call("Metadata: Source = https://en.wikipedia.org/"), call(f"Metadata: Tags = {tags}"), @@ -721,8 +730,8 @@ def __str__(self): def test_relax_metadata( - tmp_path, - ignore_metadata_conventions, # noqa: ARG001 + tmp_path: pathlib.Path, + ignore_metadata_conventions: NoneType, # noqa: ARG001 ): Creator(tmp_path, "").config_dev_metadata(DescriptionMetadata("T" * 90)).start() @@ -746,7 +755,9 @@ def test_relax_metadata( ), ], ) -def test_config_metadata(tmp_path, png_image, tags): +def test_config_metadata( + tmp_path: pathlib.Path, png_image: pathlib.Path, tags: str | list[str] +): fpath = tmp_path / "test_config.zim" with open(png_image, "rb") as fh: png_data = fh.read() @@ -810,7 +821,7 @@ def test_config_metadata(tmp_path, png_image, tags): assert reader.get_text_metadata("X-TestMetadata") == "Test Metadata" -def test_config_metadata_control_characters(tmp_path): +def test_config_metadata_control_characters(tmp_path: pathlib.Path): fpath = tmp_path / "test_config.zim" creator = Creator(fpath, "").config_dev_metadata( [ @@ -865,7 +876,7 @@ def test_config_metadata_control_characters(tmp_path): class ExtraMetadataCase(NamedTuple): - extras: list[Metadata] + extras: list[AnyMetadata] fail_on_missing_prefix: bool id: str @@ -909,7 +920,7 @@ def metadata_extras(request: pytest.FixtureRequest): yield request.param -def test_metadata_extras(tmp_path, metadata_extras: ExtraMetadataCase): +def test_metadata_extras(tmp_path: pathlib.Path, metadata_extras: ExtraMetadataCase): Creator(tmp_path / "_.zim", "").config_metadata( DEFAULT_DEV_ZIM_METADATA, metadata_extras.extras, @@ -917,11 +928,13 @@ def test_metadata_extras(tmp_path, metadata_extras: ExtraMetadataCase): ) -def test_metadata_extras_dev(tmp_path, metadata_extras: ExtraMetadataCase): +def test_metadata_extras_dev( + tmp_path: pathlib.Path, metadata_extras: ExtraMetadataCase +): Creator(tmp_path / "_.zim", "").config_dev_metadata(metadata_extras.extras) 
-def test_metadata_extras_missing_prefix(tmp_path): +def test_metadata_extras_missing_prefix(tmp_path: pathlib.Path): with pytest.raises(ValueError, match="does not starts with X- as expected"): Creator(tmp_path / "_.zim", "").config_metadata( DEFAULT_DEV_ZIM_METADATA, @@ -944,7 +957,7 @@ def test_metadata_extras_missing_prefix(tmp_path): ], ) def test_add_metadata( - tmp_path: pathlib.Path, name: str, metadata: Metadata, expected_value: str + tmp_path: pathlib.Path, name: str, metadata: AnyMetadata, expected_value: str ): fpath = tmp_path / "test_blank.zim" with Creator(fpath, "").config_dev_metadata() as creator: @@ -954,7 +967,7 @@ def test_add_metadata( assert reader.get_text_metadata(name) == expected_value -def test_config_indexing(tmp_path): +def test_config_indexing(tmp_path: pathlib.Path): with pytest.raises(ValueError): Creator(tmp_path / "_.zim", "").config_indexing(True, "fr") with pytest.raises(ValueError): From e0f94d0910fceaf0a5246412b768188cb073d22d Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 20 Dec 2024 15:21:30 +0000 Subject: [PATCH 2/4] Add path_from helper context manager --- src/zimscraperlib/filesystem.py | 20 ++++++++++++++ tests/filesystem/test_filesystem.py | 41 +++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py index 6dce4d3e..7d22065e 100644 --- a/src/zimscraperlib/filesystem.py +++ b/src/zimscraperlib/filesystem.py @@ -4,6 +4,9 @@ import os import pathlib +from contextlib import contextmanager +from tempfile import TemporaryDirectory +from typing import Any import magic @@ -41,3 +44,20 @@ def delete_callback(fpath: str | pathlib.Path): """helper deleting passed filepath""" os.unlink(fpath) + + +@contextmanager +def path_from(path: pathlib.Path | TemporaryDirectory[Any] | str): + """Context manager to get a Path from a path as string, Path or TemporaryDirectory + + Since scraperlib wants to manipulate only Path, scrapers might often needs 
this + to create a path from what they have, especially since TemporaryDirectory context + manager returns a string which is not really handy. + """ + if isinstance(path, pathlib.Path): + yield path + elif isinstance(path, TemporaryDirectory): + with path as pathname: + yield pathlib.Path(pathname) + else: + yield pathlib.Path(path) diff --git a/tests/filesystem/test_filesystem.py b/tests/filesystem/test_filesystem.py index 1d9e783e..615030d3 100644 --- a/tests/filesystem/test_filesystem.py +++ b/tests/filesystem/test_filesystem.py @@ -1,4 +1,5 @@ import pathlib +from tempfile import TemporaryDirectory from typing import Any import magic @@ -8,6 +9,7 @@ delete_callback, get_content_mimetype, get_file_mimetype, + path_from, ) @@ -54,3 +56,42 @@ def test_delete_callback(tmp_path: pathlib.Path): delete_callback(fpath) assert not fpath.exists() + + +def test_path_from_tmp_dir(): + tempdir = TemporaryDirectory() + with path_from(tempdir) as tmp_dir: + file = tmp_dir / "file.txt" + file.touch() + assert file.exists() + assert pathlib.Path(tempdir.name).exists() + + assert not pathlib.Path(tempdir.name).exists() + + +def test_path_from_path(): + tempdir = TemporaryDirectory() + tempdir_path = pathlib.Path(tempdir.name) + with path_from(tempdir_path) as tmp_dir: + file = tmp_dir / "file.txt" + file.touch() + assert file.exists() + assert pathlib.Path(tempdir.name).exists() + + assert pathlib.Path(tempdir.name).exists() + tempdir.cleanup() + assert not pathlib.Path(tempdir.name).exists() + + +def test_path_from_str(): + tempdir = TemporaryDirectory() + tempdir_path = pathlib.Path(tempdir.name) + with path_from(str(tempdir_path)) as tmp_dir: + file = tmp_dir / "file.txt" + file.touch() + assert file.exists() + assert pathlib.Path(tempdir.name).exists() + + assert pathlib.Path(tempdir.name).exists() + tempdir.cleanup() + assert not pathlib.Path(tempdir.name).exists() From c314a77d3cbdecdb6da3a048a236a0cb2bf6d964 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 20 Dec 2024 
15:27:43 +0000 Subject: [PATCH 3/4] Small enhancements following review --- pyproject.toml | 1 + src/zimscraperlib/image/__init__.py | 9 ++++----- src/zimscraperlib/video/__init__.py | 10 ++++------ src/zimscraperlib/video/probing.py | 2 +- src/zimscraperlib/zim/metadata.py | 12 ++++++------ tests/filesystem/test_filesystem.py | 2 +- 6 files changed, 17 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 757243be..e113db0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -279,6 +279,7 @@ exclude_lines = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", "class .*Protocol.*", + "@abstractmethod", ] [tool.pyright] diff --git a/src/zimscraperlib/image/__init__.py b/src/zimscraperlib/image/__init__.py index 5a7039ca..5d34d816 100644 --- a/src/zimscraperlib/image/__init__.py +++ b/src/zimscraperlib/image/__init__.py @@ -1,7 +1,6 @@ -# flake8: noqa -from .conversion import convert_image -from .optimization import optimize_image -from .probing import is_valid_image -from .transformation import resize_image +from zimscraperlib.image.conversion import convert_image +from zimscraperlib.image.optimization import optimize_image +from zimscraperlib.image.probing import is_valid_image +from zimscraperlib.image.transformation import resize_image __all__ = ["convert_image", "is_valid_image", "optimize_image", "resize_image"] diff --git a/src/zimscraperlib/video/__init__.py b/src/zimscraperlib/video/__init__.py index d4a2a744..f641c414 100644 --- a/src/zimscraperlib/video/__init__.py +++ b/src/zimscraperlib/video/__init__.py @@ -1,7 +1,5 @@ -from .config import Config +from zimscraperlib.video.config import Config +from zimscraperlib.video.encoding import reencode +from zimscraperlib.video.probing import get_media_info -# flake8: noqa -from .encoding import reencode -from .probing import get_media_info - -__all__ = ["Config", "reencode", "get_media_info"] +__all__ = ["Config", "get_media_info", "reencode"] diff --git 
a/src/zimscraperlib/video/probing.py b/src/zimscraperlib/video/probing.py index 06b5d057..3fa511ab 100644 --- a/src/zimscraperlib/video/probing.py +++ b/src/zimscraperlib/video/probing.py @@ -2,7 +2,7 @@ import subprocess -def get_media_info(src_path: str | pathlib.Path): +def get_media_info(src_path: pathlib.Path): """dict of file's details from ffprobe codecs: list of codecs in use diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py index 99eff296..7d2b75d0 100644 --- a/src/zimscraperlib/zim/metadata.py +++ b/src/zimscraperlib/zim/metadata.py @@ -148,14 +148,14 @@ def validate(self) -> None: _ = self.libzim_value @abstractmethod - def get_cleaned_value(self, value: Any) -> T: ... # pragma: no cover + def get_cleaned_value(self, value: Any) -> T: ... @property def libzim_value(self) -> bytes: return self.get_libzim_value() @abstractmethod - def get_libzim_value(self) -> bytes: ... # pragma: no cover + def get_libzim_value(self) -> bytes: ... # Alias for convenience when function accept any metadata @@ -236,12 +236,12 @@ def get_binary_from( elif isinstance(value, bytes): bvalue = value else: - last_pos: int + last_pos: int = 0 if isinstance(value, SupportsSeekableRead) and value.seekable(): last_pos = value.tell() bvalue = value.read() if isinstance(value, SupportsSeekableRead) and value.seekable(): - value.seek(last_pos) # pyright: ignore[reportPossiblyUnboundVariable] + value.seek(last_pos) if not self.empty_allowed and not value: raise ValueError("Missing value (empty not allowed)") return bvalue @@ -495,8 +495,8 @@ def get_reserved_names(cls) -> list[str]: for field in fields(cls): if not isinstance(field.type, type): continue - # if field type is a type, it means that it is required (otherwise field - # type is a string when None is allowed) + # field.type is a `type` only when expecting a single type + # and is a string in case of None Union names.append(getattr(field.type, "meta_name", "")) return names diff --git 
a/tests/filesystem/test_filesystem.py b/tests/filesystem/test_filesystem.py index 615030d3..6518c59c 100644 --- a/tests/filesystem/test_filesystem.py +++ b/tests/filesystem/test_filesystem.py @@ -33,7 +33,7 @@ def test_content_mimetype_fallback( assert get_content_mimetype(undecodable_byte_stream) == "application/octet-stream" # mock then so we keep coverage on systems where magic works - def raising_magic(*_: Any, **__: Any): + def raising_magic(*args: Any, **kwargs: Any): # noqa: ARG001 raise UnicodeDecodeError("nocodec", b"", 0, 1, "noreason") monkeypatch.setattr(magic, "from_buffer", raising_magic) From 39095bab2667212d8261acc437e0e6de4809ceca Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 20 Dec 2024 19:31:07 +0000 Subject: [PATCH 4/4] Make image optimization methods stricter with options types --- src/zimscraperlib/image/optimization.py | 239 ++++++++++------- src/zimscraperlib/image/presets.py | 141 +++++----- tests/image/test_image.py | 333 ++++++++++++++++++++---- 3 files changed, 497 insertions(+), 216 deletions(-) diff --git a/src/zimscraperlib/image/optimization.py b/src/zimscraperlib/image/optimization.py index 55e2f7f1..e17c84a5 100644 --- a/src/zimscraperlib/image/optimization.py +++ b/src/zimscraperlib/image/optimization.py @@ -18,13 +18,11 @@ can still run on default settings which give a bit less size than the original images but maintain a high quality. 
""" -import functools import io import os import pathlib import subprocess -from collections.abc import Callable -from typing import Any +from dataclasses import dataclass import piexif # pyright: ignore[reportMissingTypeStubs] from optimize_images.img_aux_processing import ( # pyright: ignore[reportMissingTypeStubs] @@ -54,18 +52,9 @@ def ensure_matches( raise ValueError(f"{src} is not of format {fmt}") -def optimize_png( - src: pathlib.Path | io.BytesIO, - dst: pathlib.Path | io.BytesIO | None = None, - max_colors: int = 256, - background_color: tuple[int, int, int] = (255, 255, 255), - *, - reduce_colors: bool | None = False, - fast_mode: bool | None = True, - remove_transparency: bool | None = False, - **_: Any, -) -> pathlib.Path | io.BytesIO: - """method to optimize PNG files using a pure python external optimizer +@dataclass +class OptimizePngOptions: + """Dataclass holding PNG optimization options Arguments: reduce_colors: Whether to reduce colors using adaptive color pallette (boolean) @@ -79,20 +68,38 @@ def optimize_png( values: True | False background_color: Background color if remove_transparency is True (tuple containing RGB values) - values: (255, 255, 255) | (221, 121, 108) | (XX, YY, ZZ)""" + values: (255, 255, 255) | (221, 121, 108) | (XX, YY, ZZ) + """ + + max_colors: int = 256 + background_color: tuple[int, int, int] = (255, 255, 255) + reduce_colors: bool | None = False + fast_mode: bool | None = True + remove_transparency: bool | None = False + + +def optimize_png( + src: pathlib.Path | io.BytesIO, + dst: pathlib.Path | io.BytesIO | None = None, + options: OptimizePngOptions | None = None, +) -> pathlib.Path | io.BytesIO: + """method to optimize PNG files using a pure python external optimizer""" ensure_matches(src, "PNG") img = Image.open(src) - if remove_transparency: - img = remove_alpha(img, background_color) + if options is None: + options = OptimizePngOptions() + + if options.remove_transparency: + img = remove_alpha(img, 
options.background_color) - if reduce_colors: - img, __, __ = do_reduce_colors(img, max_colors) + if options.reduce_colors: + img, _, _ = do_reduce_colors(img, options.max_colors) - if not fast_mode and img.mode == "P": - img, __ = rebuild_palette(img) + if not options.fast_mode and img.mode == "P": + img, _ = rebuild_palette(img) if dst is None: dst = io.BytesIO() @@ -102,16 +109,9 @@ def optimize_png( return dst -def optimize_jpeg( - src: pathlib.Path | io.BytesIO, - dst: pathlib.Path | io.BytesIO | None = None, - quality: int | None = 85, - *, - fast_mode: bool | None = True, - keep_exif: bool | None = True, - **_: Any, -) -> pathlib.Path | io.BytesIO: - """method to optimize JPEG files using a pure python external optimizer +@dataclass +class OptimizeJpgOptions: + """Dataclass holding JPG optimization options Arguments: quality: JPEG quality (integer between 1 and 100) @@ -120,7 +120,23 @@ def optimize_jpeg( values: True | False fast_mode: Use the supplied quality value. If turned off, optimizer will get dynamic quality value to ensure better compression - values: True | False""" + values: True | False + """ + + quality: int | None = 85 + fast_mode: bool | None = True + keep_exif: bool | None = True + + +def optimize_jpeg( + src: pathlib.Path | io.BytesIO, + dst: pathlib.Path | io.BytesIO | None = None, + options: OptimizeJpgOptions | None = None, +) -> pathlib.Path | io.BytesIO: + """method to optimize JPEG files using a pure python external optimizer""" + + if options is None: + options = OptimizeJpgOptions() ensure_matches(src, "JPEG") @@ -146,10 +162,10 @@ def optimize_jpeg( # only use progressive if file size is bigger use_progressive_jpg = orig_size > 10240 # 10KiB # noqa: PLR2004 - if fast_mode: - quality_setting = quality + if options.fast_mode: + quality_setting = options.quality else: - quality_setting, __ = jpeg_dynamic_quality(img) + quality_setting, _ = jpeg_dynamic_quality(img) if dst is None: dst = io.BytesIO() @@ -165,7 +181,7 @@ def 
optimize_jpeg( if isinstance(dst, io.BytesIO): dst.seek(0) - if keep_exif and had_exif: + if options.keep_exif and had_exif: piexif.transplant( # pyright: ignore[reportUnknownMemberType] exif_src=( str(src.resolve()) if isinstance(src, pathlib.Path) else src.getvalue() @@ -179,16 +195,9 @@ def optimize_jpeg( return dst -def optimize_webp( - src: pathlib.Path | io.BytesIO, - dst: pathlib.Path | io.BytesIO | None = None, - quality: int | None = 60, - method: int | None = 6, - *, - lossless: bool | None = False, - **_: Any, -) -> pathlib.Path | io.BytesIO: - """method to optimize WebP using Pillow options +@dataclass +class OptimizeWebpOptions: + """Dataclass holding WebP optimization options Arguments: lossless: Whether to use lossless compression (boolean); @@ -201,13 +210,29 @@ def optimize_webp( values: 1 | 2 | 3 | 4 | 5 | 6 refer to the link for more details - https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp""" + https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp + """ + + quality: int | None = 60 + method: int | None = 6 + lossless: bool | None = False + + +def optimize_webp( + src: pathlib.Path | io.BytesIO, + dst: pathlib.Path | io.BytesIO | None = None, + options: OptimizeWebpOptions | None = None, +) -> pathlib.Path | io.BytesIO: + """method to optimize WebP using Pillow options""" + + if options is None: + options = OptimizeWebpOptions() ensure_matches(src, "WEBP") params: dict[str, bool | int | None] = { - "lossless": lossless, - "quality": quality, - "method": method, + "lossless": options.lossless, + "quality": options.quality, + "method": options.method, } webp_image = Image.open(src) @@ -230,18 +255,9 @@ def optimize_webp( return dst -def optimize_gif( - src: pathlib.Path, - dst: pathlib.Path, - optimize_level: int | None = 1, - lossiness: int | None = None, - max_colors: int | None = None, - *, - interlace: bool | None = True, - no_extensions: bool | None = True, - **_: Any, -) -> pathlib.Path: - 
"""method to optimize GIFs using gifsicle >= 1.92 +@dataclass +class OptimizeGifOptions: + """Dataclass holding GIF optimization options Arguments: optimize_level: Optimization level; higher values give better compression @@ -258,21 +274,37 @@ def optimize_gif( (integer between 2 and 256) values: 2 | 86 | 128 | 256 | XX - refer to the link for more details - https://www.lcdf.org/gifsicle/man.html""" + refer to the link for more details - https://www.lcdf.org/gifsicle/man.html + """ + + optimize_level: int | None = 1 + lossiness: int | None = None + max_colors: int | None = None + interlace: bool | None = True + no_extensions: bool | None = True + + +def optimize_gif( + src: pathlib.Path, dst: pathlib.Path, options: OptimizeGifOptions | None = None +) -> pathlib.Path: + """method to optimize GIFs using gifsicle >= 1.92""" + + if options is None: + options = OptimizeGifOptions() ensure_matches(src, "GIF") # use gifsicle args = ["/usr/bin/env", "gifsicle"] - if optimize_level: - args += [f"-O{optimize_level}"] - if max_colors: - args += ["--colors", str(max_colors)] - if lossiness: - args += [f"--lossy={lossiness}"] - if no_extensions: + if options.optimize_level: + args += [f"-O{options.optimize_level}"] + if options.max_colors: + args += ["--colors", str(options.max_colors)] + if options.lossiness: + args += [f"--lossy={options.lossiness}"] + if options.no_extensions: args += ["--no-extensions"] - if interlace: + if options.interlace: args += ["--interlace"] args += [str(src)] with open(dst, "w") as out_file: @@ -287,13 +319,39 @@ def optimize_gif( return dst +@dataclass +class OptimizeOptions: + """Dataclass holding optimization options for all supported formats""" + + gif: OptimizeGifOptions + webp: OptimizeWebpOptions + jpg: OptimizeJpgOptions + png: OptimizePngOptions + + @classmethod + def of( + cls, + gif: OptimizeGifOptions | None = None, + webp: OptimizeWebpOptions | None = None, + jpg: OptimizeJpgOptions | None = None, + png: OptimizePngOptions | None = 
None, + ): + """Helper to override only a few options, keeping defaults for the others""" + return cls( + gif=gif or OptimizeGifOptions(), + png=png or OptimizePngOptions(), + webp=webp or OptimizeWebpOptions(), + jpg=jpg or OptimizeJpgOptions(), + ) + + def optimize_image( src: pathlib.Path, dst: pathlib.Path, + options: OptimizeOptions | None = None, *, delete_src: bool | None = False, convert: bool | str | None = False, - **options: Any, ): """Optimize image, automatically selecting correct optimizer @@ -305,6 +363,9 @@ def optimize_image( True: convert to format implied by dst suffix "FMT": convert to format FMT (use Pillow names)""" + if options is None: + options = OptimizeOptions.of() + src_format, dst_format = format_for(src, from_suffix=False), format_for(dst) if src_format is None: # pragma: no cover @@ -321,26 +382,20 @@ def optimize_image( else: src_img = pathlib.Path(src) - get_optimization_method(src_format)(src_img, dst, **options) + src_format = src_format.lower() + if src_format in ("jpg", "jpeg"): + optimize_jpeg(src=src_img, dst=dst, options=options.jpg) + elif src_format == "gif": + optimize_gif(src=src_img, dst=dst, options=options.gif) + elif src_format == "png": + optimize_png(src=src_img, dst=dst, options=options.png) + elif src_format == "webp": + optimize_webp(src=src_img, dst=dst, options=options.webp) + else: + raise NotImplementedError( + f"Image format '{src_format}' cannot yet be optimized" + ) # delete src image if requested if delete_src and src.exists() and src.resolve() != dst.resolve(): src.unlink() - - -def get_optimization_method(fmt: str) -> Callable[..., Any]: - """Return the proper optimization method to call for a given image format""" - - def raise_error(*_, orig_format: str): - raise NotImplementedError( - f"Image format '{orig_format}' cannot yet be optimized" - ) - - fmt = fmt.lower().strip() - return { - "gif": optimize_gif, - "jpg": optimize_jpeg, - "jpeg": optimize_jpeg, - "webp": optimize_webp, - "png": optimize_png, - 
}.get(fmt, functools.partial(raise_error, orig_format=fmt)) diff --git a/src/zimscraperlib/image/presets.py b/src/zimscraperlib/image/presets.py index 7be415f2..70f90c8f 100644 --- a/src/zimscraperlib/image/presets.py +++ b/src/zimscraperlib/image/presets.py @@ -1,4 +1,9 @@ -from typing import ClassVar +from zimscraperlib.image.optimization import ( + OptimizeGifOptions, + OptimizeJpgOptions, + OptimizePngOptions, + OptimizeWebpOptions, +) """ presets for ImageOptimizer in zimscraperlib.image.optimization module """ @@ -17,11 +22,11 @@ class WebpLow: ext = "webp" mimetype = f"{preset_type}/webp" - options: ClassVar[dict[str, str | bool | int | None]] = { - "lossless": False, - "quality": 40, - "method": 6, - } + options: OptimizeWebpOptions = OptimizeWebpOptions( + lossless=False, + quality=40, + method=6, + ) class WebpMedium: @@ -36,11 +41,11 @@ class WebpMedium: ext = "webp" mimetype = f"{preset_type}/webp" - options: ClassVar[dict[str, str | bool | int | None]] = { - "lossless": False, - "quality": 50, - "method": 6, - } + options: OptimizeWebpOptions = OptimizeWebpOptions( + lossless=False, + quality=50, + method=6, + ) class WebpHigh: @@ -55,11 +60,11 @@ class WebpHigh: ext = "webp" mimetype = f"{preset_type}/webp" - options: ClassVar[dict[str, str | bool | int | None]] = { - "lossless": False, - "quality": 90, - "method": 6, - } + options: OptimizeWebpOptions = OptimizeWebpOptions( + lossless=False, + quality=90, + method=6, + ) class GifLow: @@ -76,13 +81,13 @@ class GifLow: ext = "gif" mimetype = f"{preset_type}/gif" - options: ClassVar[dict[str, str | bool | int | None]] = { - "optimize_level": 3, - "max_colors": 256, - "lossiness": 80, - "no_extensions": True, - "interlace": True, - } + options: OptimizeGifOptions = OptimizeGifOptions( + optimize_level=3, + max_colors=256, + lossiness=80, + no_extensions=True, + interlace=True, + ) class GifMedium: @@ -99,12 +104,12 @@ class GifMedium: ext = "gif" mimetype = f"{preset_type}/gif" - options: 
ClassVar[dict[str, str | bool | int | None]] = { - "optimize_level": 3, - "lossiness": 20, - "no_extensions": True, - "interlace": True, - } + options: OptimizeGifOptions = OptimizeGifOptions( + optimize_level=3, + lossiness=20, + no_extensions=True, + interlace=True, + ) class GifHigh: @@ -121,12 +126,12 @@ class GifHigh: ext = "gif" mimetype = f"{preset_type}/gif" - options: ClassVar[dict[str, str | bool | int | None]] = { - "optimize_level": 2, - "lossiness": None, - "no_extensions": True, - "interlace": True, - } + options: OptimizeGifOptions = OptimizeGifOptions( + optimize_level=2, + lossiness=None, + no_extensions=True, + interlace=True, + ) class PngLow: @@ -140,12 +145,12 @@ class PngLow: ext = "png" mimetype = f"{preset_type}/png" - options: ClassVar[dict[str, str | bool | int | None]] = { - "reduce_colors": True, - "remove_transparency": False, - "max_colors": 256, - "fast_mode": False, - } + options: OptimizePngOptions = OptimizePngOptions( + reduce_colors=True, + remove_transparency=False, + max_colors=256, + fast_mode=False, + ) class PngMedium: @@ -159,11 +164,11 @@ class PngMedium: ext = "png" mimetype = f"{preset_type}/png" - options: ClassVar[dict[str, str | bool | int | None]] = { - "reduce_colors": False, - "remove_transparency": False, - "fast_mode": False, - } + options: OptimizePngOptions = OptimizePngOptions( + reduce_colors=False, + remove_transparency=False, + fast_mode=False, + ) class PngHigh: @@ -177,11 +182,11 @@ class PngHigh: ext = "png" mimetype = f"{preset_type}/png" - options: ClassVar[dict[str, str | bool | int | None]] = { - "reduce_colors": False, - "remove_transparency": False, - "fast_mode": True, - } + options: OptimizePngOptions = OptimizePngOptions( + reduce_colors=False, + remove_transparency=False, + fast_mode=True, + ) class JpegLow: @@ -193,14 +198,14 @@ class JpegLow: VERSION = 1 - ext = "png" - mimetype = f"{preset_type}/png" + ext = "jpg" + mimetype = f"{preset_type}/jpeg" - options: ClassVar[dict[str, str | bool | 
int | None]] = { - "quality": 45, - "keep_exif": False, - "fast_mode": True, - } + options: OptimizeJpgOptions = OptimizeJpgOptions( + quality=45, + keep_exif=False, + fast_mode=True, + ) class JpegMedium: @@ -215,11 +220,11 @@ class JpegMedium: ext = "jpg" mimetype = f"{preset_type}/jpeg" - options: ClassVar[dict[str, str | bool | int | None]] = { - "quality": 65, - "keep_exif": False, - "fast_mode": True, - } + options: OptimizeJpgOptions = OptimizeJpgOptions( + quality=65, + keep_exif=False, + fast_mode=True, + ) class JpegHigh: @@ -234,8 +239,8 @@ class JpegHigh: ext = "jpg" mimetype = f"{preset_type}/jpeg" - options: ClassVar[dict[str, str | bool | int | None]] = { - "quality": 80, - "keep_exif": True, - "fast_mode": True, - } + options: OptimizeJpgOptions = OptimizeJpgOptions( + quality=80, + keep_exif=True, + fast_mode=True, + ) diff --git a/tests/image/test_image.py b/tests/image/test_image.py index c39b367d..cea9204a 100644 --- a/tests/image/test_image.py +++ b/tests/image/test_image.py @@ -4,6 +4,7 @@ import pathlib import re import shutil +from dataclasses import asdict, is_dataclass from typing import Any import piexif # pyright: ignore[reportMissingTypeStubs] @@ -20,12 +21,17 @@ create_favicon, ) from zimscraperlib.image.optimization import ( + OptimizeGifOptions, + OptimizeJpgOptions, + OptimizeOptions, + OptimizePngOptions, + OptimizeWebpOptions, ensure_matches, - get_optimization_method, optimize_gif, optimize_image, optimize_jpeg, optimize_png, + optimize_webp, ) from zimscraperlib.image.presets import ( GifHigh, @@ -50,7 +56,11 @@ from zimscraperlib.image.transformation import resize_image from zimscraperlib.image.utils import save_image -ALL_PRESETS = [(n, p) for n, p in inspect.getmembers(presets) if inspect.isclass(p)] +ALL_PRESETS = [ + (n, p) + for n, p in inspect.getmembers(presets) + if inspect.isclass(p) and not is_dataclass(p) +] def get_image_size(fpath: pathlib.Path | io.BytesIO) -> tuple[int, int]: @@ -463,7 +473,7 @@ def 
test_wrong_extension( "fmt", ["png", "jpg", "gif", "webp"], ) -def test_optimize_image_default( +def test_optimize_image_default_generic( png_image2: pathlib.Path, jpg_image: pathlib.Path, gif_image: pathlib.Path, @@ -483,6 +493,40 @@ def test_optimize_image_default( assert os.path.getsize(dst) < os.path.getsize(src) +@pytest.mark.parametrize( + "fmt", + ["png", "jpg", "gif", "webp"], +) +def test_optimize_image_default_direct( + png_image2: pathlib.Path, + jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, +): + src, dst = get_src_dst( + tmp_path, + fmt, + png_image=png_image2, + jpg_image=jpg_image, + gif_image=gif_image, + webp_image=webp_image, + ) + + if fmt in ("jpg", "jpeg"): + optimize_jpeg(src=src, dst=dst) + elif fmt == "gif": + optimize_gif(src=src, dst=dst) + elif fmt == "png": + optimize_png(src=src, dst=dst) + elif fmt == "webp": + optimize_webp(src=src, dst=dst) + else: + raise NotImplementedError(f"Image format '{fmt}' cannot yet be optimized") + assert os.path.getsize(dst) < os.path.getsize(src) + + def test_optimize_image_del_src(png_image: pathlib.Path, tmp_path: pathlib.Path): shutil.copy(png_image, tmp_path) src = tmp_path / png_image.name @@ -511,11 +555,54 @@ def test_optimize_image_bad_dst(png_image: pathlib.Path, tmp_path: pathlib.Path) @pytest.mark.parametrize( - "preset,expected_version,options,fmt", + "preset,expected_version,options", + [ + (WebpLow(), 1, {"lossless": False, "quality": 40, "method": 6}), + (WebpMedium(), 1, {"lossless": False, "quality": 50, "method": 6}), + (WebpHigh(), 1, {"lossless": False, "quality": 90, "method": 6}), + ], +) +def test_image_preset_webp( + preset: WebpLow | WebpMedium | WebpHigh, + expected_version: int, + options: dict[str, str | bool | int | None], + webp_image: pathlib.Path, + tmp_path: pathlib.Path, +): + assert preset.VERSION == expected_version + assert preset.ext == "webp" + assert preset.mimetype == "image/webp" + + 
default_options = OptimizeWebpOptions() + preset_options = asdict(preset.options) + + for key, value in preset_options.items(): + assert value == ( + options[key] if key in options else getattr(default_options, key) + ) + + src = webp_image + dst = tmp_path / f"out.{preset.ext}" + optimize_image( + src, + tmp_path / f"out.{preset.ext}", + delete_src=False, + options=OptimizeOptions.of(webp=preset.options), + ) + assert os.path.getsize(dst) < os.path.getsize(src) + + image_bytes = "" + with open(src, "rb") as fl: + image_bytes = fl.read() + byte_stream = io.BytesIO(image_bytes) + dst_bytes = optimize_webp(src=byte_stream, options=preset.options) + assert isinstance(dst_bytes, io.BytesIO) + assert dst_bytes.getbuffer().nbytes < byte_stream.getbuffer().nbytes + + +@pytest.mark.parametrize( + "preset,expected_version,options", [ - (WebpLow(), 1, {"lossless": False, "quality": 40, "method": 6}, "webp"), - (WebpMedium(), 1, {"lossless": False, "quality": 50, "method": 6}, "webp"), - (WebpHigh(), 1, {"lossless": False, "quality": 90, "method": 6}, "webp"), ( GifLow(), 1, @@ -526,7 +613,6 @@ def test_optimize_image_bad_dst(png_image: pathlib.Path, tmp_path: pathlib.Path) "no_extensions": True, "interlace": True, }, - "gif", ), ( GifMedium(), @@ -537,7 +623,6 @@ def test_optimize_image_bad_dst(png_image: pathlib.Path, tmp_path: pathlib.Path) "no_extensions": True, "interlace": True, }, - "gif", ), ( GifHigh(), @@ -548,8 +633,42 @@ def test_optimize_image_bad_dst(png_image: pathlib.Path, tmp_path: pathlib.Path) "no_extensions": True, "interlace": True, }, - "gif", ), + ], +) +def test_image_preset_gif( + preset: GifLow | GifMedium | GifHigh, + expected_version: int, + options: dict[str, str | bool | int | None], + gif_image: pathlib.Path, + tmp_path: pathlib.Path, +): + assert preset.VERSION == expected_version + assert preset.ext == "gif" + assert preset.mimetype == "image/gif" + + default_options = OptimizeGifOptions() + preset_options = asdict(preset.options) + + for key, 
value in preset_options.items(): + assert value == ( + options[key] if key in options else getattr(default_options, key) + ) + + src = gif_image + dst = tmp_path / f"out.{preset.ext}" + optimize_image( + src, + tmp_path / f"out.{preset.ext}", + delete_src=False, + options=OptimizeOptions.of(gif=preset.options), + ) + assert os.path.getsize(dst) < os.path.getsize(src) + + +@pytest.mark.parametrize( + "preset,expected_version,options", + [ ( PngLow(), 1, @@ -559,76 +678,105 @@ def test_optimize_image_bad_dst(png_image: pathlib.Path, tmp_path: pathlib.Path) "max_colors": 256, "fast_mode": False, }, - "png", ), ( PngMedium(), 1, {"reduce_colors": False, "remove_transparency": False, "fast_mode": False}, - "png", ), ( PngHigh(), 1, {"reduce_colors": False, "remove_transparency": False, "fast_mode": True}, - "png", ), - (JpegLow(), 1, {"quality": 45, "keep_exif": False, "fast_mode": True}, "jpg"), + ], +) +def test_image_preset_png( + preset: PngLow | PngMedium | PngHigh, + expected_version: int, + options: dict[str, str | bool | int | None], + png_image: pathlib.Path, + tmp_path: pathlib.Path, +): + assert preset.VERSION == expected_version + assert preset.ext == "png" + assert preset.mimetype == "image/png" + + default_options = OptimizePngOptions() + preset_options = asdict(preset.options) + + for key, value in preset_options.items(): + assert value == ( + options[key] if key in options else getattr(default_options, key) + ) + + src = png_image + dst = tmp_path / f"out.{preset.ext}" + optimize_image( + src, + tmp_path / f"out.{preset.ext}", + delete_src=False, + options=OptimizeOptions.of(png=preset.options), + ) + assert os.path.getsize(dst) < os.path.getsize(src) + + image_bytes = "" + with open(src, "rb") as fl: + image_bytes = fl.read() + byte_stream = io.BytesIO(image_bytes) + dst_bytes = optimize_png(src=byte_stream, options=preset.options) + assert isinstance(dst_bytes, io.BytesIO) + assert dst_bytes.getbuffer().nbytes < byte_stream.getbuffer().nbytes + + 
+@pytest.mark.parametrize( + "preset,expected_version,options", + [ + (JpegLow(), 1, {"quality": 45, "keep_exif": False, "fast_mode": True}), ( JpegMedium(), 1, {"quality": 65, "keep_exif": False, "fast_mode": True}, - "jpg", ), - (JpegHigh(), 1, {"quality": 80, "keep_exif": True, "fast_mode": True}, "jpg"), + (JpegHigh(), 1, {"quality": 80, "keep_exif": True, "fast_mode": True}), ], ) -def test_preset( - preset: ( - WebpLow - | WebpMedium - | WebpHigh - | JpegLow - | JpegMedium - | JpegHigh - | PngLow - | PngMedium - | PngHigh - ), +def test_image_preset_jpg( + preset: JpegLow | JpegMedium | JpegHigh, expected_version: int, options: dict[str, str | bool | int | None], - fmt: str, - png_image: pathlib.Path, jpg_image: pathlib.Path, - gif_image: pathlib.Path, - webp_image: pathlib.Path, tmp_path: pathlib.Path, ): assert preset.VERSION == expected_version - assert preset.options == options - src, dst = get_src_dst( - tmp_path, - fmt, - png_image=png_image, - jpg_image=jpg_image, - gif_image=gif_image, - webp_image=webp_image, - ) + assert preset.ext == "jpg" + assert preset.mimetype == "image/jpeg" + + default_options = OptimizeJpgOptions() + preset_options = asdict(preset.options) + + for key, value in preset_options.items(): + assert value == ( + options[key] if key in options else getattr(default_options, key) + ) + + src = jpg_image + dst = tmp_path / f"out.{preset.ext}" optimize_image( src, - dst, + tmp_path / f"out.{preset.ext}", delete_src=False, - **preset.options, # pyright: ignore[reportArgumentType] + options=OptimizeOptions.of(jpg=preset.options), ) assert os.path.getsize(dst) < os.path.getsize(src) - if fmt in ["jpg", "webp", "png"]: - image_bytes = "" - with open(src, "rb") as fl: - image_bytes = fl.read() - byte_stream = io.BytesIO(image_bytes) - dst_bytes = get_optimization_method(fmt)(src=byte_stream, **preset.options) - assert dst_bytes.getbuffer().nbytes < byte_stream.getbuffer().nbytes + image_bytes = "" + with open(src, "rb") as fl: + image_bytes 
= fl.read() + byte_stream = io.BytesIO(image_bytes) + dst_bytes = optimize_jpeg(src=byte_stream, options=preset.options) + assert isinstance(dst_bytes, io.BytesIO) + assert dst_bytes.getbuffer().nbytes < byte_stream.getbuffer().nbytes def test_optimize_image_unsupported_format(): @@ -640,7 +788,7 @@ def test_optimize_image_unsupported_format(): optimize_image(src, dst, delete_src=False) -def test_preset_has_mime_and_ext(): +def test_image_preset_has_mime_and_ext(): for _, preset in ALL_PRESETS: assert preset().ext assert preset().mimetype.startswith("image/") @@ -648,7 +796,9 @@ def test_preset_has_mime_and_ext(): def test_remove_png_transparency(png_image: pathlib.Path, tmp_path: pathlib.Path): dst = tmp_path / "out.png" - optimize_png(src=png_image, dst=dst, remove_transparency=True) + optimize_png( + src=png_image, dst=dst, options=OptimizePngOptions(remove_transparency=True) + ) assert os.path.getsize(dst) == 2352 @@ -683,7 +833,7 @@ def test_jpeg_exif_preserve(jpg_exif_image: pathlib.Path, tmp_path: pathlib.Path def test_dynamic_jpeg_quality(jpg_image: pathlib.Path, tmp_path: pathlib.Path): # check optimization without fast mode dst = tmp_path / "out.jpg" - optimize_jpeg(src=jpg_image, dst=dst, fast_mode=False) + optimize_jpeg(src=jpg_image, dst=dst, options=OptimizeJpgOptions(fast_mode=False)) assert os.path.getsize(dst) < os.path.getsize(jpg_image) @@ -822,12 +972,83 @@ def test_is_valid_image( def test_optimize_gif_no_optimize_level( gif_image: pathlib.Path, tmp_path: pathlib.Path ): - optimize_gif(gif_image, tmp_path / "out.gif", delete_src=False, optimize_level=None) + optimize_gif( + gif_image, tmp_path / "out.gif", options=OptimizeGifOptions(optimize_level=None) + ) def test_optimize_gif_no_no_extensions(gif_image: pathlib.Path, tmp_path: pathlib.Path): - optimize_gif(gif_image, tmp_path / "out.gif", delete_src=False, no_extensions=None) + optimize_gif( + gif_image, tmp_path / "out.gif", options=OptimizeGifOptions(no_extensions=None) + ) def 
test_optimize_gif_no_interlace(gif_image: pathlib.Path, tmp_path: pathlib.Path): - optimize_gif(gif_image, tmp_path / "out.gif", delete_src=False, interlace=None) + optimize_gif( + gif_image, tmp_path / "out.gif", options=OptimizeGifOptions(interlace=None) + ) + + +@pytest.mark.parametrize( + "fmt, preset", + [ + ("png", "low"), + ("jpg", "low"), + ("gif", "low"), + ("webp", "low"), + ("png", "medium"), + ("jpg", "medium"), + ("gif", "medium"), + ("webp", "medium"), + ("png", "high"), + ("jpg", "high"), + ("gif", "high"), + ("webp", "high"), + ], +) +def test_optimize_any_image( + png_image: pathlib.Path, + jpg_image: pathlib.Path, + gif_image: pathlib.Path, + webp_image: pathlib.Path, + tmp_path: pathlib.Path, + fmt: str, + preset: str, +): + src, dst = get_src_dst( + tmp_path, + fmt, + png_image=png_image, + jpg_image=jpg_image, + gif_image=gif_image, + webp_image=webp_image, + ) + # test call to optimize_image where src format is not set and all options are + # different than default values, just checking that at least we can set these opts + optimize_image( + src, + dst, + options=OptimizeOptions( + gif=( + GifLow.options + if preset == "low" + else GifHigh.options if preset == "high" else GifMedium.options + ), + webp=( + WebpLow.options + if preset == "low" + else WebpHigh.options if preset == "high" else WebpMedium.options + ), + jpg=( + JpegLow.options + if preset == "low" + else JpegHigh.options if preset == "high" else JpegMedium.options + ), + png=( + PngLow.options + if preset == "low" + else PngHigh.options if preset == "high" else PngMedium.options + ), + ), + ) + assert os.path.getsize(dst) < os.path.getsize(src)