Skip to content

Commit

Permalink
refactor: Clean up typing and helpers (#2)
Browse files Browse the repository at this point in the history
* refactor: extract reusable functions

* refactor: fix dataclass typing

* refactor: rename assertions
  • Loading branch information
msto authored Mar 24, 2024
1 parent ae683cb commit 3a99190
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 49 deletions.
Empty file added dataclass_io/_lib/__init__.py
Empty file.
10 changes: 6 additions & 4 deletions dataclass_io/lib.py → dataclass_io/_lib/assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from os import access
from pathlib import Path

from dataclass_io._lib.dataclass_extensions import DataclassInstance

def assert_readable_file(path: Path) -> None:

def assert_file_is_readable(path: Path) -> None:
"""
Check that the input file exists and is readable.
Expand All @@ -24,13 +26,13 @@ def assert_readable_file(path: Path) -> None:
raise PermissionError(f"The input file is not readable: {path}")


def assert_dataclass_is_valid(dataclass_type: type[DataclassInstance]) -> None:
    """
    Check that the input type is a parseable dataclass.

    Args:
        dataclass_type: The class to validate. This must be the dataclass type itself, not an
            instance of one.

    Raises:
        TypeError: If the provided value is not a dataclass type.
    """

    # `is_dataclass` returns True for dataclass *instances* as well as dataclass types, so the
    # argument is additionally required to be a class.
    if isinstance(dataclass_type, type) and is_dataclass(dataclass_type):
        return

    # Not every object has `__name__` (e.g. instances), so fall back to `repr` rather than
    # masking the intended `TypeError` with an `AttributeError`.
    name = getattr(dataclass_type, "__name__", None)
    if name is None:
        name = repr(dataclass_type)

    raise TypeError(f"The provided type must be a dataclass: {name}")
18 changes: 18 additions & 0 deletions dataclass_io/_lib/dataclass_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

from typing import Any
from typing import ClassVar
from typing import Protocol


class DataclassInstance(Protocol):
"""
Structural type hint for a non-specific instance of a dataclass.

`DataclassReader` is an iterator over instances of the specified dataclass type. However, the
actual type is not known prior to instantiation. This `Protocol` is used to type hint the return
signature of `DataclassReader`'s `__next__` method.

A class matches this protocol when it exposes the class-level `__dataclass_fields__` attribute
declared below.

Background: https://stackoverflow.com/a/55240861
"""

# Mirrors the class-level attribute that the `@dataclass` decorator sets on decorated classes.
__dataclass_fields__: ClassVar[dict[str, Any]]
37 changes: 6 additions & 31 deletions dataclass_io/reader.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,23 @@
from csv import DictReader
from dataclasses import dataclass
from dataclasses import fields
from dataclasses import is_dataclass
from io import TextIOWrapper
from pathlib import Path
from types import TracebackType
from typing import IO
from typing import Any
from typing import ClassVar
from typing import Optional
from typing import Protocol
from typing import TextIO
from typing import Type
from typing import TypeAlias

from dataclass_io.lib import assert_readable_dataclass
from dataclass_io.lib import assert_readable_file
from dataclass_io._lib.assertions import assert_dataclass_is_valid
from dataclass_io._lib.assertions import assert_file_is_readable
from dataclass_io._lib.dataclass_extensions import DataclassInstance

ReadableFileHandle: TypeAlias = TextIOWrapper | IO | TextIO


class DataclassInstance(Protocol):
"""
Structural type hint for a non-specific instance of a dataclass.

`DataclassReader` is an iterator over instances of the specified dataclass type. However, the
actual type is not known prior to instantiation. This `Protocol` is used to type hint the return
signature of `DataclassReader`'s `__next__` method.

A class matches this protocol when it exposes the class-level `__dataclass_fields__` attribute
declared below.

Background: https://stackoverflow.com/a/55240861
"""

# Mirrors the class-level attribute that the `@dataclass` decorator sets on decorated classes.
__dataclass_fields__: ClassVar[dict[str, Any]]


@dataclass(frozen=True, kw_only=True)
class FileHeader:
"""
Expand All @@ -55,7 +39,7 @@ class DataclassReader:
def __init__(
self,
path: Path,
dataclass_type: type,
dataclass_type: type[DataclassInstance],
delimiter: str = "\t",
header_comment_char: str = "#",
**kwds: Any,
Expand All @@ -72,17 +56,8 @@ def __init__(
TypeError: If the provided type is not a dataclass.
"""

assert_readable_file(path)
assert_readable_dataclass(dataclass_type)

# NB: Somewhat annoyingly, when this validation is extracted into an external helper,
# mypy can no longer recognize that `self._dataclass_type` is a dataclass, and complains
# about the return type on `_row_to_dataclass`.
#
# I'm leaving `assert_readable_dataclass` in case we want to extend the definition of what
# it means to be a valid dataclass, but this is needed here to satisfy type checking.
if not is_dataclass(dataclass_type):
raise TypeError(f"The provided type must be a dataclass: {dataclass_type.__name__}")
assert_file_is_readable(path)
assert_dataclass_is_valid(dataclass_type)

self.dataclass_type = dataclass_type
self.delimiter = delimiter
Expand Down
Empty file added tests/_lib/__init__.py
Empty file.
30 changes: 16 additions & 14 deletions tests/test_lib.py → tests/_lib/test_assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

import pytest

from dataclass_io.lib import assert_readable_dataclass
from dataclass_io.lib import assert_readable_file
from dataclass_io._lib.assertions import assert_dataclass_is_valid
from dataclass_io._lib.assertions import assert_file_is_readable


def test_assert_readable_dataclass() -> None:
def test_assert_dataclass_is_valid() -> None:
"""
Test that we can validate if a dataclass is valid for reading.
"""
Expand All @@ -18,12 +18,12 @@ class FakeDataclass:
bar: int

try:
assert_readable_dataclass(FakeDataclass)
assert_dataclass_is_valid(FakeDataclass)
except TypeError:
raise AssertionError("Failed to validate a valid dataclass") from None


def test_assert_readable_dataclass_raises_if_not_a_dataclass() -> None:
def test_assert_dataclass_is_valid_raises_if_not_a_dataclass() -> None:
"""
Test that we raise an error if the provided type is not a dataclass.
"""
Expand All @@ -33,10 +33,12 @@ class BadDataclass:
bar: int

with pytest.raises(TypeError, match="The provided type must be a dataclass: BadDataclass"):
assert_readable_dataclass(BadDataclass)
# mypy (correctly) flags that `BadDataclass` is not a dataclass.
# We still want to test that we can enforce this at runtime, so here it's ok to ignore.
assert_dataclass_is_valid(BadDataclass) # type: ignore[arg-type]


def test_assert_readable_file(tmp_path: Path) -> None:
def test_assert_file_is_readable(tmp_path: Path) -> None:
"""
Test that we can validate if a file is valid for reading.
"""
Expand All @@ -45,30 +47,30 @@ def test_assert_readable_file(tmp_path: Path) -> None:
fpath.touch()

try:
assert_readable_file(fpath)
assert_file_is_readable(fpath)
except Exception:
raise AssertionError("Failed to validate a valid file") from None


def test_assert_readable_file_raises_if_file_does_not_exist(tmp_path: Path) -> None:
def test_assert_file_is_readable_raises_if_file_does_not_exist(tmp_path: Path) -> None:
"""
Test that we raise a `FileNotFoundError` if the input file does not exist.
"""

# Nothing in this test creates the path below, so the file is missing when it is checked.
with pytest.raises(FileNotFoundError, match="The input file does not exist: "):
assert_readable_file(tmp_path / "does_not_exist.txt")
assert_file_is_readable(tmp_path / "does_not_exist.txt")


def test_assert_readable_file_raises_if_file_is_a_directory(tmp_path: Path) -> None:
def test_assert_file_is_readable_raises_if_file_is_a_directory(tmp_path: Path) -> None:
"""
Test that we raise an `IsADirectoryError` if the input path is a directory.
"""

# The `tmp_path` directory itself is passed in place of a file path.
with pytest.raises(IsADirectoryError, match="The input file path is a directory: "):
assert_readable_file(tmp_path)
assert_file_is_readable(tmp_path)


def test_assert_readable_file_raises_if_file_is_unreadable(tmp_path: Path) -> None:
def test_assert_file_is_readable_raises_if_file_is_unreadable(tmp_path: Path) -> None:
"""
Test that we can validate if a file cannot be read.
"""
Expand All @@ -77,4 +79,4 @@ def test_assert_readable_file_raises_if_file_is_unreadable(tmp_path: Path) -> No
fpath.touch(0)

with pytest.raises(PermissionError, match="The input file is not readable: "):
assert_readable_file(fpath)
assert_file_is_readable(fpath)

0 comments on commit 3a99190

Please sign in to comment.