diff --git a/docs/models/schema.md b/docs/models/schema.md index 74ddc46..9c97a89 100644 --- a/docs/models/schema.md +++ b/docs/models/schema.md @@ -5,10 +5,10 @@ The Table Schema model allows to manipulate a Pydantic model in Python according ## Usage ```python -from dplib.models import Schema, Field +from dplib.models import Schema, IntegerField schema = Schema() -schema.add_field(Field(name='id', type='integer')) +schema.add_field(IntegerField(name='id')) schema.missingValues = ['-'] print(schema.to_text(format="json")) ``` @@ -28,7 +28,29 @@ print(schema.to_text(format="json")) ## Reference ::: dplib.models.Schema -::: dplib.models.Field -::: dplib.models.Constraints +::: dplib.models.IFieldsMatch ::: dplib.models.ForeignKey ::: dplib.models.ForeignKeyReference +::: dplib.models.BaseField +::: dplib.models.Field +::: dplib.models.AnyField +::: dplib.models.ArrayField +::: dplib.models.BooleanField +::: dplib.models.DateField +::: dplib.models.DatetimeField +::: dplib.models.DurationField +::: dplib.models.GeojsonField +::: dplib.models.GeopointField +::: dplib.models.IntegerField +::: dplib.models.ListField +::: dplib.models.NumberField +::: dplib.models.ObjectField +::: dplib.models.StringField +::: dplib.models.TimeField +::: dplib.models.YearField +::: dplib.models.YearmonthField +::: dplib.models.BaseConstraints +::: dplib.models.CollectionConstraints +::: dplib.models.JsonConstraints +::: dplib.models.StringConstraints +::: dplib.models.ValueConstraints diff --git a/dplib/models/__init__.py b/dplib/models/__init__.py index 92106af..f488116 100644 --- a/dplib/models/__init__.py +++ b/dplib/models/__init__.py @@ -1,8 +1,8 @@ from .contributor import Contributor from .dialect import Dialect -from .field import Constraints, Field +from .field import * from .license import License from .package import Package from .resource import Resource -from .schema import ForeignKey, ForeignKeyReference, Schema +from .schema import * from .source import Source diff --git 
a/dplib/models/field/__init__.py b/dplib/models/field/__init__.py index ea58d13..eec43e4 100644 --- a/dplib/models/field/__init__.py +++ b/dplib/models/field/__init__.py @@ -1,2 +1,4 @@ -from .constraints import Constraints +from .constraints import * +from .datatypes import * from .field import Field +from .types import IField diff --git a/dplib/models/field/__spec__/test_field.py b/dplib/models/field/__spec__/test_field.py index 73e63dd..ed5ff49 100644 --- a/dplib/models/field/__spec__/test_field.py +++ b/dplib/models/field/__spec__/test_field.py @@ -1,14 +1,14 @@ -from dplib.models import Field +from dplib.models import AnyField, IntegerField def test_field_defaults(): - field = Field() + field = AnyField() assert field.type == "any" assert field.missingValues == [""] def test_field_constraints(): - field = Field() + field = IntegerField() field.constraints.minimum = 1 assert field.constraints.minimum == 1 assert field.to_dict() == {"constraints": {"minimum": 1}} diff --git a/dplib/models/field/constraints.py b/dplib/models/field/constraints.py deleted file mode 100644 index 2035695..0000000 --- a/dplib/models/field/constraints.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import annotations - -from typing import Any, List, Optional - -from ... 
import types -from ...system import Model - - -class Constraints(Model): - required: Optional[bool] = None - unique: Optional[bool] = None - enum: Optional[List[Any]] = None - minimum: Optional[Any] = None - maximum: Optional[Any] = None - exclusiveMinimum: Optional[Any] = None - exclusiveMaximum: Optional[Any] = None - maximum: Optional[Any] = None - minLength: Optional[int] = None - maxLength: Optional[int] = None - jsonSchema: Optional[types.IData] = None - pattern: Optional[str] = None diff --git a/dplib/models/field/constraints/__init__.py b/dplib/models/field/constraints/__init__.py new file mode 100644 index 0000000..eb84ae8 --- /dev/null +++ b/dplib/models/field/constraints/__init__.py @@ -0,0 +1,5 @@ +from .base import BaseConstraints +from .collection import CollectionConstraints +from .json import JsonConstraints +from .string import StringConstraints +from .value import ValueConstraints diff --git a/dplib/models/field/constraints/base.py b/dplib/models/field/constraints/base.py new file mode 100644 index 0000000..a81879a --- /dev/null +++ b/dplib/models/field/constraints/base.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Generic, List, Optional, TypeVar, Union + +from ....system import Model + +NativeType = TypeVar("NativeType") + + +class BaseConstraints(Model, Generic[NativeType]): + required: Optional[bool] = None + unique: Optional[bool] = None + enum: Optional[List[Union[str, NativeType]]] = None diff --git a/dplib/models/field/constraints/collection.py b/dplib/models/field/constraints/collection.py new file mode 100644 index 0000000..aba3281 --- /dev/null +++ b/dplib/models/field/constraints/collection.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from typing import Optional + +from .base import BaseConstraints + + +class CollectionConstraints(BaseConstraints[str]): + minLength: Optional[int] = None + maxLength: Optional[int] = None diff --git a/dplib/models/field/constraints/json.py 
b/dplib/models/field/constraints/json.py new file mode 100644 index 0000000..d9ca610 --- /dev/null +++ b/dplib/models/field/constraints/json.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional + +from .collection import CollectionConstraints + + +class JsonConstraints(CollectionConstraints): + jsonSchema: Optional[Dict[str, Any]] = None diff --git a/dplib/models/field/constraints/string.py b/dplib/models/field/constraints/string.py new file mode 100644 index 0000000..193b108 --- /dev/null +++ b/dplib/models/field/constraints/string.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from typing import Optional + +from .collection import CollectionConstraints + + +class StringConstraints(CollectionConstraints): + pattern: Optional[str] = None diff --git a/dplib/models/field/constraints/value.py b/dplib/models/field/constraints/value.py new file mode 100644 index 0000000..c929655 --- /dev/null +++ b/dplib/models/field/constraints/value.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Generic, Optional, TypeVar, Union + +from .base import BaseConstraints + +NativeType = TypeVar("NativeType") + + +# TODO: tweak serialization if needed +class ValueConstraints(BaseConstraints[NativeType], Generic[NativeType]): + minimum: Optional[Union[str, NativeType]] = None + maximum: Optional[Union[str, NativeType]] = None + exclusiveMinimum: Optional[Union[str, NativeType]] = None + exclusiveMaximum: Optional[Union[str, NativeType]] = None diff --git a/dplib/models/field/datatypes/__init__.py b/dplib/models/field/datatypes/__init__.py new file mode 100644 index 0000000..8ed1d40 --- /dev/null +++ b/dplib/models/field/datatypes/__init__.py @@ -0,0 +1,17 @@ +from .any import AnyField +from .array import ArrayField +from .base import BaseField +from .boolean import BooleanField +from .date import DateField +from .datetime import DatetimeField +from .duration import DurationField +from .geojson import 
GeojsonField +from .geopoint import GeopointField +from .integer import IntegerField +from .list import ListField +from .number import NumberField +from .object import ObjectField +from .string import StringField +from .time import TimeField +from .year import YearField +from .yearmonth import YearmonthField diff --git a/dplib/models/field/datatypes/any.py b/dplib/models/field/datatypes/any.py new file mode 100644 index 0000000..1e0a571 --- /dev/null +++ b/dplib/models/field/datatypes/any.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import BaseConstraints +from .base import BaseField + + +class AnyField(BaseField): + """The field contains values of an unspecified or mixed type.""" + + type: Literal["any"] = "any" + format: Optional[Literal["default"]] = None + constraints: BaseConstraints[str] = pydantic.Field(default_factory=BaseConstraints) diff --git a/dplib/models/field/datatypes/array.py b/dplib/models/field/datatypes/array.py new file mode 100644 index 0000000..ec30ca8 --- /dev/null +++ b/dplib/models/field/datatypes/array.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import JsonConstraints +from .base import BaseField + + +class ArrayField(BaseField): + """The field contains a valid JSON array.""" + + type: Literal["array"] = "array" + format: Optional[Literal["default"]] = None + constraints: JsonConstraints = pydantic.Field(default_factory=JsonConstraints) diff --git a/dplib/models/field/datatypes/base.py b/dplib/models/field/datatypes/base.py new file mode 100644 index 0000000..a29aa22 --- /dev/null +++ b/dplib/models/field/datatypes/base.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import List, Optional + +import pydantic + +from .... 
import types +from ....system import Model + + +class BaseField(Model): + """Base Field""" + + name: Optional[str] = None + """ + The field descriptor MUST contain a name property. + """ + + title: Optional[str] = None + """ + A human readable label or title for the field + """ + + description: Optional[str] = None + """ + A description for this field e.g. “The recipient of the funds” + """ + + missingValues: List[str] = [""] + """ + A list of field values to consider as null values + """ + + # Compat + + @pydantic.model_validator(mode="before") + @classmethod + def compat(cls, data: types.IData): + if not isinstance(data, dict): # type: ignore + return data + + # field.format + format = data.get("format") + if format: + if format.startswith("fmt:"): + data["format"] = format[4:] + + return data diff --git a/dplib/models/field/datatypes/boolean.py b/dplib/models/field/datatypes/boolean.py new file mode 100644 index 0000000..580e387 --- /dev/null +++ b/dplib/models/field/datatypes/boolean.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import List, Literal, Optional + +import pydantic + +from ..constraints import BaseConstraints +from .base import BaseField + + +class BooleanField(BaseField): + """The field contains boolean (true/false) data.""" + + type: Literal["boolean"] = "boolean" + format: Optional[Literal["default"]] = None + constraints: BaseConstraints[bool] = pydantic.Field(default_factory=BaseConstraints) + + trueValues: List[str] = ["true", "True", "TRUE", "1"] + """ + Values to be interpreted as “true” for boolean fields + """ + + falseValues: List[str] = ["false", "False", "FALSE", "0"] + """ + Values to be interpreted as “false” for boolean fields + """ diff --git a/dplib/models/field/datatypes/date.py b/dplib/models/field/datatypes/date.py new file mode 100644 index 0000000..9ecfa59 --- /dev/null +++ b/dplib/models/field/datatypes/date.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import datetime +from typing 
import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class DateField(BaseField): + """The field contains a date without a time.""" + + type: Literal["date"] = "date" + format: Optional[str] = None + constraints: ValueConstraints[datetime.date] = pydantic.Field( + default_factory=ValueConstraints + ) diff --git a/dplib/models/field/datatypes/datetime.py b/dplib/models/field/datatypes/datetime.py new file mode 100644 index 0000000..e27251c --- /dev/null +++ b/dplib/models/field/datatypes/datetime.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import datetime +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class DatetimeField(BaseField): + """The field contains a date with a time.""" + + type: Literal["datetime"] = "datetime" + format: Optional[str] = None + constraints: ValueConstraints[datetime.datetime] = pydantic.Field( + default_factory=ValueConstraints + ) diff --git a/dplib/models/field/datatypes/duration.py b/dplib/models/field/datatypes/duration.py new file mode 100644 index 0000000..0648079 --- /dev/null +++ b/dplib/models/field/datatypes/duration.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class DurationField(BaseField): + """The field contains a duration of time.""" + + type: Literal["duration"] = "duration" + format: Optional[Literal["default"]] = None + constraints: ValueConstraints[str] = pydantic.Field(default_factory=ValueConstraints) diff --git a/dplib/models/field/datatypes/geojson.py b/dplib/models/field/datatypes/geojson.py new file mode 100644 index 0000000..fcf99f9 --- /dev/null +++ b/dplib/models/field/datatypes/geojson.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import Literal, Optional, Union + 
+import pydantic + +from ..constraints import BaseConstraints +from .base import BaseField + +IGeojsonFormat = Union[ + Literal["default"], + Literal["topojson"], +] + + +class GeojsonField(BaseField): + """The field contains a JSON object according to GeoJSON or TopoJSON spec.""" + + type: Literal["geojson"] = "geojson" + format: Optional[IGeojsonFormat] = None + constraints: BaseConstraints[str] = pydantic.Field(default_factory=BaseConstraints) diff --git a/dplib/models/field/datatypes/geopoint.py b/dplib/models/field/datatypes/geopoint.py new file mode 100644 index 0000000..f35369d --- /dev/null +++ b/dplib/models/field/datatypes/geopoint.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Literal, Optional, Union + +import pydantic + +from ..constraints import BaseConstraints +from .base import BaseField + +IGeojsonFormat = Union[ + Literal["default"], + Literal["array"], + Literal["object"], +] + + +class GeopointField(BaseField): + """The field contains data describing a geographic point.""" + + type: Literal["geopoint"] = "geopoint" + format: Optional[IGeojsonFormat] = None + constraints: BaseConstraints[str] = pydantic.Field(default_factory=BaseConstraints) diff --git a/dplib/models/field/datatypes/integer.py b/dplib/models/field/datatypes/integer.py new file mode 100644 index 0000000..7e584a7 --- /dev/null +++ b/dplib/models/field/datatypes/integer.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class IntegerField(BaseField): + """The field contains integers - that is whole numbers.""" + + type: Literal["integer"] = "integer" + format: Optional[Literal["default"]] = None + constraints: ValueConstraints[int] = pydantic.Field(default_factory=ValueConstraints) + + groupChar: Optional[str] = None + """ + String whose value is used to group digits for integer/number fields + """ + + 
bareNumber: bool = True + """ + If false leading and trailing non numbers will be removed for integer/number fields + """ diff --git a/dplib/models/field/datatypes/list.py b/dplib/models/field/datatypes/list.py new file mode 100644 index 0000000..aef1cc2 --- /dev/null +++ b/dplib/models/field/datatypes/list.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import Literal, Optional, Union + +import pydantic + +from ..constraints import CollectionConstraints +from .base import BaseField + +IItemType = Union[ + Literal["boolean"], + Literal["date"], + Literal["datetime"], + Literal["integer"], + Literal["number"], + Literal["string"], + Literal["time"], +] + + +class ListField(BaseField): + """The field contains data that is an ordered + one-level depth collection of primitive values with a fixed item type. + """ + + type: Literal["list"] = "list" + format: Optional[Literal["default"]] = None + constraints: CollectionConstraints = pydantic.Field( + default_factory=CollectionConstraints + ) + + delimiter: Optional[str] = None + """ + Specifies the character sequence which separates lexically represented list items. + """ + + itemType: Optional[IItemType] = None + """ + Specifies the list item type in terms of existent Table Schema types. + """ diff --git a/dplib/models/field/datatypes/number.py b/dplib/models/field/datatypes/number.py new file mode 100644 index 0000000..a238a3e --- /dev/null +++ b/dplib/models/field/datatypes/number.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class NumberField(BaseField): + """The field contains numbers of any kind including decimals.""" + + type: Literal["number"] = "number" + format: Optional[Literal["default"]] = None + constraints: ValueConstraints[float] = pydantic.Field( + default_factory=ValueConstraints + ) + + decimalChar: str = "." 
+ """ + String whose value is used to represent a decimal point for number fields + """ + + groupChar: Optional[str] = None + """ + String whose value is used to group digits for integer/number fields + """ + + bareNumber: bool = True + """ + If false leading and trailing non numbers will be removed for integer/number fields + """ diff --git a/dplib/models/field/datatypes/object.py b/dplib/models/field/datatypes/object.py new file mode 100644 index 0000000..427a342 --- /dev/null +++ b/dplib/models/field/datatypes/object.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import JsonConstraints +from .base import BaseField + + +class ObjectField(BaseField): + """The field contains a valid JSON object.""" + + type: Literal["object"] = "object" + format: Optional[Literal["default"]] = None + constraints: JsonConstraints = pydantic.Field(default_factory=JsonConstraints) diff --git a/dplib/models/field/datatypes/string.py b/dplib/models/field/datatypes/string.py new file mode 100644 index 0000000..8e26c6c --- /dev/null +++ b/dplib/models/field/datatypes/string.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Literal, Optional, Union + +import pydantic + +from ..constraints import StringConstraints +from .base import BaseField + +IStringFormat = Union[ + Literal["binary"], + Literal["default"], + Literal["email"], + Literal["uri"], + Literal["uuid"], +] + + +class StringField(BaseField): + """The field contains strings, that is, sequences of characters.""" + + type: Literal["string"] = "string" + format: Optional[IStringFormat] = None + constraints: StringConstraints = pydantic.Field(default_factory=StringConstraints) diff --git a/dplib/models/field/datatypes/time.py b/dplib/models/field/datatypes/time.py new file mode 100644 index 0000000..1f24ad3 --- /dev/null +++ b/dplib/models/field/datatypes/time.py @@ -0,0 +1,19 @@ +from __future__ import annotations + 
+import datetime +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class TimeField(BaseField): + """The field contains a time without a date.""" + + type: Literal["time"] = "time" + format: Optional[str] = None + constraints: ValueConstraints[datetime.time] = pydantic.Field( + default_factory=ValueConstraints + ) diff --git a/dplib/models/field/datatypes/year.py b/dplib/models/field/datatypes/year.py new file mode 100644 index 0000000..135352d --- /dev/null +++ b/dplib/models/field/datatypes/year.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class YearField(BaseField): + """The field contains a calendar year.""" + + type: Literal["year"] = "year" + format: Optional[Literal["default"]] = None + constraints: ValueConstraints[int] = pydantic.Field(default_factory=ValueConstraints) diff --git a/dplib/models/field/datatypes/yearmonth.py b/dplib/models/field/datatypes/yearmonth.py new file mode 100644 index 0000000..be205bf --- /dev/null +++ b/dplib/models/field/datatypes/yearmonth.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Literal, Optional + +import pydantic + +from ..constraints import ValueConstraints +from .base import BaseField + + +class YearmonthField(BaseField): + """The field contains a specific month of a specific year.""" + + type: Literal["yearmonth"] = "yearmonth" + format: Optional[Literal["default"]] = None + constraints: ValueConstraints[str] = pydantic.Field(default_factory=ValueConstraints) diff --git a/dplib/models/field/field.py b/dplib/models/field/field.py index 410675d..3b0f821 100644 --- a/dplib/models/field/field.py +++ b/dplib/models/field/field.py @@ -1,111 +1,16 @@ from __future__ import annotations -from typing import List, Optional +from typing import Literal, Optional 
import pydantic -from ... import types -from ...system import Model -from .constraints import Constraints -from .types import IFieldType, IItemType +from .constraints import BaseConstraints +from .datatypes import BaseField -class Field(Model): - """Table Schema Field model""" +class Field(BaseField): + """Field with unspecified type.""" - name: Optional[str] = None - """ - The field descriptor MUST contain a name property. - """ - - type: IFieldType = "any" - """ - A field’s type property is a string indicating the type of this field. - """ - - format: Optional[str] = None - """ - A field’s format property is a string, indicating a format for the field type. - """ - - title: Optional[str] = None - """ - A human readable label or title for the field - """ - - description: Optional[str] = None - """ - A description for this field e.g. “The recipient of the funds” - """ - - missingValues: List[str] = [""] - """ - A list of field values to consider as null values - """ - - constraints: Constraints = pydantic.Field(default_factory=Constraints) - """ - The constraints property on Table Schema Fields can be used by consumers - to list constraints for validating field values. - """ - - # Array - - # Boolean - - trueValues: Optional[List[str]] = None - """ - Values to be interpreted as “true” for boolean fields - """ - - falseValues: Optional[List[str]] = None - """ - Values to be interpreted as “false” for boolean fields - """ - - # Integer/Number - - bareNumber: Optional[bool] = None - """ - If false leading and trailing non numbers will be removed for integer/number fields - """ - - groupChar: Optional[str] = None - """ - String whose value is used to group digits for integer/number fields - """ - - # List - - delimiter: Optional[str] = None - """ - Specifies the character sequence which separates lexically represented list items. - """ - - itemType: Optional[IItemType] = None - """ - Specifies the list item type in terms of existent Table Schema types. 
- """ - - # Number - - decimalChar: Optional[str] = None - """ - String whose value is used to represent a decimal point for number fields - """ - - # Compat - - @pydantic.model_validator(mode="before") - @classmethod - def compat(cls, data: types.IData): - if not isinstance(data, dict): # type: ignore - return data - - # field.format - format = data.get("format") - if format: - if format.startswith("fmt:"): - data["format"] = format[4:] - - return data + type: Literal[None] = None + format: Optional[Literal["default"]] = None + constraints: BaseConstraints[str] = pydantic.Field(default_factory=BaseConstraints) diff --git a/dplib/models/field/types.py b/dplib/models/field/types.py index a6af949..523f38f 100644 --- a/dplib/models/field/types.py +++ b/dplib/models/field/types.py @@ -1,32 +1,32 @@ from __future__ import annotations -from typing import Literal, Union +from typing import Union -IFieldType = Union[ - Literal["any"], - Literal["array"], - Literal["boolean"], - Literal["date"], - Literal["datetime"], - Literal["duration"], - Literal["geojson"], - Literal["geopoint"], - Literal["integer"], - Literal["list"], - Literal["number"], - Literal["object"], - Literal["string"], - Literal["time"], - Literal["year"], - Literal["yearmonth"], -] +import pydantic +from typing_extensions import Annotated + +from . 
import datatypes +from .field import Field -IItemType = Union[ - Literal["boolean"], - Literal["date"], - Literal["datetime"], - Literal["integer"], - Literal["number"], - Literal["string"], - Literal["time"], +IField = Annotated[ + Union[ + Field, + datatypes.AnyField, + datatypes.ArrayField, + datatypes.BooleanField, + datatypes.DateField, + datatypes.DatetimeField, + datatypes.DurationField, + datatypes.GeojsonField, + datatypes.GeopointField, + datatypes.IntegerField, + datatypes.ListField, + datatypes.NumberField, + datatypes.ObjectField, + datatypes.StringField, + datatypes.TimeField, + datatypes.YearField, + datatypes.YearmonthField, + ], + pydantic.Field(discriminator="type"), ] diff --git a/dplib/models/resource/datatypes/__init__.py b/dplib/models/resource/datatypes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dplib/models/resource/datatypes/table.py b/dplib/models/resource/datatypes/table.py new file mode 100644 index 0000000..6d8e5ea --- /dev/null +++ b/dplib/models/resource/datatypes/table.py @@ -0,0 +1 @@ +# TODO: implement (see `field`) diff --git a/dplib/models/schema/__init__.py b/dplib/models/schema/__init__.py index 59f53a2..cb3070b 100644 --- a/dplib/models/schema/__init__.py +++ b/dplib/models/schema/__init__.py @@ -1,2 +1,3 @@ from .foreignKey import ForeignKey, ForeignKeyReference from .schema import Schema +from .types import IFieldsMatch diff --git a/dplib/models/schema/__spec__/test_schema.py b/dplib/models/schema/__spec__/test_schema.py index 636437d..0a95204 100644 --- a/dplib/models/schema/__spec__/test_schema.py +++ b/dplib/models/schema/__spec__/test_schema.py @@ -1,7 +1,7 @@ import pytest from pydantic import ValidationError -from dplib.models import Field, Schema +from dplib.models import IntegerField, Schema def test_schema_from_path(): @@ -54,7 +54,7 @@ def test_schema_set_proprty_invalid(): def test_schema_add_field(): schema = Schema() - schema.add_field(Field(name="id", type="integer")) + 
schema.add_field(IntegerField(name="id")) field = schema.get_field(name="id") assert field assert field.name == "id" diff --git a/dplib/models/schema/schema.py b/dplib/models/schema/schema.py index ed3e04a..590da06 100644 --- a/dplib/models/schema/schema.py +++ b/dplib/models/schema/schema.py @@ -6,7 +6,7 @@ from ... import settings, types from ...system import Model -from ..field import Field +from ..field import IField from .foreignKey import ForeignKey from .types import IFieldsMatch @@ -31,7 +31,7 @@ class Schema(Model): this also allows for simple plain text as plain text is itself valid markdown. """ - fields: List[Field] = [] + fields: List[IField] = [] """ List of fields in the table schema """ @@ -68,7 +68,7 @@ class Schema(Model): # Getters - def get_field(self, *, name: Optional[str] = None) -> Optional[Field]: + def get_field(self, *, name: Optional[str] = None) -> Optional[IField]: """Get a field by name Parameters: @@ -100,12 +100,12 @@ def get_field_types(self) -> List[str]: """ types: List[str] = [] for field in self.fields: - types.append(field.type) + types.append(field.type or "any") return types # Setters - def add_field(self, field: Field): + def add_field(self, field: IField): """Add a field to the schema Parameters: diff --git a/dplib/plugins/pandas/models/field.py b/dplib/plugins/pandas/models/field.py index 6571f8f..e87315c 100644 --- a/dplib/plugins/pandas/models/field.py +++ b/dplib/plugins/pandas/models/field.py @@ -8,8 +8,8 @@ import pandas as pd import pandas.core.dtypes.api as pdc +from dplib import models from dplib.error import Error -from dplib.models import Field from dplib.system import Model @@ -22,43 +22,46 @@ class PandasField(Model, arbitrary_types_allowed=True): # Converters - def to_dp(self) -> Field: + def to_dp(self) -> models.IField: """Convert to Table Schema Field Returns: Table Schema Field """ - field = Field(name=self.name) # Type + Field = models.Field if pdc.is_bool_dtype(self.dtype): # type: ignore - field.type = 
"boolean" + Field = models.BooleanField elif pdc.is_datetime64_any_dtype(self.dtype): # type: ignore - field.type = "datetime" + Field = models.DatetimeField elif pdc.is_integer_dtype(self.dtype): # type: ignore - field.type = "integer" + Field = models.IntegerField elif pdc.is_numeric_dtype(self.dtype): # type: ignore - field.type = "number" + Field = models.NumberField elif self.dvalue is not None: if isinstance(self.dvalue, (list, tuple)): # type: ignore - field.type = "array" + Field = models.ArrayField elif isinstance(self.dvalue, datetime.datetime): - field.type = "datetime" + Field = models.DatetimeField elif isinstance(self.dvalue, datetime.date): - field.type = "date" + Field = models.DateField elif isinstance(self.dvalue, isodate.Duration): # type: ignore - field.type = "duration" + Field = models.DurationField elif isinstance(self.dvalue, dict): - field.type = "object" + Field = models.ObjectField elif isinstance(self.dvalue, str): - field.type = "string" + Field = models.StringField elif isinstance(self.dvalue, datetime.time): - field.type = "time" + Field = models.TimeField + + # Name + field = Field(name=self.name) return field @classmethod - def from_dp(cls, field: Field) -> PandasField: + def from_dp(cls, field: models.IField) -> PandasField: """Create Pandas Field from Table Schema Field Parameters: diff --git a/dplib/plugins/polars/models/field.py b/dplib/plugins/polars/models/field.py index 84bd9c0..9b719ee 100644 --- a/dplib/plugins/polars/models/field.py +++ b/dplib/plugins/polars/models/field.py @@ -4,8 +4,8 @@ import polars as pl +from dplib import models from dplib.error import Error -from dplib.models import Field from dplib.system import Model @@ -18,39 +18,43 @@ class PolarsField(Model, arbitrary_types_allowed=True): # Converters - def to_dp(self) -> Field: + def to_dp(self) -> models.IField: """Convert to Table Schema Field Returns: Table Schema Field """ - field = Field(name=self.name) + # Type + Field = models.Field if self.dtype in 
ARRAY_TYPES: - field.type = "array" + Field = models.ArrayField elif self.dtype in BOOLEAN_TYPES: - field.type = "boolean" + Field = models.BooleanField elif self.dtype in DATE_TYPES: - field.type = "date" + Field = models.DateField elif self.dtype in DATETIME_TYPES: - field.type = "datetime" + Field = models.DatetimeField elif self.dtype in DURATION_TYPES: - field.type = "duration" + Field = models.DurationField elif self.dtype in INTEGER_TYPES: - field.type = "integer" + Field = models.IntegerField elif self.dtype in NUMBER_TYPES: - field.type = "number" + Field = models.NumberField elif self.dtype in OBJECT_TYPES: - field.type = "object" + Field = models.ObjectField elif self.dtype in STRING_TYPES: - field.type = "string" + Field = models.StringField elif self.dtype in TIME_TYPES: - field.type = "time" + Field = models.TimeField + + # Name + field = Field(name=self.name) return field @classmethod - def from_dp(cls, field: Field) -> PolarsField: + def from_dp(cls, field: models.IField) -> PolarsField: """Create Polars Field from Table Schema Field Parameters: diff --git a/dplib/plugins/sql/models/__spec__/test_schema.py b/dplib/plugins/sql/models/__spec__/test_schema.py index 21167eb..709cd42 100644 --- a/dplib/plugins/sql/models/__spec__/test_schema.py +++ b/dplib/plugins/sql/models/__spec__/test_schema.py @@ -1,7 +1,7 @@ import sqlalchemy as sa from sqlalchemy.dialects import postgresql as pg -from dplib.models import Field, Schema +from dplib.models import Schema, StringField from dplib.plugins.sql.models import SqlSchema @@ -93,7 +93,7 @@ def test_sql_schema_to_dp_round_trip(): def test_sql_schema_from_dp_with_string_constraints(): - field = Field(name="string", type="string") + field = StringField(name="string") field.constraints.minLength = 1 field.constraints.enum = ["a", "b", "c"] field.constraints.pattern = "^[a-z]+$" diff --git a/dplib/plugins/sql/models/field.py b/dplib/plugins/sql/models/field.py index a7ddc6c..3ad2438 100644 --- 
a/dplib/plugins/sql/models/field.py +++ b/dplib/plugins/sql/models/field.py @@ -10,7 +10,7 @@ from sqlalchemy.dialects import registry from sqlalchemy.schema import Column -from dplib.models import Field +from dplib import models from dplib.system import Model from . import settings @@ -23,33 +23,35 @@ class SqlField(Model, arbitrary_types_allowed=True): # Converters - def to_dp(self) -> Field: + def to_dp(self) -> models.IField: """Convert to Table Schema Field Returns: Table Schema Field """ - field = Field(name=self.column.name) - # Type + Field = models.Field if isinstance(self.column.type, ARRAY_TYPES): - field.type = "array" + Field = models.ArrayField elif isinstance(self.column.type, BOOLEAN_TYPES): - field.type = "boolean" + Field = models.BooleanField elif isinstance(self.column.type, DATE_TYPES): - field.type = "date" + Field = models.DateField elif isinstance(self.column.type, DATETIME_TYPES): - field.type = "datetime" + Field = models.DatetimeField elif isinstance(self.column.type, INTEGER_TYPES): - field.type = "integer" + Field = models.IntegerField elif isinstance(self.column.type, NUMBER_TYPES): - field.type = "number" + Field = models.NumberField elif isinstance(self.column.type, OBJECT_TYPES): - field.type = "object" - elif isinstance(self.column.type, TEXT_TYPES): - field.type = "string" + Field = models.ObjectField + elif isinstance(self.column.type, STRING_TYPES): + Field = models.StringField elif isinstance(self.column.type, TIME_TYPES): - field.type = "time" + Field = models.TimeField + + # Name + field = Field(name=self.column.name) # Description if self.column.comment: @@ -58,22 +60,23 @@ def to_dp(self) -> Field: # Constraints if not self.column.nullable: field.constraints.required = True - if isinstance(self.column.type, (sa.CHAR, sa.VARCHAR)): - if self.column.type.length: - field.constraints.maxLength = self.column.type.length - if isinstance(self.column.type, sa.CHAR): - if self.column.type.length: - field.constraints.minLength = 
self.column.type.length if isinstance(self.column.type, sa.Enum): if self.column.enums: field.constraints.enum = self.column.enums + if isinstance(field, models.StringField): + if isinstance(self.column.type, (sa.CHAR, sa.VARCHAR)): + if self.column.type.length: + field.constraints.maxLength = self.column.type.length + if isinstance(self.column.type, sa.CHAR): + if self.column.type.length: + field.constraints.minLength = self.column.type.length return field @classmethod def from_dp( cls, - field: Field, + field: models.IField, *, dialect: str = settings.DEFAULT_DIALECT, table_name: Optional[str] = None, @@ -148,7 +151,7 @@ def from_dp( checks.append(Check("LENGTH(%s) >= %s" % (quoted_name, min))) # Limit contstraints - if field.type in ["integer", "number"]: + if isinstance(field, (models.IntegerField, models.NumberField)): min = field.constraints.minimum max = field.constraints.maximum if min is not None: @@ -193,5 +196,4 @@ def from_dp( NUMBER_TYPES = (sa.Float, sa.Numeric) # type: ignore STRING_TYPES = (ml.BIT, ml.VARBINARY, ml.VARCHAR, pg.UUID, sa.Text, sa.VARCHAR) # type: ignore OBJECT_TYPES = (pg.JSONB, pg.JSON) -TEXT_TYPES = (sa.Text,) TIME_TYPES = (sa.Time,) diff --git a/pyproject.toml b/pyproject.toml index b211bd4..03db7af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,8 +90,8 @@ dependencies = [ coverage = [ "sensible-browser coverage/index.html", ] -build = [ - "mkdocs build", +docs = [ + "mkdocs serve", ] format = [ "ruff check --fix", @@ -110,9 +110,6 @@ release = [ git push --follow-tags """ ] -serve = [ - "mkdocs serve", -] spec = [ "pytest --cov dplib --cov-report term-missing --cov-report html:coverage --cov-fail-under 0 --timeout=300", ]