Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
2f9a8ac
fix pylint warnings
ericvergnaud Mar 13, 2025
8e62ca4
add support for marshalling 'object' and 'any'
ericvergnaud Mar 13, 2025
15952ca
also support 'Any'
ericvergnaud Mar 13, 2025
a371465
lowercase 'any' is not a type
ericvergnaud Mar 14, 2025
5d16b1f
Merge branch 'main' into support-marshalling-of-object-and-any
asnare May 2, 2025
a7f0eb5
fix issue where genuine strings were converted to bool within a union
ericvergnaud May 15, 2025
43e7524
fix unmarshalling of complex unions
ericvergnaud May 15, 2025
179e504
Merge branch 'support-marshalling-of-object-and-any' of github.com:da…
ericvergnaud May 15, 2025
af2107f
fix crash when unmarshalling complex unions
ericvergnaud May 15, 2025
353233c
Merge branch 'main' into support-marshalling-of-object-and-any
ericvergnaud May 15, 2025
d9d2e7e
add tests
ericvergnaud May 19, 2025
9df6876
make weak types support optional for testing
ericvergnaud May 19, 2025
26aa538
fix unmarshalling of raw list
ericvergnaud May 19, 2025
10dd453
fix unmarshalling of raw dict
ericvergnaud May 19, 2025
5eab803
more scenarios
ericvergnaud May 19, 2025
d5a5cbd
more scenarios
ericvergnaud May 19, 2025
d86e651
fix unmarshalling of raw list
ericvergnaud May 19, 2025
efd6028
fix unmarshalling of raw dict
ericvergnaud May 19, 2025
4b9ab32
Merge branch 'support-marshalling-of-object-and-any' into marshalling…
ericvergnaud May 19, 2025
5512c27
more flags and scenarios
ericvergnaud May 19, 2025
83872af
fix marshalling of None
ericvergnaud May 19, 2025
80c9de9
fix marshalling of None
ericvergnaud May 19, 2025
f2c9268
fix marshalling of None
ericvergnaud May 19, 2025
d731738
fix marshalling of None
ericvergnaud May 19, 2025
ad2dee0
Merge branch 'support-marshalling-of-object-and-any' into marshalling…
ericvergnaud May 19, 2025
f962a1f
rename
ericvergnaud May 19, 2025
8e9ed4c
handle invalid conversion
ericvergnaud May 19, 2025
3cf14ba
handle invalid conversion
ericvergnaud May 19, 2025
2a7645f
fix crasher with union type
ericvergnaud May 19, 2025
7fbd299
fix incorrect test
ericvergnaud May 19, 2025
bc84426
formatting
ericvergnaud May 19, 2025
92c42d0
Merge branch 'support-marshalling-of-object-and-any' into marshalling…
ericvergnaud May 19, 2025
f26c031
more scenarios
ericvergnaud May 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 99 additions & 24 deletions src/databricks/labs/blueprint/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class Installation:

T = TypeVar("T")
_PRIMITIVES = (int, bool, float, str)
allow_raw_types = True
allow_weak_types = True

def __init__(self, ws: WorkspaceClient, product: str, *, install_folder: str | None = None):
"""The `Installation` class constructor creates an `Installation` object for the given product in
Expand Down Expand Up @@ -469,19 +471,27 @@ def _get_list_type_ref(inst: T) -> type[list[T]]:
item_type = type(from_list[0]) # type: ignore[misc]
return list[item_type] # type: ignore[valid-type]

# pylint: disable=too-complex
def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal` method is a private method that is used to serialize an object of type `type_ref` to
a dictionary. This method is called by the `save` method."""
if inst is None:
return None, False
none_allowed = type_ref is types.NoneType or (isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref))
return None, none_allowed
if isinstance(inst, databricks.sdk.core.Config):
return self._marshal_databricks_config(inst)
if hasattr(inst, "as_dict"):
return inst.as_dict(), True
if dataclasses.is_dataclass(type_ref):
return self._marshal_dataclass(type_ref, path, inst)
if type_ref == list:
return self._marshal_list(type_ref, path, inst)
if self.allow_raw_types:
if type_ref == list:
return self._marshal_raw_list(path, inst)
if type_ref == dict:
return self._marshal_raw_dict(path, inst)
if self.allow_weak_types:
if type_ref in (object, Any):
return self._marshal(type(inst), path, inst)
if isinstance(type_ref, enum.EnumMeta):
return self._marshal_enum(inst)
if type_ref == types.NoneType:
Expand Down Expand Up @@ -523,8 +533,8 @@ def _marshal_generic(self, type_ref: type, path: list[str], inst: Any) -> tuple[
if not type_args:
raise SerdeError(f"Missing type arguments: {type_args}")
if len(type_args) == 2:
return self._marshal_dict(type_args[1], path, inst)
return self._marshal_list(type_args[0], path, inst)
return self._marshal_generic_dict(type_args[1], path, inst)
return self._marshal_generic_list(type_args[0], path, inst)

@staticmethod
def _marshal_generic_alias(type_ref, inst):
Expand All @@ -534,29 +544,54 @@ def _marshal_generic_alias(type_ref, inst):
return None, False
return inst, isinstance(inst, type_ref.__origin__) # type: ignore[attr-defined]

def _marshal_list(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal_list` method is a private method that is used to serialize an object of type `type_ref` to
a dictionary. This method is called by the `save` method."""
def _marshal_generic_list(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal_generic_list` method is a private method that is used to serialize an object of type list[type_ref] to
an array. This method is called by the `save` method."""
as_list = []
if not isinstance(inst, list):
return None, False
for i, v in enumerate(inst):
value, ok = self._marshal(type_ref, [*path, f"{i}"], v)
if not ok:
raise SerdeError(self._explain_why(type_ref, [*path, f"{i}"], v))
raise SerdeError(self._explain_why(type(v), [*path, f"{i}"], v))
as_list.append(value)
return as_list, True

def _marshal_raw_list(self, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal_raw_list` method is a private method that is used to serialize an object of type list to
an array. This method is called by the `save` method."""
as_list = []
if not isinstance(inst, list):
return None, False
for i, v in enumerate(inst):
value, ok = self._marshal(type(v), [*path, f"{i}"], v)
if not ok:
raise SerdeError(self._explain_why(type(v), [*path, f"{i}"], v))
as_list.append(value)
return as_list, True

def _marshal_dict(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal_dict` method is a private method that is used to serialize an object of type `type_ref` to
def _marshal_generic_dict(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal_generic_dict` method is a private method that is used to serialize an object of type dict[str, type_ref] to
a dictionary. This method is called by the `save` method."""
if not isinstance(inst, dict):
return None, False
as_dict = {}
for k, v in inst.items():
as_dict[k], ok = self._marshal(type_ref, [*path, k], v)
if not ok:
raise SerdeError(self._explain_why(type_ref, [*path, k], v))
raise SerdeError(self._explain_why(type(v), [*path, k], v))
return as_dict, True

def _marshal_raw_dict(self, path: list[str], inst: Any) -> tuple[Any, bool]:
"""The `_marshal_raw_dict` method is a private method that is used to serialize an object of type dict to
a dictionary. This method is called by the `save` method."""
if not isinstance(inst, dict):
return None, False
as_dict = {}
for k, v in inst.items():
as_dict[k], ok = self._marshal(type(v), [*path, k], v)
if not ok:
raise SerdeError(self._explain_why(type(v), [*path, k], v))
return as_dict, True

def _marshal_dataclass(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]:
Expand Down Expand Up @@ -616,6 +651,8 @@ def from_dict(cls, raw: dict):
def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None:
"""The `_unmarshal` method is a private method that is used to deserialize a dictionary to an object of type
`type_ref`. This method is called by the `load` method."""
if type_ref == types.NoneType:
return None
if dataclasses.is_dataclass(type_ref):
return cls._unmarshal_dataclass(inst, path, type_ref)
if isinstance(type_ref, enum.EnumMeta):
Expand All @@ -624,12 +661,14 @@ def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None:
return type_ref(inst)
if type_ref in cls._PRIMITIVES:
return cls._unmarshal_primitive(inst, type_ref)
if type_ref == list:
return cls._unmarshal_list(inst, path, Any)
if type_ref == dict:
return cls._unmarshal_dict(inst, path, Any)
if type_ref == databricks.sdk.core.Config:
if not inst:
inst = {}
return databricks.sdk.core.Config(**inst) # type: ignore[return-value]
if type_ref == types.NoneType:
return None
if isinstance(type_ref, cls._FromDict):
return type_ref.from_dict(inst)
return cls._unmarshal_generic_types(type_ref, path, inst)
Expand All @@ -646,8 +685,23 @@ def _unmarshal_generic_types(cls, type_ref, path, inst):
return cls._unmarshal_union(inst, path, type_ref)
if isinstance(type_ref, (_GenericAlias, types.GenericAlias)):
return cls._unmarshal_generic(inst, path, type_ref)
if cls.allow_weak_types and type_ref in (object, Any):
return cls._unmarshal_object(inst, path)
raise SerdeError(f'{".".join(path)}: unknown: {type_ref}: {inst}')

@classmethod
def _unmarshal_object(cls, inst, path):
    """Deserialize a value whose declared type is `object` or `Any`.

    With no useful declared type, the runtime type of `inst` decides how it is
    handled: `None` is returned as-is, primitives go through
    `_unmarshal_primitive`, and lists/dicts are deserialized element-wise with
    `object` as the element type, but only while `allow_raw_types` is enabled.
    Anything else raises `SerdeError`."""
    if inst is None:
        return None
    if isinstance(inst, (bool, int, float, str)):
        return cls._unmarshal_primitive(inst, type(inst))
    if cls.allow_raw_types and isinstance(inst, list):
        return cls._unmarshal_list(inst, path, object)
    if cls.allow_raw_types and isinstance(inst, dict):
        return cls._unmarshal_dict(inst, path, object)
    raise SerdeError(f'{".".join(path)}: unknown: {type(inst)}: {inst}')

@classmethod
def _unmarshal_dataclass(cls, inst, path, type_ref):
"""The `_unmarshal_dataclass` method is a private method that is used to deserialize a dictionary to an object
Expand Down Expand Up @@ -682,9 +736,14 @@ def _unmarshal_union(cls, inst, path, type_ref):
"""The `_unmarshal_union` method is a private method that is used to deserialize a dictionary to an object
of type `type_ref`. This method is called by the `load` method."""
for variant in get_args(type_ref):
value = cls._unmarshal(inst, path, variant)
if value:
return value
if variant == type(None) and inst is None:
return None
try:
value = cls._unmarshal(inst, path, variant)
if value is not None:
return value
except SerdeError:
pass
return None

@classmethod
Expand All @@ -706,14 +765,16 @@ def _unmarshal_generic(cls, inst, path, type_ref):
return cls._unmarshal_list(inst, path, type_args[0])

@classmethod
def _unmarshal_list(cls, inst, path, hint):
"""The `_unmarshal_list` method is a private method that is used to deserialize a dictionary to an object
def _unmarshal_list(cls, inst, path, type_ref):
"""The `_unmarshal_list` method is a private method that is used to deserialize an array to a list
of type `type_ref`. This method is called by the `load` method."""
if inst is None:
return None
if not isinstance(inst, list):
raise SerdeError(cls._explain_why(type_ref, path, inst))
as_list = []
for i, v in enumerate(inst):
as_list.append(cls._unmarshal(v, [*path, f"{i}"], hint))
as_list.append(cls._unmarshal(v, [*path, f"{i}"], type_ref or type(v)))
return as_list

@classmethod
Expand All @@ -733,10 +794,23 @@ def _unmarshal_dict(cls, inst, path, type_ref):
def _unmarshal_primitive(cls, inst, type_ref):
"""The `_unmarshal_primitive` method is a private method that is used to deserialize a dictionary to an object
of type `type_ref`. This method is called by the `load` method."""
if not inst:
if inst is None:
return None
if isinstance(inst, type_ref):
return inst
# convert from str to int if necessary
converted = type_ref(inst) # type: ignore[call-arg]
converted = inst
# convert from str
if isinstance(inst, str):
if type_ref in (int, float):
try:
converted = type_ref(inst) # type: ignore[call-arg]
except ValueError as exc:
raise SerdeError(f"Not a number {inst}!") from exc
elif type_ref == bool:
if inst.lower() == "true":
converted = True
elif inst.lower() == "false":
converted = False
return converted

@staticmethod
Expand All @@ -745,7 +819,8 @@ def _explain_why(type_ref: type, path: list[str], raw: Any) -> str:
type. This method is called by the `_unmarshal` and `_marshal` methods."""
if raw is None:
raw = "value is missing"
return f'{".".join(path)}: not a {type_ref.__name__}: {raw}'
type_name = getattr(type_ref, "__name__", str(type_ref))
return f'{".".join(path)}: not a {type_name}: {raw}'

@staticmethod
def _dump_json(as_dict: Json, _: type) -> bytes:
Expand Down
2 changes: 2 additions & 0 deletions src/databricks/labs/blueprint/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def __new__(cls, *args, **kwargs):
# Force all initialisation to go via __init__() irrespective of the (Python-specific) base version.
return object.__new__(cls)

# pylint: disable=super-init-not-called
def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None:
# We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the
# implementation of the public API.
Expand Down Expand Up @@ -398,6 +399,7 @@ def with_suffix(self: P, suffix: str) -> P:
raise ValueError(msg)
return self.with_name(stem + suffix)

# pylint: disable=arguments-differ
def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P:
normalized = self.with_segments(*other)
if self.anchor != normalized.anchor:
Expand Down
90 changes: 90 additions & 0 deletions tests/unit/test_installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,30 @@ class SampleClass:
assert loaded == saved


def test_generic_dict_object():
    """A `dict[str, object]` field holding lists, numbers, booleans and dicts survives save/load."""

    @dataclass
    class SampleClass:
        field: dict[str, object]

    target = "backups/SampleClass.json"
    payload = {"a": ["x", "y"], "b": [], "c": 3, "d": True, "e": {"a": "b"}}
    store = MockInstallation()
    original = SampleClass(field=payload)
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original


def test_generic_dict_any():
    """A `dict[str, typing.Any]` field holding lists, numbers, booleans and dicts survives save/load."""

    @dataclass
    class SampleClass:
        field: dict[str, typing.Any]

    target = "backups/SampleClass.json"
    payload = {"a": ["x", "y"], "b": [], "c": 3, "d": True, "e": {"a": "b"}}
    store = MockInstallation()
    original = SampleClass(field=payload)
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original


def test_generic_list_str() -> None:
@dataclass
class SampleClass:
Expand Down Expand Up @@ -548,3 +572,69 @@ class SampleClass:
installation.save(saved, filename="backups/SampleClass.json")
loaded = installation.load(SampleClass, filename="backups/SampleClass.json")
assert loaded == saved


def test_generic_list_object():
    """A `list[object]` field mixing lists, numbers, booleans and dicts survives save/load."""

    @dataclass
    class SampleClass:
        field: list[object]

    target = "backups/SampleClass.json"
    items = [["x", "y"], [], 3, True, {"a": "b"}]
    store = MockInstallation()
    original = SampleClass(field=items)
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original


def test_generic_list_any():
    """A `list[typing.Any]` field mixing lists, numbers, booleans and dicts survives save/load."""

    @dataclass
    class SampleClass:
        field: list[typing.Any]

    target = "backups/SampleClass.json"
    items = [["x", "y"], [], 3, True, {"a": "b"}]
    store = MockInstallation()
    original = SampleClass(field=items)
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original


def test_bool_in_union():
    """A plain string stored under a `bool | str` value type is loaded back as the same string."""

    @dataclass
    class SampleClass:
        field: dict[str, bool | str]

    target = "backups/SampleClass.json"
    store = MockInstallation()
    original = SampleClass(field={"a": "b"})
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original


# Recursive alias for JSON-compatible values. `None` is listed first and the
# container variants (`list`, `dict`) last; the recursion is expressed through
# the quoted forward reference "JsonType".
JsonType: typing.TypeAlias = None | bool | int | float | str | list["JsonType"] | dict[str, "JsonType"]


def test_complex_union():
    """A string value stored under the recursive `JsonType` union survives save/load."""

    @dataclass
    class SampleClass:
        field: dict[str, JsonType]

    target = "backups/SampleClass.json"
    store = MockInstallation()
    original = SampleClass(field={"a": "b"})
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original


# Recursive alias for JSON-compatible values with the container variants
# (`dict`, `list`) listed first and `None` last; the recursion is expressed
# through the quoted forward reference "JsonType2".
JsonType2: typing.TypeAlias = dict[str, "JsonType2"] | list["JsonType2"] | str | float | int | bool | None


def test_complex_union2():
    """A string value stored under the recursive `JsonType2` union survives save/load."""

    @dataclass
    class SampleClass:
        field: dict[str, JsonType2]

    target = "backups/SampleClass.json"
    store = MockInstallation()
    original = SampleClass(field={"a": "b"})
    store.save(original, filename=target)
    restored = store.load(SampleClass, filename=target)
    assert restored == original
2 changes: 1 addition & 1 deletion tests/unit/test_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_jobs_state():

state = InstallState(ws, "blueprint")

assert {"foo": "123"} == state.jobs
assert {"foo": 123} == state.jobs
assert {} == state.dashboards
ws.workspace.download.assert_called_with("/Users/foo/.blueprint/state.json")

Expand Down
Loading
Loading