Merged

40 commits
bb5527d
WIP: reorg validation
msschwartz21 Jul 22, 2025
6074605
Specify mandatory int node id dtype in specification docs
msschwartz21 Jul 22, 2025
8bd1096
Validate that node ids are integers
msschwartz21 Jul 22, 2025
2df5987
Update test to reflect node dtype change
msschwartz21 Jul 22, 2025
7b3d01a
Move basic structural validation into a function that can be run on n…
msschwartz21 Jul 22, 2025
0e61b87
Add function for validating zarr structure with regards to axis metadata
msschwartz21 Jul 22, 2025
25ec2fa
Merge remote-tracking branch 'origin/main' into validator-framework
msschwartz21 Jul 23, 2025
b025edd
Mock up data validation function
msschwartz21 Jul 23, 2025
7e7914b
Update mock up for validation data/optional data api
msschwartz21 Jul 23, 2025
ce4c01a
Merge remote-tracking branch 'origin/main' into validator-framework
msschwartz21 Jul 23, 2025
7cd0ffe
Add edge/lineage/tracklet validation options
msschwartz21 Jul 23, 2025
885917e
Add in progress tests for new validate functions
msschwartz21 Jul 23, 2025
39d1427
WIP: Add shapes data validation
bentaculum Jul 23, 2025
7b771d3
Merge remote-tracking branch 'origin/main' into validator-framework
msschwartz21 Aug 13, 2025
b4dc731
Introduce data validate module
msschwartz21 Aug 13, 2025
77ce558
Fix typing problems post merge
msschwartz21 Aug 13, 2025
5adc83f
Fix tests
msschwartz21 Aug 13, 2025
be92065
Refactor shapes validation functions into stand alones
msschwartz21 Aug 13, 2025
7ddcc30
Merge remote-tracking branch 'origin/main' into validator-framework
msschwartz21 Aug 13, 2025
9b5a9e6
Test validate ellipsoid and update to handle arbitrary spatial dimens…
msschwartz21 Aug 14, 2025
04c5f85
Test sphere validation
msschwartz21 Aug 14, 2025
a7e1897
Make validate_axes_structure private
msschwartz21 Aug 23, 2025
fff4fdc
refactor: :construction: Refactor validation tests
cmalinmayor Aug 23, 2025
08e3937
Reorganize validate structure tests and remove some code that will be…
msschwartz21 Aug 23, 2025
b4e1b75
Use _path in validation tests
msschwartz21 Aug 23, 2025
5d362d6
Update validate node/edge/props subfunction tests to call the lower l…
msschwartz21 Aug 23, 2025
66dbfac
Refactor tests to be test classes instead of functions
msschwartz21 Aug 23, 2025
99f6538
Create separate testing file for validate.data
msschwartz21 Aug 23, 2025
aefc420
Test validate optional data
msschwartz21 Aug 23, 2025
3677b99
Expose new validation options in read functions
msschwartz21 Aug 23, 2025
a4d68ca
Add tests to improve coverage
msschwartz21 Aug 23, 2025
8bc9656
Validate unique node ids
msschwartz21 Aug 24, 2025
5ccfdce
Remove cast to tuple
msschwartz21 Aug 24, 2025
40584c5
Fix sad docstring
msschwartz21 Aug 24, 2025
5c22dc9
Consolidate all data validation into one function (instead of graph/o…
msschwartz21 Aug 24, 2025
f4bd40a
Fix benchmarking
msschwartz21 Aug 24, 2025
b6713c0
Node ids must be positive integers
msschwartz21 Aug 24, 2025
2eab0a5
Node ids must be unsigned int
msschwartz21 Aug 24, 2025
76cb9ee
Revert to just integer node ids for now, will enforce uint in another PR
msschwartz21 Aug 24, 2025
b8f2887
Merge branch 'main' into validator-framework
msschwartz21 Aug 24, 2025
2 changes: 1 addition & 1 deletion docs/specification.md
@@ -42,7 +42,7 @@ The optional `extra` object is a free-form dictionary that can hold any addition
## The `nodes` group
The nodes group will contain an `ids` array and optionally a `props` group.
### The `ids` array
The `nodes\ids` array is a 1D array of node IDs of length `N` >= 0, where `N` is the number of nodes in the graph. Node ids must be unique. Node IDs can have any type supported by zarr (except floats), but we recommend integer dtypes. For large graphs, `uint64` might be necessary to provide enough range for every node to have a unique ID. In the minimal case of an empty graph, the `ids` array will be present but empty.
The `nodes\ids` array is a 1D array of node IDs of length `N` >= 0, where `N` is the number of nodes in the graph. Node ids must be unique. Node IDs must have an unsigned integer dtype. For large graphs, `uint64` might be necessary to provide enough range for every node to have a unique ID. In the minimal case of an empty graph, the `ids` array will be present but empty.


### The `props` group and `node property` groups
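As a rough illustration of the updated requirement, the constraints stated in this paragraph could be checked with numpy as in the sketch below (illustrative only, not the library's validator):

```python
import numpy as np

# Node IDs as stored in nodes/ids: unique values with an unsigned
# integer dtype. uint64 leaves room for a unique ID per node in very
# large graphs.
ids = np.array([0, 1, 2, 7], dtype=np.uint64)

assert np.issubdtype(ids.dtype, np.unsignedinteger)
assert len(np.unique(ids)) == len(ids)  # ids must be unique
```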
2 changes: 1 addition & 1 deletion justfile
@@ -8,7 +8,7 @@ test-cov:

# run benchmarks
benchmark:
uv run --group bench pytest tests/bench.py
uv run --group bench pytest tests/test_bench.py

# build wheel and sdist
build:
21 changes: 15 additions & 6 deletions src/geff/_graph_libs/_api_wrapper.py
@@ -16,6 +16,7 @@

from geff._typing import PropDictNpArray
from geff.metadata._schema import GeffMetadata
from geff.validate.data import ValidationConfig

SupportedBackend = Literal["networkx", "rustworkx", "spatial-graph"]

@@ -107,41 +108,45 @@ def get_construct_func(backend: SupportedBackend) -> ConstructFunc[Any]:
@overload
def read(
store: StoreLike,
validate: bool = True,
structure_validation: bool = True,
node_props: list[str] | None = None,
edge_props: list[str] | None = None,
backend: Literal["networkx"] = "networkx",
data_validation: ValidationConfig | None = None,
) -> tuple[nx.Graph | nx.DiGraph, GeffMetadata]: ...


@overload
def read(
store: StoreLike,
validate: bool,
structure_validation: bool,
node_props: list[str] | None,
edge_props: list[str] | None,
backend: Literal["rustworkx"],
data_validation: ValidationConfig | None = None,
) -> tuple[rx.PyGraph | rx.PyDiGraph, GeffMetadata]: ...


@overload
def read(
store: StoreLike,
validate: bool,
structure_validation: bool,
node_props: list[str] | None,
edge_props: list[str] | None,
backend: Literal["spatial-graph"],
data_validation: ValidationConfig | None = None,
*,
position_attr: str = "position",
) -> tuple[sg.SpatialGraph | sg.SpatialDiGraph, GeffMetadata]: ...


def read(
store: StoreLike,
validate: bool = True,
structure_validation: bool = True,
node_props: list[str] | None = None,
edge_props: list[str] | None = None,
backend: SupportedBackend = "networkx",
data_validation: ValidationConfig | None = None,
**backend_kwargs: Any,
) -> tuple[Any, GeffMetadata]:
"""
@@ -150,7 +155,7 @@ def read(
Args:
store (StoreLike): The path or zarr store to the root of the geff zarr, where
the .attrs contains the geff metadata.
validate (bool, optional): Flag indicating whether to perform validation on the
structure_validation (bool, optional): Flag indicating whether to perform validation on the
geff file before loading into memory. If set to False and there are
format issues, will likely fail with a cryptic error. Defaults to True.
node_props (list of str, optional): The names of the node properties to load,
@@ -159,14 +164,18 @@
if None all properties will be loaded, defaults to None.
backend ({"networkx", "rustworkx", "spatial-graph"}): Flag for the chosen backend, default
is "networkx".
data_validation (ValidationConfig, optional): Optional configuration for which
optional types of data to validate. Each option defaults to False.
backend_kwargs (Any): Additional kwargs that may be accepted by
the backend when reading the data.

Returns:
tuple[Any, GeffMetadata]: Graph object of the chosen backend, and the GEFF metadata.
"""
construct_func = get_construct_func(backend)
in_memory_geff = read_to_memory(store, validate, node_props, edge_props)
in_memory_geff = read_to_memory(
store, structure_validation, node_props, edge_props, data_validation
)
return (
construct_func(**in_memory_geff, **backend_kwargs),
in_memory_geff["metadata"],
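For context, a call to the updated `read` signature might look like the following sketch. The store path is a placeholder, the default-constructed `ValidationConfig` is an assumption (its individual flags are not shown in this diff), and `read` is assumed to be re-exported at the package root.

```python
from geff import read  # assumed re-export of _graph_libs._api_wrapper.read
from geff.validate.data import ValidationConfig

# "example.geff" is a placeholder path to an existing geff zarr.
# Structure/metadata validation runs by default; the optional data
# checks are opted into via data_validation.
graph, metadata = read(
    "example.geff",
    structure_validation=True,
    backend="networkx",
    data_validation=ValidationConfig(),  # assumed default-constructible
)
print(type(graph), metadata)
```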
12 changes: 9 additions & 3 deletions src/geff/_graph_libs/_networkx.py
@@ -18,6 +18,7 @@
from zarr.storage import StoreLike

from geff._typing import PropDictNpArray
from geff.validate.data import ValidationConfig

import logging

@@ -190,28 +191,33 @@ def construct_nx(

def read_nx(
store: StoreLike,
validate: bool = True,
structure_validation: bool = True,
node_props: list[str] | None = None,
edge_props: list[str] | None = None,
data_validation: ValidationConfig | None = None,
) -> tuple[nx.Graph, GeffMetadata]:
"""Read a geff file into a networkx graph. Metadata properties will be stored in
the graph properties, accessed via `G.graph[key]` where G is a networkx graph.

Args:
store (str | Path | zarr store): The path/str to the geff zarr, or the store
itself. Opens in append mode, so will only overwrite geff-controlled groups.
validate (bool, optional): Flag indicating whether to perform validation on the
structure_validation (bool, optional): Flag indicating whether to perform validation on the
geff file before loading into memory. If set to False and there are
format issues, will likely fail with a cryptic error. Defaults to True.
node_props (list of str, optional): The names of the node properties to load,
if None all properties will be loaded, defaults to None.
edge_props (list of str, optional): The names of the edge properties to load,
if None all properties will be loaded, defaults to None.
data_validation (ValidationConfig, optional): Optional configuration for which
optional types of data to validate. Each option defaults to False.

Returns:
A networkx graph containing the graph that was stored in the geff file format
"""
in_memory_geff = read_to_memory(store, validate, node_props, edge_props)
in_memory_geff = read_to_memory(
store, structure_validation, node_props, edge_props, data_validation
)
graph = construct_nx(**in_memory_geff)

return graph, in_memory_geff["metadata"]
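A minimal sketch of the backend-specific reader with the renamed flag; the path and property names are placeholders, data validation is skipped by leaving `data_validation` as None, and `read_nx` is assumed to be re-exported at the package root.

```python
from geff import read_nx  # assumed re-export of _graph_libs._networkx.read_nx

# Load only selected node properties; placeholder path and property names.
graph, metadata = read_nx(
    "example.geff",
    structure_validation=True,
    node_props=["t", "x", "y"],
    edge_props=None,
)
print(graph.number_of_nodes(), graph.number_of_edges())
```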
12 changes: 9 additions & 3 deletions src/geff/_graph_libs/_rustworkx.py
@@ -28,6 +28,7 @@
from zarr.storage import StoreLike

from geff._typing import PropDictNpArray
from geff.validate.data import ValidationConfig


def get_roi_rx(
@@ -250,9 +251,10 @@

def read_rx(
store: StoreLike,
validate: bool = True,
structure_validation: bool = True,
node_props: list[str] | None = None,
edge_props: list[str] | None = None,
data_validation: ValidationConfig | None = None,
) -> tuple[rx.PyGraph | rx.PyDiGraph, GeffMetadata]:
"""Read a geff file into a rustworkx graph.
Metadata properties will be stored in the graph.attrs dict
Expand All @@ -264,16 +266,20 @@ def read_rx(

Args:
store: The path/str to the geff zarr, or the store itself.
validate: Whether to validate the geff file.
structure_validation: Whether to validate the geff file.
node_props: The names of the node properties to load,
if None all properties will be loaded, defaults to None.
edge_props: The names of the edge properties to load,
if None all properties will be loaded, defaults to None.
data_validation (ValidationConfig, optional): Optional configuration for which
optional types of data to validate. Each option defaults to False.

Returns:
A tuple containing the rustworkx graph and the metadata.
"""
graph_dict = read_to_memory(store, validate, node_props, edge_props)
graph_dict = read_to_memory(
store, structure_validation, node_props, edge_props, data_validation
)
graph = construct_rx(**graph_dict)

return graph, graph_dict["metadata"]
22 changes: 9 additions & 13 deletions src/geff/_graph_libs/_spatial_graph.py
@@ -21,6 +21,7 @@
from zarr.storage import StoreLike

from geff._typing import PropDictNpArray
from geff.validate.data import ValidationConfig

import geff
from geff.core_io import write_arrays
@@ -116,10 +117,11 @@

def read_sg(
store: StoreLike,
validate: bool = True,
structure_validation: bool = True,
position_attr: str = "position",
node_props: list[str] | None = None,
edge_props: list[str] | None = None,
data_validation: ValidationConfig | None = None,
) -> tuple[sg.SpatialGraph | sg.SpatialDiGraph, GeffMetadata]:
"""Read a geff file into a SpatialGraph.

Expand All @@ -129,37 +131,31 @@ def read_sg(
Args:

store (Path | str | zarr store):

The path to the root of the geff zarr, where the .attrs contains
the geff metadata.

validate (bool, optional):

structure_validation (bool, optional):
Flag indicating whether to perform validation on the geff file
before loading into memory. If set to False and there are format
issues, will likely fail with a cryptic error. Defaults to True.

position_attr (str, optional):

How to call the position attribute in the returned SpatialGraph.
Defaults to "position".

node_props (list of str, optional):

The names of the node properties to load, if None all properties
will be loaded, defaults to None.

edge_props (list of str, optional):

The names of the edge properties to load, if None all properties
will be loaded, defaults to None.
data_validation (ValidationConfig, optional): Optional configuration for which
optional types of data to validate. Each option defaults to False.

Returns:

A tuple containing the spatial_graph graph and the metadata.
"""

in_memory_geff = read_to_memory(store, validate, node_props, edge_props)
in_memory_geff = read_to_memory(
store, structure_validation, node_props, edge_props, data_validation
)
graph = construct_sg(**in_memory_geff, position_attr=position_attr)

return graph, in_memory_geff["metadata"]
18 changes: 13 additions & 5 deletions src/geff/core_io/_base_read.py
@@ -8,6 +8,7 @@
from geff import _path
from geff.core_io import _utils
from geff.metadata._schema import GeffMetadata
from geff.validate.data import ValidationConfig, validate_data
from geff.validate.structure import validate_structure

if TYPE_CHECKING:
@@ -214,9 +215,10 @@
# added to this function to select between them.
def read_to_memory(
source: StoreLike,
validate: bool = True,
structure_validation: bool = True,
node_props: Iterable[str] | None = None,
edge_props: Iterable[str] | None = None,
data_validation: ValidationConfig | None = None,
) -> InMemoryGeff:
"""
Read a GEFF zarr file to into memory as a series of numpy arrays in a dictionary.
Expand All @@ -227,9 +229,11 @@ def read_to_memory(
Args:
source (str | Path | zarr store): Either a path to the root of the geff zarr
(where the .attrs contains the geff metadata), or a zarr store object
validate (bool, optional): Flag indicating whether to perform validation on the
geff file before loading into memory. If set to False and there are
format issues, will likely fail with a cryptic error. Defaults to True.
structure_validation (bool, optional): Flag indicating whether to perform metadata/structure
validation on the geff file before loading into memory. If set to False and
there are format issues, will likely fail with a cryptic error. Defaults to True.
data_validation (ValidationConfig, optional): Optional configuration for which
optional types of data to validate. Each option defaults to False.
node_props (iterable of str, optional): The names of the node properties to load,
if None all properties will be loaded, defaults to None.
edge_props (iterable of str, optional): The names of the edge properties to load,
@@ -240,10 +244,14 @@
(metadata, node_ids, edge_ids, node_props, edge_props)
"""

file_reader = GeffReader(source, validate)
file_reader = GeffReader(source, structure_validation)

file_reader.read_node_props(node_props)
file_reader.read_edge_props(edge_props)

in_memory_geff = file_reader.build()

if data_validation is not None:
validate_data(config=data_validation, memory_geff=in_memory_geff)

return in_memory_geff
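The same options can be exercised at the core-io level. Below is a sketch under the assumption that `read_to_memory` is re-exported from `geff.core_io` (it is defined in `geff.core_io._base_read`) and that `ValidationConfig` is default-constructible; the store path is a placeholder.

```python
from geff.core_io import read_to_memory  # assumed re-export of _base_read.read_to_memory
from geff.validate.data import ValidationConfig, validate_data

# Structure validation happens up front; the optional data checks can
# also be run explicitly on the in-memory arrays afterwards.
memory_geff = read_to_memory("example.geff", structure_validation=True)
validate_data(config=ValidationConfig(), memory_geff=memory_geff)  # config assumed default-constructible
```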
9 changes: 5 additions & 4 deletions src/geff/testing/data.py
@@ -70,8 +70,9 @@
if TYPE_CHECKING:
from numpy.typing import NDArray


DTypeStr = Literal["double", "int", "int8", "uint8", "int16", "uint16", "float32", "float64", "str"]
NodeIdDTypeStr = Literal["int", "int8", "uint8", "int16", "uint16"]
NodeIdDTypeStr = Literal["uint", "uint8", "uint16", "uint32", "uint64"]
Axes = Literal["t", "z", "y", "x"]


@@ -509,7 +510,7 @@ def create_simple_2d_geff(
>>> # graph is a networkx Graph with 2D spatial data (x, y, t)
"""
return create_memory_mock_geff(
node_id_dtype="int",
node_id_dtype="uint",
node_axis_dtypes={"position": "float64", "time": "float64"},
directed=directed,
num_nodes=num_nodes,
@@ -568,7 +569,7 @@ def create_simple_3d_geff(
>>> x, y, z, t = node_data['x'], node_data['y'], node_data['z'], node_data['t']
"""
return create_memory_mock_geff(
node_id_dtype="int",
node_id_dtype="uint",
node_axis_dtypes={"position": "float64", "time": "float64"},
directed=directed,
num_nodes=num_nodes,
@@ -616,7 +617,7 @@ def create_simple_temporal_geff(
>>> # Each node has only 't' coordinate, no x, y, z
"""
return create_memory_mock_geff(
node_id_dtype="int",
node_id_dtype="uint",
node_axis_dtypes={"position": "float64", "time": "float64"},
directed=directed,
num_nodes=num_nodes,
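A small sanity check of the new `NodeIdDTypeStr` options, showing that each dtype string resolves to an unsigned integer dtype in numpy (illustrative only; it does not exercise the testing helpers themselves):

```python
import numpy as np

# Every NodeIdDTypeStr option maps to a numpy unsigned-integer dtype;
# "uint" resolves to the platform's default unsigned integer.
for name in ("uint", "uint8", "uint16", "uint32", "uint64"):
    assert np.issubdtype(np.dtype(name), np.unsignedinteger)
```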