-
Notifications
You must be signed in to change notification settings - Fork 10
Improve framework for validation #204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 31 commits
bb5527d
6074605
8bd1096
2df5987
7b3d01a
0e61b87
25ec2fa
b025edd
7e7914b
ce4c01a
7cd0ffe
885917e
39d1427
7b771d3
b4dc731
77ce558
5adc83f
be92065
7ddcc30
9b5a9e6
04c5f85
a7e1897
fff4fdc
08e3937
b4e1b75
5d362d6
66dbfac
99f6538
aefc420
3677b99
a4d68ca
8bc9656
5ccfdce
40584c5
5c22dc9
f4bd40a
b6713c0
2eab0a5
76cb9ee
b8f2887
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING | ||
|
|
||
| from pydantic import BaseModel | ||
|
|
||
| from geff.validate.graph import ( | ||
| validate_no_repeated_edges, | ||
| validate_no_self_edges, | ||
| validate_nodes_for_edges, | ||
| ) | ||
| from geff.validate.shapes import validate_ellipsoid, validate_sphere | ||
| from geff.validate.tracks import ( | ||
| validate_lineages, | ||
| validate_tracklets, | ||
| ) | ||
|
|
||
| if TYPE_CHECKING: | ||
| from geff._typing import InMemoryGeff | ||
|
|
||
|
|
||
def validate_zarr_data(memory_geff: InMemoryGeff) -> None:
    """Check the graph structure of an in-memory geff.

    Three checks run in a fixed order and the first failure aborts the rest:
    every edge must reference existing nodes, no edge may be a self edge, and
    no edge may be repeated.

    Args:
        memory_geff (InMemoryGeff): An InMemoryGeff object which contains metadata and
            dictionaries of node/edge property arrays

    Raises:
        ValueError: If any of the checks reports the data as invalid. The
            message contains the offending edges returned by that check.
    """
    nodes = memory_geff["node_ids"]
    edges = memory_geff["edge_ids"]

    # Each entry pairs a deferred check with the prefix of its error message.
    # The lambdas keep evaluation lazy so that a later check never runs once
    # an earlier one has failed.
    checks = (
        (lambda: validate_nodes_for_edges(nodes, edges), "Some edges are missing nodes:\n"),
        (lambda: validate_no_self_edges(edges), "Self edges found in data:\n"),
        (lambda: validate_no_repeated_edges(edges), "Repeated edges found in data:\n"),
    )

    for run_check, prefix in checks:
        ok, offending = run_check()
        if ok:
            continue
        raise ValueError(f"{prefix}{offending}")
|
|
||
|
|
||
class ValidationConfig(BaseModel):
    """Switches selecting which optional data validations are run.

    Every flag defaults to ``False``, so a default-constructed config enables
    none of the optional checks.
    """

    # NOTE(review): the PR discussion considered also carrying the node/edge
    # data in this config so that read/write functions take one argument;
    # graph-structure validation was reportedly left out of it. Only the
    # on/off flags below are visible here - confirm against the final design.

    # Enables the sphere check in validate_optional_data.
    sphere: bool = False
    # Enables the ellipsoid check in validate_optional_data.
    ellipsoid: bool = False
    # Enables the lineage check in validate_optional_data.
    lineage: bool = False
    # Enables the tracklet check in validate_optional_data.
    tracklet: bool = False
|
|
||
|
|
||
def validate_optional_data(config: ValidationConfig, memory_geff: InMemoryGeff) -> None:
    """Run data validation on optional data types based on the input.

    A check runs only when its flag is enabled in ``config`` and the metadata
    declares the corresponding property; otherwise it is skipped.

    Args:
        config (ValidationConfig): Configuration for which validation to run
        memory_geff (InMemoryGeff): An InMemoryGeff object which contains metadata and
            dictionaries of node/edge property arrays

    Raises:
        ValueError: If the tracklet or lineage validation reports errors. The
            message lists one error per line.
    """
    meta = memory_geff["metadata"]

    # The return values of the shape validators are not inspected here.
    if config.sphere and meta.sphere is not None:
        radius = memory_geff["node_props"][meta.sphere]["values"]
        validate_sphere(radius)

    if config.ellipsoid and meta.ellipsoid is not None:
        covariance = memory_geff["node_props"][meta.ellipsoid]["values"]
        validate_ellipsoid(covariance, meta.axes)

    track_props = meta.track_node_props
    if track_props is None:
        return

    # (enabled, key in track_node_props, validator, error-message header).
    # Tracklets are checked before lineages, so a tracklet failure is
    # reported first.
    track_checks = (
        (config.tracklet, "tracklet", validate_tracklets, "Found invalid tracklets:\n"),
        (config.lineage, "lineage", validate_lineages, "Found invalid lineages:\n"),
    )
    for enabled, kind, validator, header in track_checks:
        if not (enabled and kind in track_props):
            continue
        track_ids = memory_geff["node_props"][track_props[kind]]["values"]
        valid, errors = validator(memory_geff["node_ids"], memory_geff["edge_ids"], track_ids)
        if not valid:
            # Build ONE message string: passing the header and the joined
            # errors as two separate ValueError arguments makes str(exc)
            # render as a tuple repr instead of a readable message.
            raise ValueError(header + "\n".join(errors))
Uh oh!
There was an error while loading. Please reload this page.