Skip to content

Commit 941bf15

Browse files
authored
Merge pull request #198 from neo4j/fix-gds-sampling-setup
Fix bug in heterogeneous gds
2 parents 323f194 + 22c7dc9 commit 941bf15

File tree

4 files changed

+132
-153
lines changed

4 files changed

+132
-153
lines changed

changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
## Bug fixes
1111

12+
* Fixed a bug in `from_gds` where graphs with multiple relationship types would fail if those types had different properties.
13+
1214

1315
## Improvements
1416

examples/gds-example.ipynb

Lines changed: 82 additions & 117 deletions
Large diffs are not rendered by default.

python-wrapper/src/neo4j_viz/gds.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import pandas as pd
99
from graphdatascience import Graph, GraphDataScience
10-
from pandas import Series
1110

1211
from .pandas import _from_dfs
1312
from .visualization_graph import VisualizationGraph
@@ -24,22 +23,25 @@ def _fetch_node_dfs(
2423
}
2524

2625

27-
def _fetch_rel_df(gds: GraphDataScience, G: Graph) -> pd.DataFrame:
28-
relationship_properties = G.relationship_properties()
29-
assert isinstance(relationship_properties, Series)
26+
def _fetch_rel_dfs(gds: GraphDataScience, G: Graph) -> list[pd.DataFrame]:
27+
rel_types = G.relationship_types()
3028

31-
relationship_properties_per_type = relationship_properties.tolist()
32-
property_set: set[str] = set()
33-
for props in relationship_properties_per_type:
34-
if props:
35-
property_set.update(props)
29+
rel_props = {rel_type: G.relationship_properties(rel_type) for rel_type in rel_types}
3630

37-
if len(property_set) > 0:
38-
return gds.graph.relationshipProperties.stream(
39-
G, relationship_properties=list(property_set), separate_property_columns=True
40-
)
31+
rel_dfs: list[pd.DataFrame] = []
32+
# Have to call stream once per relationship type as there was a bug in GDS < 2.21
33+
for rel_type, props in rel_props.items():
34+
assert isinstance(props, list)
35+
if len(props) > 0:
36+
rel_df = gds.graph.relationshipProperties.stream(
37+
G, relationship_types=rel_type, relationship_properties=list(props), separate_property_columns=True
38+
)
39+
else:
40+
rel_df = gds.graph.relationships.stream(G, relationship_types=[rel_type])
41+
42+
rel_dfs.append(rel_df)
4143

42-
return gds.graph.relationships.stream(G)
44+
return rel_dfs
4345

4446

4547
def from_gds(
@@ -131,7 +133,7 @@ def from_gds(
131133
for df in node_dfs.values():
132134
df.drop(columns=[property_name], inplace=True)
133135

134-
rel_df = _fetch_rel_df(gds, G_fetched)
136+
rel_dfs = _fetch_rel_dfs(gds, G_fetched)
135137
finally:
136138
if G_fetched.name() != G.name():
137139
G_fetched.drop()
@@ -161,12 +163,13 @@ def from_gds(
161163
if "caption" not in all_actual_node_properties:
162164
node_df["caption"] = node_df["labels"].astype(str)
163165

164-
if "caption" not in rel_df.columns:
165-
rel_df["caption"] = rel_df["relationshipType"]
166+
for rel_df in rel_dfs:
167+
if "caption" not in rel_df.columns:
168+
rel_df["caption"] = rel_df["relationshipType"]
166169

167170
try:
168171
return _from_dfs(
169-
node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}, dropna=True
172+
node_df, rel_dfs, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}, dropna=True
170173
)
171174
except ValueError as e:
172175
err_msg = str(e)

python-wrapper/tests/test_gds.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -163,13 +163,15 @@ def test_from_gds_mocked(mocker: MockerFixture) -> None:
163163
}
164164
),
165165
}
166-
rels = pd.DataFrame(
167-
{
168-
"sourceNodeId": [0, 1, 2],
169-
"targetNodeId": [1, 2, 0],
170-
"relationshipType": ["REL", "REL2", "REL"],
171-
}
172-
)
166+
rels = [
167+
pd.DataFrame(
168+
{
169+
"sourceNodeId": [0, 1, 2],
170+
"targetNodeId": [1, 2, 0],
171+
"relationshipType": ["REL", "REL2", "REL"],
172+
}
173+
)
174+
]
173175

174176
mocker.patch(
175177
"graphdatascience.Graph.__init__",
@@ -188,7 +190,7 @@ def test_from_gds_mocked(mocker: MockerFixture) -> None:
188190
mocker.patch("graphdatascience.Graph.node_count", lambda x: sum(len(df) for df in nodes.values()))
189191
mocker.patch("graphdatascience.GraphDataScience.__init__", lambda x: None)
190192
mocker.patch("neo4j_viz.gds._fetch_node_dfs", return_value=nodes)
191-
mocker.patch("neo4j_viz.gds._fetch_rel_df", return_value=rels)
193+
mocker.patch("neo4j_viz.gds._fetch_rel_dfs", return_value=rels)
192194

193195
gds = GraphDataScience() # type: ignore[call-arg]
194196
G = Graph() # type: ignore[call-arg]
@@ -303,16 +305,24 @@ def test_from_gds_hetero(gds: Any) -> None:
303305
# No 'component' property
304306
}
305307
)
306-
rels = pd.DataFrame(
308+
X_rels = pd.DataFrame(
309+
{
310+
"sourceNodeId": [1],
311+
"targetNodeId": [3],
312+
"weight": [1.5],
313+
"relationshipType": ["X"],
314+
}
315+
)
316+
Y_rels = pd.DataFrame(
307317
{
308-
"sourceNodeId": [0, 1],
309-
"targetNodeId": [2, 3],
310-
"weight": [0.5, 1.5],
311-
"relationshipType": ["REL", "REL2"],
318+
"sourceNodeId": [0],
319+
"targetNodeId": [2],
320+
"score": [1],
321+
"relationshipType": ["Y"],
312322
}
313323
)
314324

315-
with gds.graph.construct("flo", [A_nodes, B_nodes], rels) as G:
325+
with gds.graph.construct("flo", [A_nodes, B_nodes], [X_rels, Y_rels]) as G:
316326
VG = from_gds(
317327
gds,
318328
G,
@@ -333,14 +343,13 @@ def test_from_gds_hetero(gds: Any) -> None:
333343
e.source,
334344
e.target,
335345
e.caption,
336-
e.properties["relationshipType"],
337-
e.properties["weight"],
346+
e.properties,
338347
)
339348
for e in VG.relationships
340349
],
341350
key=lambda x: x[0],
342351
)
343352
assert vg_rels == [
344-
(0, 2, "REL", "REL", 0.5),
345-
(1, 3, "REL2", "REL2", 1.5),
353+
(0, 2, "Y", {"relationshipType": "Y", "score": 1.0}),
354+
(1, 3, "X", {"relationshipType": "X", "weight": 1.5}),
346355
]

0 commit comments

Comments
 (0)