From 9982ef09378214d22dad619b22da114d8e9d296b Mon Sep 17 00:00:00 2001
From: Ben Pedigo
Date: Mon, 11 Mar 2024 13:28:37 -0700
Subject: [PATCH] add k hop neighborhood

---
 networkframe/__init__.py     |  4 ++--
 networkframe/networkframe.py | 61 ++++++++++++++++++++++++++++++++++++
 tests/test_networkframe.py   | 15 +++++++++
 3 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/networkframe/__init__.py b/networkframe/__init__.py
index 7018d12..6c5c0e1 100644
--- a/networkframe/__init__.py
+++ b/networkframe/__init__.py
@@ -1,9 +1,9 @@
 """Top-level package for networkframe."""
 
-import pkg_resources
+from importlib.metadata import version
 
 from .networkframe import LocIndexer, NetworkFrame, NodeGroupBy
 
 __all__ = ["NetworkFrame", "NodeGroupBy", "LocIndexer"]
 
-__version__ = pkg_resources.get_distribution("networkframe").version
+__version__ = version("networkframe")
diff --git a/networkframe/networkframe.py b/networkframe/networkframe.py
index 7529a3a..1ba7424 100644
--- a/networkframe/networkframe.py
+++ b/networkframe/networkframe.py
@@ -1064,6 +1064,67 @@ def node_agreement(self, other: Self) -> float:
         """
         return self.nodes.index.isin(other.nodes.index).mean()
 
+    def k_hop_neighborhood(
+        self, node_id: Union[int, str], k: int, directed: bool = False, method="power"
+    ):
+        """
+        Return the k-hop neighborhood of a node.
+
+        The neighborhood is the node itself plus every node reachable from it in at
+        most `k` hops.
+
+        Parameters
+        ----------
+        node_id
+            The node ID to use to select the k-hop neighborhood.
+        k
+            The maximum number of hops to consider.
+        directed
+            Whether to consider the network as directed for computing the reachable
+            nodes. If False, edges are followed in both directions.
+        method
+            The method to use to compute the k-hop neighborhood. Currently only "power" is
+            supported. Dijkstra's algorithm may be supported in the future.
+
+        Returns
+        -------
+        :
+            A new NetworkFrame with only the k-hop neighborhood of the given node.
+
+        Raises
+        ------
+        ValueError
+            If `k` is negative or `method` is not "power".
+        """
+        if k < 0:
+            raise ValueError("k must be non-negative.")
+        if method != "power":
+            raise ValueError('Only method="power" is currently supported.')
+        if k == 0:
+            return self.query_nodes("index == @node_id", local_dict=locals())
+
+        sparse_adjacency = self.to_sparse_adjacency()
+        if not directed:
+            sparse_adjacency = sparse_adjacency.maximum(sparse_adjacency.T)
+        sparse_adjacency = (sparse_adjacency > 0).astype(bool)
+
+        # Only the query node's row is needed, so propagate a single row vector
+        # rather than powering the whole adjacency matrix.
+        index = self.nodes.index.get_loc(node_id)
+        frontier = sparse_adjacency[[index], :]
+        reached = frontier
+        for _ in range(k - 1):
+            # `frontier` marks nodes at the end of walks of exactly this length;
+            # accumulate so nodes closer than k hops are not dropped.
+            frontier = frontier @ sparse_adjacency
+            reached = reached.maximum(frontier)
+
+        targets = reached.nonzero()[1]
+        select_indices = self.nodes.index[targets].to_list()
+        if node_id not in select_indices:
+            select_indices.append(node_id)
+        return self.query_nodes("index in @select_indices", local_dict=locals())
+
 
 class LocIndexer:
     """A class for indexing a NetworkFrame using .loc."""
diff --git a/tests/test_networkframe.py b/tests/test_networkframe.py
index 72de32d..8fd549d 100644
--- a/tests/test_networkframe.py
+++ b/tests/test_networkframe.py
@@ -21,6 +21,7 @@ def simple_networkframe():
     )
     nodes.set_index("name", inplace=True)
 
+    # A -> B, A -> C, B -> C, C -> D
     edges = pd.DataFrame(
         {
             "source": ["A", "A", "B", "C"],
@@ -28,6 +29,7 @@ def simple_networkframe():
             "weight": [1, 2, 3, 4],
         }
     )
+
     return NetworkFrame(nodes, edges)
 
 
@@ -63,3 +65,16 @@ def test_query_nodes(simple_networkframe):
 def test_query_edges(simple_networkframe):
     query_networkframe = simple_networkframe.query_edges("weight > 2")
     assert len(query_networkframe.edges) == 2
+
+
+def test_k_hop_neighborhood(simple_networkframe):
+    assert len(simple_networkframe.k_hop_neighborhood("A", 0)) == 1
+    assert len(simple_networkframe.k_hop_neighborhood("A", 1)) == 3
+    assert len(simple_networkframe.k_hop_neighborhood("A", 2)) == 4
+
+
+def test_k_hop_neighborhood_keeps_closer_nodes(simple_networkframe):
+    # C is 1 hop from D and must stay in the 2-hop neighborhood even though no
+    # walk of length exactly 2 leads from D to C.
+    assert len(simple_networkframe.k_hop_neighborhood("D", 1)) == 2
+    assert len(simple_networkframe.k_hop_neighborhood("D", 2)) == 4