Skip to content
Merged
37 changes: 35 additions & 2 deletions spras/analysis/summary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from statistics import median
from typing import Iterable

import networkx as nx
Expand Down Expand Up @@ -50,8 +51,40 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg
number_edges = nw.number_of_edges()
ncc = nx.number_connected_components(nw)

# Save the max/median degree and density (all reported as 0 for an empty network)
if number_nodes == 0:
max_degree = 0
median_degree = 0.0
density = 0.0
else:
degrees = [deg for _, deg in nw.degree()]
max_degree = max(degrees)
median_degree = median(degrees)
density = nx.density(nw)

cc = list(nx.connected_components(nw))
# Save the max diameter
# Use diameter only for components with ≥2 nodes (singleton components have diameter 0)
diameters = [
nx.diameter(nw.subgraph(c).copy()) if len(c) > 1 else 0
for c in cc
]
max_diameter = max(diameters, default=0)

# Save the average path length, taken as the mean of the per-component average shortest path lengths
# Compute average shortest path length only for components with ≥2 nodes (undefined for singletons, set to 0.0)
avg_path_lengths = [
nx.average_shortest_path_length(nw.subgraph(c).copy()) if len(c) > 1 else 0.0
for c in cc
]

if len(avg_path_lengths) != 0:
avg_path_len = sum(avg_path_lengths) / len(avg_path_lengths)
else:
avg_path_len = 0.0

# Initialize list to store current network information
cur_nw_info = [nw_name, number_nodes, number_edges, ncc]
cur_nw_info = [nw_name, number_nodes, number_edges, ncc, density, max_degree, median_degree, max_diameter, avg_path_len]

# Iterate through each node property and save the intersection with the current network
for node_list in nodes_by_col:
Expand All @@ -71,7 +104,7 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg
nw_info.append(cur_nw_info)

# Prepare column names
col_names = ['Name', 'Number of nodes', 'Number of edges', 'Number of connected components']
col_names = ['Name', 'Number of nodes', 'Number of edges', 'Number of connected components', 'Density', 'Max degree', 'Median degree', 'Max diameter', 'Average path length']
col_names.extend(nodes_by_col_labs)
col_names.append('Parameter combination')

Expand Down
10 changes: 10 additions & 0 deletions test/analysis/expected_output/expected_egfr_summary.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in sources Nodes in targets Nodes in active Nodes in dummy Parameter combination
test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 {'slice_threshold': 0.3, 'module_threshold': 0.05}
test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 {'local_search': 'Yes', 'max_path_length': 3, 'rand_restarts': 10}
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 {'b': 2, 'd': 10, 'g': '1e-3', 'r': 0.01, 'w': 0.1, 'mu': 0.008, 'dummy_mode': 'file'}
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 {'b': 10, 'd': 10, 'g': '1e-3', 'r': 0.01, 'w': 0.1, 'mu': 0.008, 'dummy_mode': 'file'}
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 {'b': 0.55, 'd': 10, 'g': '1e-3', 'r': 0.01, 'w': 0.1, 'mu': 0.008, 'dummy_mode': 'file'}
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 {'b': 2, 'g': 3}
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 {'b': 4, 'g': 0}
test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 {'k': 10}
test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 {'k': 20}
13 changes: 13 additions & 0 deletions test/analysis/expected_output/expected_example_summary.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in active Nodes in dummy Nodes in sources Nodes in targets Parameter combination
test/analysis/input/example/data0-allpairs-params-BEH6YB2_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'spras_placeholder': 'no parameters'}
test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'slice_threshold': 0.3, 'module_threshold': 0.05}
test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'max_path_length': 3, 'local_search': 'Yes', 'rand_restarts': 10}
test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'flow': 1, 'capacity': 1}
test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'b': 6, 'w': 5.0, 'd': 10, 'dummy_mode': 'file'}
test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'b': 6, 'w': 0.0, 'd': 10, 'dummy_mode': 'file'}
test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'b': 5, 'w': 0.0, 'd': 10, 'dummy_mode': 'file'}
test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'b': 5, 'w': 5.0, 'd': 10, 'dummy_mode': 'file'}
test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'b': 2, 'g': 3}
test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'b': 4, 'g': 0}
test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 200}
test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 100}
10 changes: 0 additions & 10 deletions test/analysis/expected_output/test_egfr_summary.txt

This file was deleted.

13 changes: 0 additions & 13 deletions test/analysis/expected_output/test_example_summary.txt

This file was deleted.

4 changes: 2 additions & 2 deletions test/analysis/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_example_networks(self):

# Comparing the dataframes directly with equals does not match because of how the parameter
# combinations column is loaded from disk. Therefore, write both to disk and compare the files.
assert filecmp.cmp(out_path, EXPECT_DIR + "test_example_summary.txt", shallow=False)
assert filecmp.cmp(out_path, EXPECT_DIR + "expected_example_summary.txt", shallow=False)

def test_egfr_networks(self):
"""Test data from EGFR workflow"""
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_egfr_networks(self):

# Comparing the dataframes directly with equals does not match because of how the parameter
# combinations column is loaded from disk. Therefore, write both to disk and compare the files.
assert filecmp.cmp(out_path, EXPECT_DIR + "test_egfr_summary.txt", shallow=False)
assert filecmp.cmp(out_path, EXPECT_DIR + "expected_egfr_summary.txt", shallow=False)

def test_load_dataset_dict(self):
"""Test loading files from dataset_dict"""
Expand Down
Loading