Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion python_api/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,6 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
#.idea/

output/
12 changes: 11 additions & 1 deletion python_api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,18 @@
This Flask server is responsible for running the protein function prediction calculations. It listens on its own port; the Express.js server acts as the main server and calls the APIs exposed by this Flask server.

## Setup

1. Install Conda
2. Open up terminal and go to the python_api directory
3. Do `conda env create -f environment.yml` to create the conda environment
4. Activate the conda environment
5. Do `python app.py` to run the server
5. Do `python app.py` to run the server

## Adding new species

When a new species is added, we have to generate a new go_protein.pickle file for the protein function prediction to work.
This pickle file represents the network, which includes GO terms and their edges to proteins (inferred and direct). We can generate this file with the `generate_pickles.py` script if we have the CSV files exported from Neo4j for the three edge types: proGo, proPro, and reg. Use the following example to generate the file, and make sure that you execute the command from inside the python_api folder. We will use the E. coli network as an example. The generated file is written to `output/go_protein.pickle`.

```
python3 generate_pickles.py -p networks/ecoli_proPro.csv -r networks/ecoli_reg.csv -g networks/ecoli_proGo.csv
```
77 changes: 38 additions & 39 deletions python_api/environment.yml
Original file line number Diff line number Diff line change
@@ -1,43 +1,42 @@
name: protein-weaver
channels:
- conda-forge
- defaults
- https://conda.anaconda.org/gurobi
- https://repo.anaconda.com/pkgs/main
- https://repo.anaconda.com/pkgs/r
dependencies:
- blinker=1.8.2=pyhd8ed1ab_0
- bzip2=1.0.8=hfdf4475_7
- ca-certificates=2024.7.4=h8857fd0_0
- click=8.1.7=unix_pyh707e725_0
- flask=3.0.3=pyhd8ed1ab_0
- importlib-metadata=8.0.0=pyha770c72_0
- itsdangerous=2.2.0=pyhd8ed1ab_0
- jinja2=3.1.4=pyhd8ed1ab_0
- libblas=3.9.0=22_osx64_openblas
- libcblas=3.9.0=22_osx64_openblas
- libcxx=18.1.8=hef8daea_0
- libexpat=2.6.2=h73e2aa4_0
- libffi=3.4.2=h0d85af4_5
- libgfortran=5.0.0=13_2_0_h97931a8_3
- libgfortran5=13.2.0=h2873a65_3
- liblapack=3.9.0=22_osx64_openblas
- libopenblas=0.3.27=openmp_h8869122_1
- libsqlite=3.46.0=h1b8f9f3_0
- libzlib=1.3.1=h87427d6_1
- llvm-openmp=18.1.8=h15ab845_0
- markupsafe=2.1.5=py312h41838bb_0
- ncurses=6.5=h5846eda_0
- networkx=3.3=pyhd8ed1ab_1
- numpy=2.0.0=py312h8813227_0
- openssl=3.3.1=h87427d6_2
- pip=24.0=pyhd8ed1ab_0
- python=3.12.4=h37a9e06_0_cpython
- python_abi=3.12=4_cp312
- readline=8.2=h9e318b2_1
- scipy=1.14.0=py312hb9702fa_1
- setuptools=71.0.4=pyhd8ed1ab_0
- tk=8.6.13=h1abcd95_1
- tzdata=2024a=h0c530f3_0
- werkzeug=3.0.3=pyhd8ed1ab_0
- wheel=0.43.0=pyhd8ed1ab_1
- xz=5.2.6=h775f41a_0
- zipp=3.19.2=pyhd8ed1ab_0
- blinker=1.9.0
- bzip2=1.0.8
- ca-certificates=2025.7.14
- click=8.2.1
- flask=3.1.1
- importlib-metadata=8.7.0
- itsdangerous=2.2.0
- jinja2=3.1.6
- libblas=3.9.0
- libcblas=3.9.0
- libcxx=20.1.7
- libexpat=2.7.0
- libffi=3.4.6
- libgfortran=5.0.0
- libgfortran5=14.2.0
- liblapack=3.9.0
- liblzma=5.8.1
- libmpdec=4.0.0
- libopenblas=0.3.30
- libsqlite=3.50.2
- libzlib=1.3.1
- llvm-openmp=20.1.7
- markupsafe=3.0.2
- ncurses=6.5
- networkx=3.5
- numpy=2.3.1
- openssl=3.5.1
- pip=25.1.1
- python=3.13.5
- python_abi=3.13
- readline=8.2
- scipy=1.16.0
- tk=8.6.13
- tzdata=2025b
- werkzeug=3.1.3
- zipp=3.23.0
161 changes: 161 additions & 0 deletions python_api/generate_pickles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import pickle
import networkx as nx
import os
import argparse


def _add_protein_node(graph, protein_id):
    """Add ``protein_id`` to ``graph`` as a protein node unless it already exists.

    Returns:
        bool: True if a new node was created, False if it was already present.
    """
    if graph.has_node(protein_id):
        return False
    graph.add_node(protein_id, name=protein_id, type="protein")
    return True


def create_go_protein_only_network(
    ppi_network_file, regulatory_network_file, go_network_file
):
    """Build a directed graph of proteins, GO terms and protein-GO edges.

    Despite the ``*_file`` parameter names, each argument is an iterable of
    already-parsed rows (indexable sequences of strings), not a path or an
    open file.

    Args:
        ppi_network_file: Rows whose first two columns are protein ids. Only
            the nodes are added; no protein-protein edge is created.
        regulatory_network_file: Rows whose first two columns are protein ids.
            Only the nodes are added; no regulatory edge is created.
        go_network_file: Rows whose first column is a protein id and whose
            second column is a GO term id.

    Returns:
        networkx.DiGraph: Protein nodes carry ``name`` and ``type="protein"``;
        GO term nodes carry ``type="go_term"``. Every row of
        ``go_network_file`` yields an edge in both directions between the
        protein and the GO term, each with ``type="protein_go_term"``.

    Side effects:
        Prints a short summary of node and edge counts to stdout.
    """
    G = nx.DiGraph()
    protein_go_edge = 0
    protein_node = 0
    go_node = 0

    # Only add the protein nodes, not their PPI or regulatory edges.
    for rows in (ppi_network_file, regulatory_network_file):
        for line in rows:
            protein_node += _add_protein_node(G, line[0])
            protein_node += _add_protein_node(G, line[1])

    # Proteins annotated with a GO term have an edge to a GO term node.
    for line in go_network_file:
        protein_id, go_term_id = line[0], line[1]

        # The GO term is registered before the protein so that the node
        # insertion order of the resulting graph stays stable.
        if not G.has_node(go_term_id):
            G.add_node(go_term_id, type="go_term")
            go_node += 1

        protein_node += _add_protein_node(G, protein_id)

        # The annotation is stored in both directions so it can be traversed
        # from either the protein or the GO term.
        G.add_edge(protein_id, go_term_id, type="protein_go_term")
        G.add_edge(go_term_id, protein_id, type="protein_go_term")
        # Counts input rows, so repeated rows are counted more than once.
        protein_go_edge += 1

    print("")
    print("")
    print("ProGO edge only network summary")

    print("protein-go edge count: ", protein_go_edge)
    print("protein node count: ", protein_node)
    print("go node count: ", go_node)
    print("total edge count: ", G.number_of_edges())
    print("total node count: ", G.number_of_nodes())

    return G


def read_specific_columns(file_path, delimit):
    """Read a delimited text file into a list of rows, skipping the header line.

    Each line after the first is stripped of surrounding whitespace, split on
    ``delimit``, and has every double-quote character removed from each field.
    This is a plain string split, not a CSV parser: a quoted field that itself
    contains ``delimit`` is split into several columns.

    Args:
        file_path: Path of the file to read.
        delimit: Separator string handed to ``str.split``.

    Returns:
        list[list[str]] | None: One list of fields per data line; an empty
        list when the file has no data lines (including a completely empty
        file). ``None`` if the file does not exist or any other error occurs
        while reading; the error is printed to stdout.
    """
    try:
        with open(file_path, "r") as file:
            # Skip the header. The default keeps a completely empty file from
            # raising StopIteration, which the broad handler below would
            # otherwise report as an error with a blank message.
            next(file, None)
            return [
                [field.replace('"', "") for field in line.strip().split(delimit)]
                for line in file
            ]
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


def read_pro_go_data(file_path, columns, delimit):
    """Read selected columns from a delimited text file, skipping the header line.

    Each line after the first is stripped of surrounding whitespace and split
    on ``delimit``; the fields at the indexes in ``columns`` are kept, in that
    order, with every double-quote character removed. This is a plain string
    split, not a CSV parser: a quoted field that itself contains ``delimit``
    shifts the column indexes of that line.

    Args:
        file_path: Path of the file to read.
        columns: Iterable of integer column indexes to keep from each line.
        delimit: Separator string handed to ``str.split``.

    Returns:
        list[list[str]] | None: One list of selected fields per data line; an
        empty list when the file has no data lines (including a completely
        empty file). ``None`` if the file does not exist or any other error
        occurs (for example a line with too few columns); the error is
        printed to stdout.
    """
    try:
        with open(file_path, "r") as file:
            # Skip the header. The default keeps a completely empty file from
            # raising StopIteration, which the broad handler below would
            # otherwise report as an error with a blank message.
            next(file, None)
            split_lines = (line.strip().split(delimit) for line in file)
            return [
                [parts[col].replace('"', "") for col in columns]
                for parts in split_lines
            ]
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


def get_network_pickle(ppi_network_file, regulatory_network_file, go_network_file):
    """Build the protein/GO-term network from three edge files and pickle it.

    The three comma-separated input files are read (each with its header line
    skipped), combined into a graph by ``create_go_protein_only_network``, and
    the graph is written to ``./output/go_protein.pickle`` relative to the
    current working directory. The output directory is created if needed.

    Args:
        ppi_network_file: Path to the protein-protein edge file.
        regulatory_network_file: Path to the regulatory edge file.
        go_network_file: Path to the protein-GO edge file; columns 0 and 2 are
            used as the protein and the GO term.

    Raises:
        RuntimeError: If any input file could not be read. The reader has
            already printed the underlying reason.
        OSError: If the output directory or pickle file cannot be created.
    """
    ppi_data = read_specific_columns(ppi_network_file, ",")
    reg_data = read_specific_columns(regulatory_network_file, ",")
    go_cols = [0, 2]  # only want the protein and go term
    go_data = read_pro_go_data(go_network_file, go_cols, ",")

    # The readers signal failure by returning None. Stop here with a clear
    # message instead of failing later with an opaque TypeError while the
    # graph is being built.
    loaded = (
        (ppi_network_file, ppi_data),
        (regulatory_network_file, reg_data),
        (go_network_file, go_data),
    )
    unreadable = [str(path) for path, rows in loaded if rows is None]
    if unreadable:
        raise RuntimeError(
            f"Could not read input file(s): {', '.join(unreadable)}"
        )

    G = create_go_protein_only_network(ppi_data, reg_data, go_data)

    output_dir = "./output"
    pickle_file = "go_protein.pickle"

    # A failure to create the directory is fatal for the write below, so the
    # OSError is allowed to propagate rather than being printed and ignored.
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, pickle_file), "wb") as f:
        pickle.dump(G, f)


if __name__ == "__main__":
    # Command-line entry point: all three input files are mandatory.
    # Each entry is (short flag, long flag, help text).
    input_options = (
        ("-p", "--ppi", "Path to the PPI edges input file."),
        ("-r", "--regulatory", "Path to the Reg edges input file."),
        ("-g", "--go", "Path to the GO network file."),
    )

    parser = argparse.ArgumentParser(
        description="Generate network pickle files from network files for protein function prediction"
    )
    for short_flag, long_flag, help_text in input_options:
        parser.add_argument(
            short_flag,
            long_flag,
            type=str,
            help=help_text,
            required=True,
        )

    cli_args = parser.parse_args()
    get_network_pickle(cli_args.ppi, cli_args.regulatory, cli_args.go)
1 change: 1 addition & 0 deletions python_api/helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pickle
import numpy as np


def import_graph_from_pickle(filename):
    """Load and return the object stored in the pickle file at ``filename``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.

    Note:
        Unpickling can execute arbitrary code, so only load files that come
        from a trusted source.
    """
    with open(filename, "rb") as pickle_handle:
        graph = pickle.load(pickle_handle)
    return graph
Expand Down
Loading