Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,5 @@ dist-ssr
*.ntvs*
*.njsproj
*.sln
*.sw?
*.sw?
backend/src/services/remote/blast_service/align2.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this here? Remove it

5 changes: 4 additions & 1 deletion backend/src/services/remote/blast_service/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
from pathlib import Path
from typing import Optional, List, Dict, Any
from Bio.Blast import NCBIXML
from services.remote.database_service.xml_to_db import xml_to_db

from src.shared.constants import (
from shared.constants import (
PROGRAM_STORAGE_DIR_SHARED_BLAST,
PROGRAM_STORAGE_DIR_SHARED_DATA_FASTA,
)
Comment on lines +18 to 21
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from shared.constants import (
PROGRAM_STORAGE_DIR_SHARED_BLAST,
PROGRAM_STORAGE_DIR_SHARED_DATA_FASTA,
)
from shared import PROGRAM_STORAGE_DIR_SHARED_BLAST, PROGRAM_STORAGE_DIR_SHARED_DATA_FASTA

Expand Down Expand Up @@ -152,6 +153,8 @@ def blast_cmdline(
logger.error("`blastn` command not found. Is BLAST+ installed and in PATH?")
raise

xml_to_db(output_file)

return str(output_file)


Expand Down
18 changes: 15 additions & 3 deletions backend/src/services/remote/blast_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from .align import process_single_fasta
from .find import process_variants
from services.remote.database_service.json_to_db import json_to_db


class BlastService:
Expand All @@ -30,12 +31,23 @@ def disease_extraction(self, fasta_file: str):
try:
print(f"Processing file: {fasta_file}")
# Perform blast aligning
# +++++++++++++++++++++++++++++++++++++++++++++++++
# Takes in fasta file and returns json file
result_file = process_single_fasta(fasta_file)

if not result_file:
raise Exception("Failed to perform blast aligning")



#++++++++++++++++++++++++++++++++++++++++++++++++
# Takes in Json file and returns db file
db_file = json_to_db(result_file)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is db_file used for? If it isn't used for anything, don't assign the result to a variable.


if not db_file:
raise Exception("Failed to insert data into the database")


# +++++++++++++++++++++++++++++++++++++++++++++++++
# Takes in Json file and returns csv file
disease_file = process_variants(result_file)

if not disease_file:
Expand Down
6 changes: 6 additions & 0 deletions backend/src/services/remote/database_service/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .xml_to_db import xml_to_db
from .json_to_db import json_to_db

# Public API of the database service package: both converters imported above
# are exported, so `from ... import *` exposes json_to_db as well as xml_to_db.
__all__ = [
    "xml_to_db",
    "json_to_db",
]
132 changes: 132 additions & 0 deletions backend/src/services/remote/database_service/json_to_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import sqlite3
import argparse
from pathlib import Path
import os
import logging
import json

# Module-level logging setup: INFO and above, logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory (under the user's home) that holds the shared BLAST result database.
# NOTE(review): reviewers asked for this path to come from the shared constants
# module instead of being rebuilt here.
_RESULTS_SUBPATH = (".kath", "shared", "data", "blast_results")
BASE_OUTPUT_DIR = os.path.join(os.path.expanduser("~"), *_RESULTS_SUBPATH)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This path should come from constants.py.

# File name of the SQLite database that receives the rows parsed from JSON files.
DB_FILENAME = "json.db"
# Full path of that database: DB_FILENAME placed inside BASE_OUTPUT_DIR.
SQLITE_DB_FILE = os.path.join(BASE_OUTPUT_DIR, DB_FILENAME)

def create_database(sqlite_db_file: str) -> None:
    """
    Create the SQLite database and the variants table if it doesn't exist.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly on the same file is safe.

    Args:
        sqlite_db_file: Path of the SQLite database file to open.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement fails.
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        cursor = conn.cursor()
        # The SQL text is kept flush-left so the statement is passed verbatim.
        cursor.execute("""
CREATE TABLE IF NOT EXISTS variants (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER,
query_id TEXT,
subject_id TEXT,
chromosome TEXT,
position INTEGER,
variation_type TEXT,
reference_allele TEXT,
query_allele TEXT,
query_position INTEGER,
hsp_score REAL,
hsp_evalue REAL,
hsp_identity REAL,
hsp_align_length INTEGER,
hsp_query_start INTEGER,
hsp_subject_start INTEGER,
hsp_strand TEXT,
hsp_gaps INTEGER
);
""")
        conn.commit()
    finally:
        # Release the connection even when table creation raises.
        conn.close()

def get_next_file_id(sqlite_db_file: str) -> int:
    """
    Get the next file_id to use for a new JSON file.

    Args:
        sqlite_db_file: Path of the SQLite database containing the
            ``variants`` table.

    Returns:
        One more than the largest ``file_id`` stored in ``variants``, or 1
        when the table holds no file_id yet.

    Raises:
        sqlite3.Error: If the query fails (for example, the table is missing).
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        row = conn.execute("SELECT MAX(file_id) FROM variants").fetchone()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    # MAX() over an empty table yields a single row holding NULL.
    highest = row[0] if row else None
    return 1 if highest is None else highest + 1

def json_to_db(json_file_path: str) -> str:
    """
    Parse a JSON file and insert its data into a shared SQLite database.

    Every record of the file is stored as one row of the ``variants`` table.
    All rows of one call share a freshly allocated ``file_id``.

    Args:
        json_file_path: Path to the JSON file. Its top-level value is expected
            to be a list of objects (each record is read with ``.get``).

    Returns:
        Path to the shared SQLite database.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        sqlite3.Error: If the database cannot be opened or the insert fails.
    """
    # Ensure the output directory exists
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

    # Always use the same database file
    sqlite_db_file = SQLITE_DB_FILE

    # Create the database if needed
    create_database(sqlite_db_file)

    # Determine the file_id for this JSON file
    file_id = get_next_file_id(sqlite_db_file)

    # Load the JSON file
    with open(json_file_path, "r", encoding="utf-8") as f:
        variant_list = json.load(f)

    # Keys copied unchanged into the row, in the column order of the INSERT.
    plain_keys = (
        "query_id",
        "subject_id",
        "chromosome",
        "position",
        "variation_type",
        "reference_allele",
        "query_allele",
        "query_position",
        "hsp_score",
        "hsp_evalue",
        "hsp_identity",
        "hsp_align_length",
        "hsp_query_start",
        "hsp_subject_start",
    )

    # Build every row before touching the database, so a malformed record
    # fails before any connection is open.
    rows = [
        (
            file_id,
            *(variant.get(key) for key in plain_keys),
            # Stored as a JSON string; a missing key is stored as "null".
            json.dumps(variant.get("hsp_strand")),
            variant.get("hsp_gaps"),
        )
        for variant in variant_list
    ]

    # Insert data into the database
    conn = sqlite3.connect(sqlite_db_file)
    try:
        # The SQL text is kept flush-left so the statement is passed verbatim.
        conn.executemany("""
INSERT INTO variants (
file_id, query_id, subject_id, chromosome, position, variation_type,
reference_allele, query_allele, query_position, hsp_score, hsp_evalue,
hsp_identity, hsp_align_length, hsp_query_start, hsp_subject_start,
hsp_strand, hsp_gaps
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", rows)
        conn.commit()
    finally:
        # Close the connection even when the insert fails; uncommitted rows
        # are discarded in that case.
        conn.close()

    logger.info(f"Inserted variant data from file_id {file_id} into the database: {sqlite_db_file}")

    return sqlite_db_file

if __name__ == "__main__":
    # Command-line entry point: import the JSON file named by --json and
    # report where the shared database lives.
    cli = argparse.ArgumentParser(
        description="Parse JSON and insert results into a shared SQLite database."
    )
    cli.add_argument("--json", help="Path to JSON file", required=True)
    cli_args = cli.parse_args()

    db_path = json_to_db(cli_args.json)
    print(f"Database updated at: {db_path}")
2 changes: 2 additions & 0 deletions backend/src/services/remote/database_service/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def _announce_direct_execution() -> None:
    """Print a notice that this module was run as a script."""
    print("Database service module executed directly.")


if __name__ == "__main__":
    _announce_direct_execution()
135 changes: 135 additions & 0 deletions backend/src/services/remote/database_service/xml_to_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import sqlite3
import argparse
from pathlib import Path
import os
import logging
from xml.etree import ElementTree as ET

# Module-level logging setup: INFO and above, logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Location of the single SQLite file that collects all parsed BLAST XML output.
# NOTE(review): reviewers asked for these values to come from the shared
# constants module instead of being rebuilt here.
_RESULTS_SUBPATH = (".kath", "shared", "data", "blast_results")
BASE_OUTPUT_DIR = os.path.join(os.path.expanduser("~"), *_RESULTS_SUBPATH)
DB_FILENAME = "xml.db"
SQLITE_DB_FILE = os.path.join(BASE_OUTPUT_DIR, DB_FILENAME)
Comment on lines +13 to +15
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

constants 😅


def create_database(sqlite_db_file: str) -> None:
    """
    Create the SQLite database and the hsps table if it doesn't exist.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly on the same file is safe.

    Args:
        sqlite_db_file: Path of the SQLite database file to open.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement fails.
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        cursor = conn.cursor()

        # file_id groups all rows that were imported from the same XML file.
        # The SQL text is kept flush-left so the statement is passed verbatim.
        cursor.execute("""
CREATE TABLE IF NOT EXISTS hsps (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER,
hit_id INTEGER,
hsp_num INTEGER,
bit_score REAL,
score INTEGER,
evalue REAL,
query_from INTEGER,
query_to INTEGER,
hit_from INTEGER,
hit_to INTEGER,
query_frame INTEGER,
hit_frame INTEGER,
identity INTEGER,
positive INTEGER,
gaps INTEGER,
align_len INTEGER,
qseq TEXT,
hseq TEXT,
midline TEXT
);
""")

        conn.commit()
    finally:
        # Release the connection even when table creation raises.
        conn.close()

def get_next_file_id(sqlite_db_file: str) -> int:
    """
    Get the next file_id to use for a new XML file.

    Args:
        sqlite_db_file: Path of the SQLite database containing the ``hsps``
            table.

    Returns:
        One more than the largest ``file_id`` stored in ``hsps``, or 1 when
        the table holds no file_id yet.

    Raises:
        sqlite3.Error: If the query fails (for example, the table is missing).
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        row = conn.execute("SELECT MAX(file_id) FROM hsps").fetchone()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    # MAX() over an empty table yields a single row holding NULL.
    highest = row[0] if row else None
    return 1 if highest is None else highest + 1

def xml_to_db(xml_file_path: str) -> str:
    """
    Parse a BLAST XML file and insert its data into a shared SQLite database.

    Every ``Hsp`` element found anywhere in the document becomes one row of
    the ``hsps`` table. All rows of one call share a freshly allocated
    ``file_id``.

    Args:
        xml_file_path: Path to the BLAST XML file.

    Returns:
        Path to the shared SQLite database.

    Raises:
        OSError: If the XML file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        TypeError: If an ``Hsp`` element lacks one of the numeric fields.
        ValueError: If a numeric field does not contain a valid number.
        sqlite3.Error: If the database cannot be opened or the insert fails.
    """
    # Ensure the output directory exists
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

    # Always use the same database file
    sqlite_db_file = SQLITE_DB_FILE

    # Create the database if needed
    create_database(sqlite_db_file)

    # Determine the file_id for this XML file
    file_id = get_next_file_id(sqlite_db_file)

    # Parse the XML file
    tree = ET.parse(xml_file_path)
    root = tree.getroot()

    # Child tags converted to numbers, in the column order of the INSERT.
    numeric_fields = (
        ("Hsp_num", int),
        ("Hsp_bit-score", float),
        ("Hsp_score", int),
        ("Hsp_evalue", float),
        ("Hsp_query-from", int),
        ("Hsp_query-to", int),
        ("Hsp_hit-from", int),
        ("Hsp_hit-to", int),
        ("Hsp_query-frame", int),
        ("Hsp_hit-frame", int),
        ("Hsp_identity", int),
        ("Hsp_positive", int),
        ("Hsp_gaps", int),
        ("Hsp_align-len", int),
    )
    # Child tags stored as text; a missing tag is stored as NULL.
    text_fields = ("Hsp_qseq", "Hsp_hseq", "Hsp_midline")

    # Build every row before touching the database, so a malformed HSP fails
    # before any connection is open.
    # NOTE(review): hit_id is always inserted as NULL; nothing here links an
    # HSP to its enclosing hit. Confirm whether that column should be filled.
    rows = [
        (
            file_id,
            None,
            *(convert(hsp.findtext(tag)) for tag, convert in numeric_fields),
            *(hsp.findtext(tag) for tag in text_fields),
        )
        for hsp in root.findall(".//Hsp")
    ]

    # Insert data into the database
    conn = sqlite3.connect(sqlite_db_file)
    try:
        # The SQL text is kept flush-left so the statement is passed verbatim.
        conn.executemany("""
INSERT INTO hsps (file_id, hit_id, hsp_num, bit_score, score, evalue, query_from, query_to, hit_from, hit_to,
query_frame, hit_frame, identity, positive, gaps, align_len, qseq, hseq, midline)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", rows)
        conn.commit()
    finally:
        # Close the connection even when the insert fails; uncommitted rows
        # are discarded in that case.
        conn.close()

    logger.info(f"Inserted HSP data from file_id {file_id} into the database: {sqlite_db_file}")

    return sqlite_db_file

if __name__ == "__main__":
    # Command-line entry point: import the BLAST XML file named by --xml and
    # report where the shared database lives.
    cli = argparse.ArgumentParser(
        description="Parse BLAST XML and insert results into a shared SQLite database."
    )
    cli.add_argument("--xml", help="Path to BLAST XML file", required=True)
    cli_args = cli.parse_args()

    db_path = xml_to_db(cli_args.xml)
    print(f"Database updated at: {db_path}")
2 changes: 1 addition & 1 deletion backend/src/services/remote/fasta_service/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from Bio import Entrez
from dotenv import load_dotenv

from src.utils.logger import get_logger
from utils.logger import get_logger
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from utils.logger import get_logger
from utils import get_logger

from services.utils.script_setup import (
EnvSetup,
FolderSetup,
Expand Down
2 changes: 1 addition & 1 deletion backend/src/services/utils/script_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
from pathlib import Path
from Bio import Entrez
from src.shared.constants import *
from shared.constants import *

from utils.logger import get_logger

Expand Down
Loading
Loading