-
Notifications
You must be signed in to change notification settings - Fork 0
(temporary) Creates 2 databases, uses one of them in json file creation. #23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0835ba4
022b008
48473a9
e598ff0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,4 +75,5 @@ dist-ssr | |
| *.ntvs* | ||
| *.njsproj | ||
| *.sln | ||
| *.sw? | ||
| *.sw? | ||
| backend/src/services/remote/blast_service/align2.py | ||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -13,8 +13,9 @@ | |||||||||||
| from pathlib import Path | ||||||||||||
| from typing import Optional, List, Dict, Any | ||||||||||||
| from Bio.Blast import NCBIXML | ||||||||||||
| from services.remote.database_service.xml_to_db import xml_to_db | ||||||||||||
|
|
||||||||||||
| from src.shared.constants import ( | ||||||||||||
| from shared.constants import ( | ||||||||||||
| PROGRAM_STORAGE_DIR_SHARED_BLAST, | ||||||||||||
| PROGRAM_STORAGE_DIR_SHARED_DATA_FASTA, | ||||||||||||
| ) | ||||||||||||
|
Comment on lines
+18
to
21
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
|
|
@@ -152,6 +153,8 @@ def blast_cmdline( | |||||||||||
| logger.error("`blastn` command not found. Is BLAST+ installed and in PATH?") | ||||||||||||
| raise | ||||||||||||
|
|
||||||||||||
| xml_to_db(output_file) | ||||||||||||
|
|
||||||||||||
| return str(output_file) | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
|
|
||
| from .align import process_single_fasta | ||
| from .find import process_variants | ||
| from services.remote.database_service.json_to_db import json_to_db | ||
|
|
||
|
|
||
| class BlastService: | ||
|
|
@@ -30,12 +31,23 @@ def disease_extraction(self, fasta_file: str): | |
| try: | ||
| print(f"Processing file: {fasta_file}") | ||
| # Perform blast aligning | ||
| # +++++++++++++++++++++++++++++++++++++++++++++++++ | ||
| # Takes in fasta file and returns json file | ||
| result_file = process_single_fasta(fasta_file) | ||
|
|
||
| if not result_file: | ||
| raise Exception("Failed to perform blast aligning") | ||
|
|
||
|
|
||
|
|
||
| #++++++++++++++++++++++++++++++++++++++++++++++++ | ||
| # Takes in Json file and returns db file | ||
| db_file = json_to_db(result_file) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is db_file used for? If it isn't used for anything, don't assign the result to a variable. |
||
|
|
||
| if not db_file: | ||
| raise Exception("Failed to insert data into the database") | ||
|
|
||
|
|
||
| # +++++++++++++++++++++++++++++++++++++++++++++++++ | ||
| # Takes in Json file and returns csv file | ||
| disease_file = process_variants(result_file) | ||
|
|
||
| if not disease_file: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| from .xml_to_db import xml_to_db | ||
| from .json_to_db import json_to_db | ||
|
|
||
# Public API of the database service package: both converters imported
# above are re-exported so `from ... import *` exposes each of them.
__all__ = [
    "xml_to_db",
    "json_to_db",
]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| import sqlite3 | ||
| import argparse | ||
| from pathlib import Path | ||
| import os | ||
| import logging | ||
| import json | ||
|
|
||
| # Configure logger | ||
| logging.basicConfig(level=logging.INFO) | ||
| logger = logging.getLogger(__name__) | ||
|
|
||
| # Always use this database file | ||
| BASE_OUTPUT_DIR = os.path.join(os.path.expanduser("~"), ".kath", "shared", "data", "blast_results") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This path should come from constants.py. |
||
| DB_FILENAME = "json.db" | ||
| SQLITE_DB_FILE = os.path.join(BASE_OUTPUT_DIR, DB_FILENAME) | ||
|
|
||
def create_database(sqlite_db_file: str) -> None:
    """
    Create the SQLite database and the variants table if it doesn't exist.

    Args:
        sqlite_db_file: Path to the SQLite database file. Connecting to a
            path that does not exist yet creates the file.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table
            cannot be created.
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS variants (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER,
                query_id TEXT,
                subject_id TEXT,
                chromosome TEXT,
                position INTEGER,
                variation_type TEXT,
                reference_allele TEXT,
                query_allele TEXT,
                query_position INTEGER,
                hsp_score REAL,
                hsp_evalue REAL,
                hsp_identity REAL,
                hsp_align_length INTEGER,
                hsp_query_start INTEGER,
                hsp_subject_start INTEGER,
                hsp_strand TEXT,
                hsp_gaps INTEGER
            );
        """)
        conn.commit()
    finally:
        # Release the database handle even when the DDL statement fails.
        conn.close()
|
|
||
def get_next_file_id(sqlite_db_file: str) -> int:
    """
    Get the next file_id to use for a new JSON file.

    Args:
        sqlite_db_file: Path to the SQLite database holding the
            ``variants`` table.

    Returns:
        One more than the largest ``file_id`` stored in ``variants``, or 1
        when no ``file_id`` has been stored yet.

    Raises:
        sqlite3.Error: If the query fails, e.g. the table does not exist.
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        row = conn.execute("SELECT MAX(file_id) FROM variants").fetchone()
    finally:
        # Close the handle on the error path too (e.g. missing table).
        conn.close()

    # MAX() over an empty table yields a single NULL, surfaced as None.
    highest = row[0] if row else None
    if highest is None:
        return 1
    return highest + 1
|
|
||
def json_to_db(json_file_path: str) -> str:
    """
    Parse a JSON file and insert its data into a shared SQLite database.

    Every record of the file is stored as one row of the ``variants`` table,
    tagged with a ``file_id`` shared by all rows from this file.

    Args:
        json_file_path: Path to the JSON file. It must contain an iterable
            of objects; keys missing from an object are stored as NULL.

    Returns:
        Path to the shared SQLite database.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        sqlite3.Error: If the database cannot be written.
    """
    # Ensure the output directory exists
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

    # Always use the same database file
    sqlite_db_file = SQLITE_DB_FILE

    # Create the database if needed
    create_database(sqlite_db_file)

    # Determine the file_id for this JSON file
    file_id = get_next_file_id(sqlite_db_file)

    # Load the JSON file
    with open(json_file_path, "r", encoding="utf-8") as f:
        variant_list = json.load(f)

    # Build every row before touching the database so that a malformed
    # record aborts the import without a connection being open.
    rows = [
        (
            file_id,
            variant.get("query_id"),
            variant.get("subject_id"),
            variant.get("chromosome"),
            variant.get("position"),
            variant.get("variation_type"),
            variant.get("reference_allele"),
            variant.get("query_allele"),
            variant.get("query_position"),
            variant.get("hsp_score"),
            variant.get("hsp_evalue"),
            variant.get("hsp_identity"),
            variant.get("hsp_align_length"),
            variant.get("hsp_query_start"),
            variant.get("hsp_subject_start"),
            json.dumps(variant.get("hsp_strand")),  # store as JSON string
            variant.get("hsp_gaps"),
        )
        for variant in variant_list
    ]

    # Insert data into the database
    conn = sqlite3.connect(sqlite_db_file)
    try:
        conn.executemany("""
            INSERT INTO variants (
                file_id, query_id, subject_id, chromosome, position, variation_type,
                reference_allele, query_allele, query_position, hsp_score, hsp_evalue,
                hsp_identity, hsp_align_length, hsp_query_start, hsp_subject_start,
                hsp_strand, hsp_gaps
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, rows)
        conn.commit()
    finally:
        # Closing without a commit discards a partially applied insert.
        conn.close()

    logger.info(
        "Inserted variant data from file_id %s into the database: %s",
        file_id,
        sqlite_db_file,
    )

    return sqlite_db_file
|
|
||
if __name__ == "__main__":
    # Command-line entry point: import the JSON file passed via --json.
    cli = argparse.ArgumentParser(
        description="Parse JSON and insert results into a shared SQLite database."
    )
    cli.add_argument("--json", required=True, help="Path to JSON file")
    options = cli.parse_args()

    updated_db = json_to_db(options.json)
    print(f"Database updated at: {updated_db}")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
# Runs only when this file is executed as a script; it just prints a notice
# and performs no database work.
if __name__ == "__main__":
    print("Database service module executed directly.")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| import sqlite3 | ||
| import argparse | ||
| from pathlib import Path | ||
| import os | ||
| import logging | ||
| from xml.etree import ElementTree as ET | ||
|
|
||
| # Configure logger | ||
| logging.basicConfig(level=logging.INFO) | ||
| logger = logging.getLogger(__name__) | ||
|
|
||
| # Always use this database file | ||
| BASE_OUTPUT_DIR = os.path.join(os.path.expanduser("~"), ".kath", "shared", "data", "blast_results") | ||
| DB_FILENAME = "xml.db" | ||
| SQLITE_DB_FILE = os.path.join(BASE_OUTPUT_DIR, DB_FILENAME) | ||
|
Comment on lines
+13
to
+15
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. constants 😅 |
||
|
|
||
def create_database(sqlite_db_file: str) -> None:
    """
    Create the SQLite database and the hsps table if it doesn't exist.

    Args:
        sqlite_db_file: Path to the SQLite database file. Connecting to a
            path that does not exist yet creates the file.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table
            cannot be created.
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        # file_id groups all rows that were imported from the same XML file.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS hsps (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER,
                hit_id INTEGER,
                hsp_num INTEGER,
                bit_score REAL,
                score INTEGER,
                evalue REAL,
                query_from INTEGER,
                query_to INTEGER,
                hit_from INTEGER,
                hit_to INTEGER,
                query_frame INTEGER,
                hit_frame INTEGER,
                identity INTEGER,
                positive INTEGER,
                gaps INTEGER,
                align_len INTEGER,
                qseq TEXT,
                hseq TEXT,
                midline TEXT
            );
        """)
        conn.commit()
    finally:
        # Release the database handle even when the DDL statement fails.
        conn.close()
|
|
||
def get_next_file_id(sqlite_db_file: str) -> int:
    """
    Get the next file_id to use for a new XML file.

    Args:
        sqlite_db_file: Path to the SQLite database holding the ``hsps``
            table.

    Returns:
        One more than the largest ``file_id`` stored in ``hsps``, or 1 when
        no ``file_id`` has been stored yet.

    Raises:
        sqlite3.Error: If the query fails, e.g. the table does not exist.
    """
    conn = sqlite3.connect(sqlite_db_file)
    try:
        row = conn.execute("SELECT MAX(file_id) FROM hsps").fetchone()
    finally:
        # Close the handle on the error path too (e.g. missing table).
        conn.close()

    # MAX() over an empty table yields a single NULL, surfaced as None.
    highest = row[0] if row else None
    if highest is None:
        return 1
    return highest + 1
|
|
||
def xml_to_db(xml_file_path: str) -> str:
    """
    Parse a BLAST XML file and insert its data into a shared SQLite database.

    Every ``Hsp`` element found anywhere in the document becomes one row of
    the ``hsps`` table, tagged with a ``file_id`` shared by all rows from
    this file.

    Args:
        xml_file_path: Path to the BLAST XML file.

    Returns:
        Path to the shared SQLite database.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        TypeError: If an ``Hsp`` element lacks one of the numeric fields.
        ValueError: If a numeric field holds a non-numeric value.
        sqlite3.Error: If the database cannot be written.
    """
    # Ensure the output directory exists
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

    # Always use the same database file
    sqlite_db_file = SQLITE_DB_FILE

    # Create the database if needed
    create_database(sqlite_db_file)

    # Determine the file_id for this XML file
    file_id = get_next_file_id(sqlite_db_file)

    # Parse the XML file
    root = ET.parse(xml_file_path).getroot()

    # Numeric child tags in column order, paired with the conversion applied
    # to their text.
    numeric_fields = (
        ("Hsp_num", int),
        ("Hsp_bit-score", float),
        ("Hsp_score", int),
        ("Hsp_evalue", float),
        ("Hsp_query-from", int),
        ("Hsp_query-to", int),
        ("Hsp_hit-from", int),
        ("Hsp_hit-to", int),
        ("Hsp_query-frame", int),
        ("Hsp_hit-frame", int),
        ("Hsp_identity", int),
        ("Hsp_positive", int),
        ("Hsp_gaps", int),
        ("Hsp_align-len", int),
    )
    text_fields = ("Hsp_qseq", "Hsp_hseq", "Hsp_midline")

    # Build every row before touching the database so that a malformed HSP
    # aborts the import without a connection being open.
    # NOTE(review): hit_id is always stored as NULL; nothing here links an
    # HSP to its hit - confirm whether that is intended.
    rows = [
        (
            file_id,
            None,
            *(convert(hsp.findtext(tag)) for tag, convert in numeric_fields),
            *(hsp.findtext(tag) for tag in text_fields),
        )
        for hsp in root.findall(".//Hsp")
    ]

    # Insert data into the database
    conn = sqlite3.connect(sqlite_db_file)
    try:
        conn.executemany("""
            INSERT INTO hsps (file_id, hit_id, hsp_num, bit_score, score, evalue, query_from, query_to, hit_from, hit_to,
                              query_frame, hit_frame, identity, positive, gaps, align_len, qseq, hseq, midline)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, rows)
        conn.commit()
    finally:
        # Closing without a commit discards a partially applied insert.
        conn.close()

    logger.info(
        "Inserted HSP data from file_id %s into the database: %s",
        file_id,
        sqlite_db_file,
    )

    return sqlite_db_file
|
|
||
if __name__ == "__main__":
    # Command-line entry point: import the BLAST XML file passed via --xml.
    cli = argparse.ArgumentParser(
        description="Parse BLAST XML and insert results into a shared SQLite database."
    )
    cli.add_argument("--xml", required=True, help="Path to BLAST XML file")
    options = cli.parse_args()

    updated_db = xml_to_db(options.xml)
    print(f"Database updated at: {updated_db}")
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -13,7 +13,7 @@ | |||||
| from Bio import Entrez | ||||||
| from dotenv import load_dotenv | ||||||
|
|
||||||
| from src.utils.logger import get_logger | ||||||
| from utils.logger import get_logger | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| from services.utils.script_setup import ( | ||||||
| EnvSetup, | ||||||
| FolderSetup, | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this here? Remove it