diff --git a/README.md b/README.md index 9eaee1f..e40318b 100644 --- a/README.md +++ b/README.md @@ -27,3 +27,22 @@ that indicate how well a particular entry is performing in relation with the res Results are stored under /home/bioinfo/services/emv/data/emdbs/emd-***** The JSON files emd-__emv_.json files are automatically and regularl retrieved from Campins (also from Rinchen-dos for dev/testing purposes). This allow to populate the corresponding WebServices (BWS) that will allow to show the _Validation and Quality_ tracks in 3DBionotes. +## Notes + +### Chimera + +For some PDBs, we got an error on the calculation of Q-score. The problem is at `~/.local/UCSF-Chimera64-1.14/share/mapq/qscores.py`, line 1786: + +``` +if at.isBB : +``` + +Some entries do not have this property, so we need to check the property existence first: + +``` +if hasattr(at, 'isBB') and at.isBB: +``` + +Note: This folder `mapq` is not part of the UCSF Chimera core, but an external plugin (mapq): + +https://github.com/gregdp/mapq/blob/v1.6/mapq_chimera/qscores.py#L1905 diff --git a/tools/calc_updates_batch.py b/tools/calc_updates_batch.py index 144d110..fd67095 100644 --- a/tools/calc_updates_batch.py +++ b/tools/calc_updates_batch.py @@ -70,9 +70,11 @@ def get_queue_cmmd_dependant(map_id, pdb_id, parent_job_id, method, script_name) """ get queue command dependant """ + # Use --kill-on-invalid-dep so the job is cancelled when its parent INIT job fails. cmmd = f"sbatch \ --job-name={METHOD_INITIALS[method]}-{map_id} \ --dependency=afterok:{parent_job_id} \ + --kill-on-invalid-dep=yes \ --output=/home/bioinfo/services/emv/logs/%j_%x.out \ --error=/home/bioinfo/services/emv/logs/%j_%x.err \ {os.path.join(TOOLS_PATH, script_name)} {map_id} {pdb_id}" diff --git a/tools/script_emv_setup.py b/tools/script_emv_setup.py index b48d7ef..62c0a3a 100644 --- a/tools/script_emv_setup.py +++ b/tools/script_emv_setup.py @@ -12,9 +12,9 @@ import requests -EMDB_EBI_REPOSITORY = "https://ftp.ebi.ac.uk/pub/databases/emdb/structures/" -EMDB_WWPDB_REPOSITORY = "https://ftp.wwpdb.org/pub/emdb/structures/" -EMDB_RCSB_REPOSITORY = "https://ftp.rcsb.org/pub/emdb/structures/" +EMDB_EBI_REPOSITORY = "http://ftp.ebi.ac.uk/pub/databases/emdb/structures/" +EMDB_WWPDB_REPOSITORY = "https://ftp.wwpdb.org/pub/emdb/structures/" # not used +EMDB_RCSB_REPOSITORY = "https://ftp.rcsb.org/pub/emdb/structures/" # not used EMDB_EBI_JSON_REPOSITORY = "https://www.ebi.ac.uk/emdb/api/entry/" EMDB_FTP_SERVER = "ftp.ebi.ac.uk" EMDB_FTP_DIR = "pub/databases/emdb/structures/%s/other" diff --git a/tools/script_execute_init.py b/tools/script_execute_init.py index 8ec1b2b..bb4b985 100755 --- a/tools/script_execute_init.py +++ b/tools/script_execute_init.py @@ -45,8 +45,8 @@ # Download EMDB metadata JSON file resolution, sampling, size, org_x, org_y, org_z = get_parameters(mapId, workpath) - # Download half-maps if available - hmap1, hmap2 = download_emdb_half_maps(mapId, workpath) + # Download half-maps if available (EST: call disabled, hmap1/hmap2 not used) + # hmap1, hmap2 = download_emdb_half_maps(mapId, workpath) # Download PDB model file inPdb = download_pdb_model(pdbId, workpath) diff --git a/tools/script_execute_mapq.py b/tools/script_execute_mapq.py index 113f4f0..a53d17d 100755 --- a/tools/script_execute_mapq.py +++ b/tools/script_execute_mapq.py @@ -18,16 +18,20 @@ def execute_mapq(map_filename, pdb, dir_path): """compute MapQ""" + mapq_path = os.path.join(dir_path, map_filename) + pdb_path = os.path.join(dir_path, pdb) os_command = f"python3 {TOOLS_PATH}mapq_chimera/mapq_cmd.py \ - /home/bioinfo/.local/UCSF-Chimera64-1.14/ {dir_path}/{map_filename} {dir_path}/{pdb} np=8" + /home/bioinfo/.local/UCSF-Chimera64-1.14/ {mapq_path} {pdb_path} np=8" print("--> Map-Q command:", os_command) os.system(os_command) def convert_mapq_to_aapdb(mapq, aa_pdb, dir_path): """Save to aa.PDB file""" + mapq_path = os.path.join(dir_path, mapq) + pdb_path = os.path.join(dir_path, aa_pdb) os_command = f"python3 {TOOLS_PATH}convert_mapQvol_to_pdb.py \ - {dir_path}/{mapq} {dir_path}/{aa_pdb}" + {mapq_path} {pdb_path}" os.system(os_command) diff --git a/update/getNewPDBEntries.py b/update/getNewPDBEntries.py index 06e6524..0d5648a 100644 --- a/update/getNewPDBEntries.py +++ b/update/getNewPDBEntries.py @@ -4,7 +4,6 @@ import csv import requests from datetime import datetime, timedelta -from rcsbsearch import rcsb_attributes as attrs, TextQuery WS_URL = "https://3dbionotes.cnb.csic.es/api/mappings/PDB/EMDB/" DAYS_INTERVAL = 7 @@ -13,25 +12,31 @@ FN_EM_ENTRIES = "_new-all_entries_em.txt" FN_EMDB_PDB_ENTRIES = "_new-all_entries_mappings.csv" +def to_iso(dt): + """Convert a datetime object to ISO format string.""" + return dt.replace(microsecond=0).isoformat() + "Z" -def getNewPDBEntries(d1, interval, withEM=False): +def getNewPDBEntries(end_date, interval_days, withEM=False): + """Get new PDB entries released within the specified interval from the end date.""" - iso1 = d1.replace(microsecond=0).isoformat() + start_date = end_date - timedelta(days=interval_days) - d0 = d1 - timedelta(days=interval) - iso0 = d0.replace(microsecond=0).isoformat() + # See https://www.ebi.ac.uk/pdbe/api/doc/search.html + params = { + "q": " AND ".join([ + f"release_date:[{to_iso(start_date)} TO {to_iso(end_date)}]", + # "deposition_site:PDBE", # "PDBE" | "PDBJ" | "RCSB" + f"emdb_id:[* TO *]" if withEM else "-emdb_id:[* TO *]" + ]), + "rows": 10_000, + "fl": "pdb_id", + } - q2 = attrs.rcsb_accession_info.initial_release_date >= str(iso0+'Z') - q3 = attrs.rcsb_accession_info.initial_release_date <= str(iso1+'Z') - q4 = attrs.rcsb_entry_info.experimental_method != "EM" - q5 = attrs.rcsb_entry_info.experimental_method == "EM" - query = q2 & q3 - if withEM: - query = query & q5 - else: - query = query & q4 + response = requests.get("https://www.ebi.ac.uk/pdbe/search/pdb/select", params=params) + response.raise_for_status() + data = response.json() - return list(query()) + return [doc["pdb_id"] for doc in data["response"]["docs"]] def save2file(list, filename):