Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions libraries/client/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Scripts for getting lists of repositories on OSBv2 and associated databases

These scripts can be used to get a cached list of the current repositories on OSBv2 and v2dev, and also keep them up to date with the contents of DANDI, ModelDB, BioModels, etc.

0) A GitHub access token must be created so scripts using the python github package can be run.

Save in `github.auth` locally.

1) Update current cached lists:

```
./info_all.sh -q # Runs a quick check of contents of OSBv1, OSBv2, OSBv2dev, OSB repos on Github & DANDI Archive.
./info_all.sh # Same as above, but with BioModels & ModelDB
```

Contents will be saved in JSON files in `cached_info/`

2) Update an OSBv1 project.

If there is a new OSBv1 project, which hasn't been added to v2/v2dev, run:

```
python loadosbv1.py -v2dev -dry # this does a dry run and prints info on which projects/repos it still needs to add
```

Get an access token by logging in to v2dev, opening the Web Developer console, loading a page, copying the network access token (e.g. abcxxx123) and using this to add the repo via the api:

```
python loadosbv1.py abcxxx123 -v2dev # add new repos

python osb_info.py -v2dev # regenerate cached list of all repos
```

Then do the same using -v2 for OSBv2.
105 changes: 40 additions & 65 deletions libraries/client/biomodels_info.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,38 @@
"""
Script to get Biomodels project info
Script to get BioModels project info
"""

import requests
import json
import pprint
from loadbiomodels import get_model_identifiers, get_model_info

verbose = True #
verbose = False

info_all = {}

API_URL: str = "https://www.ebi.ac.uk/biomodels"
out_format = "json"


def get_model_identifiers():
    """Return the identifiers of all models in BioModels.

    Queries the BioModels REST API ``/model/identifiers`` endpoint and
    returns the decoded JSON payload (a dict; callers read its ``"models"``
    key for the list of model ids).

    Raises:
        requests.HTTPError: if the API responds with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    # An explicit timeout prevents the script from hanging forever if the
    # EBI server stops responding (requests has NO default timeout).
    response = requests.get(
        API_URL + "/model/identifiers?format=" + out_format, timeout=60
    )
    response.raise_for_status()
    return response.json()


def get_model_info(model_id):
    """Return the metadata record for one BioModels model.

    Queries the BioModels REST API ``/{model_id}`` endpoint and returns the
    decoded JSON payload (a dict with fields such as ``name`` and
    ``curationStatus``, as used by the calling script).

    Args:
        model_id: a BioModels identifier, e.g. ``"BIOMD0000000001"``.

    Raises:
        requests.HTTPError: if the API responds with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    # An explicit timeout prevents the script from hanging forever if the
    # EBI server stops responding (requests has NO default timeout).
    response = requests.get(
        API_URL + "/" + model_id + "?format=" + out_format, timeout=60
    )
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
min_index = 0
max_index = 20
index = 0

from loadbiomodels import get_model_identifiers
max_index = 10000
index = 1

model_ids = get_model_identifiers()["models"]

Expand All @@ -29,73 +44,33 @@
% (index, len(selection), index + min_index, model_id)
)

model_link = f"[{model_id}](https://www.ebi.ac.uk/biomodels/{model_id})"
info = get_model_info(model_id)
model_name = info["name"]
print(f" {model_id}: \n {pprint.pformat(info['name'])}--")
model_url = f"https://www.ebi.ac.uk/biomodels/{model_id}"
model_link = f"[{model_id}]({model_url})"
try:
info = get_model_info(model_id)
if info["curationStatus"] != "CURATED":
print(
" **** Not adding, as curationStatus = %s"
% info["curationStatus"]
)
else:
model_name = info["name"]
print(f" {model_id}: \n {pprint.pformat(info['name'])}--")

info_all[model_id] = info
"""
son.loads(get_page('https://modeldb.science/api/v1/models/%s'%model))
info_all[model_id] = info
except Exception as e:
msg = f"Error retrieving model at {model_url}: {e}"

print(' %s'%info[model]['name'])
if 'gitrepo' in info[model] and info[model]['gitrepo']:
with_gitrepo+=1
print(' gitrepo: %s'%info[model]['gitrepo'])
else:
print(' gitrepo: %s'%False)
print(" ******* %s" % msg)

expected_forks = 0
possible_mdb_repo = 'ModelDBRepository/%s'%(info[model]['id'])
try:
mdb_repo = gh.get_repo(possible_mdb_repo)

repo_to_use = mdb_repo
print(' Exists at: %s (def branch: %s; forks: %i)'%(mdb_repo.html_url, mdb_repo.default_branch, mdb_repo.forks))

possible_osbgh_repo = 'OpenSourceBrain/%s'%(info[model]['id'])
try:
osb_repo = gh.get_repo(possible_osbgh_repo)
msg = ' Exists at: %s (def branch: %s; forks: %i), order %i'%(osb_repo.html_url, osb_repo.default_branch, osb_repo.forks, index+min_index)
on_osbv2.append(msg)
print(msg)
repo_to_use = osb_repo
expected_forks+=1

info[model]['osbv2_gh_repo'] = repo_to_use.html_url
info[model]['osbv2_gh_branch'] = repo_to_use.default_branch
except:
print(' Missing fork: %s, forking now: %s'%(possible_osbgh_repo, fork_if_missing))
if fork_if_missing:
print(' Forking to: %s...'%possible_osbgh_repo)
org = gh.get_organization('OpenSourceBrain')
org.create_fork(mdb_repo,default_branch_only=False)
msg = ' Forked to: %s...'%possible_osbgh_repo
print(msg)
forked_now.append(msg)

else:
msg = ' Yet to be forked: %i, order %i; %s'%(info[model]['id'], index+min_index,info[model]['name'])
print(msg)
to_be_forked.append(msg)


if (not mdb_repo.forks==expected_forks) and (not (info[model]['id'] in known_to_have_other_forks)):
msg = ' Unexpected forks for %i (%s != %s)...'%(info[model]['id'], mdb_repo.forks,expected_forks)
print(msg)
many_forks.append(msg)

except:
msg = ' Problem locating repo for: %i (%i/%i) %s'%(info[model]['id'],index, len(selection), info[model]['name'])
print(msg)
errors.append(msg)"""
info_all[model_id] = {"error": msg}

index += 1

if verbose:
infop = pprint.pprint(info_all, compact=True)

print("\nThere were %i models checked\n" % (len(info)))
print("\nThere were %i models checked\n" % (len(info_all)))

filename = "cached_info/biomodels.json"

Expand All @@ -104,4 +79,4 @@
fp.write(strj)


print("Data on Biomodels (%i models) written to %s" % (len(info), filename))
print("Data on Biomodels (%i models) written to %s" % (len(info_all), filename))
Loading
Loading