Skip to content

Commit d27a686

Browse files
authored
Merge pull request #1000 from int-brain-lab/develop
3.4.0
2 parents 88d39b1 + da0f85b commit d27a686

File tree

17 files changed

+306
-183
lines changed

17 files changed

+306
-183
lines changed

brainbox/io/one.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,7 @@ class SpikeSortingLoader:
808808
spike_sorter: str = 'pykilosort'
809809
spike_sorting_path: Path = None
810810
_sync: dict = None
811+
revision: str = None
811812

812813
def __post_init__(self):
813814
# pid gets precedence
@@ -886,7 +887,7 @@ def _get_spike_sorting_collection(self, spike_sorter=None):
886887
_logger.debug(f"selecting: {collection} to load amongst candidates: {self.collections}")
887888
return collection
888889

889-
def load_spike_sorting_object(self, obj, *args, **kwargs):
890+
def load_spike_sorting_object(self, obj, *args, revision=None, **kwargs):
890891
"""
891892
Loads an ALF object
892893
:param obj: object name, str between 'spikes', 'clusters' or 'channels'
@@ -895,8 +896,10 @@ def load_spike_sorting_object(self, obj, *args, **kwargs):
895896
:param collection: string specifiying the collection, for example 'alf/probe01/pykilosort'
896897
:param kwargs: additional arguments to be passed to one.api.One.load_object
897898
:param missing: 'raise' (default) or 'ignore'
899+
:param revision: the dataset revision to load
898900
:return:
899901
"""
902+
revision = revision if revision is not None else self.revision
900903
self.download_spike_sorting_object(obj, *args, **kwargs)
901904
return self._load_object(self.files[obj])
902905

@@ -907,7 +910,7 @@ def get_version(self, spike_sorter=None):
907910
return dset[0]['version'] if len(dset) else 'unknown'
908911

909912
def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=None, collection=None,
910-
attribute=None, missing='raise', **kwargs):
913+
attribute=None, missing='raise', revision=None, **kwargs):
911914
"""
912915
Downloads an ALF object
913916
:param obj: object name, str between 'spikes', 'clusters' or 'channels'
@@ -917,8 +920,10 @@ def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=No
917920
:param kwargs: additional arguments to be passed to one.api.One.load_object
918921
:param attribute: list of attributes to load for the object
919922
:param missing: 'raise' (default) or 'ignore'
923+
:param revision: the dataset revision to load
920924
:return:
921925
"""
926+
revision = revision if revision is not None else self.revision
922927
if spike_sorter is None:
923928
spike_sorter = self.spike_sorter if self.spike_sorter is not None else 'iblsorter'
924929
if len(self.collections) == 0:
@@ -1170,12 +1175,13 @@ def url(self):
11701175
webclient = getattr(self.one, '_web_client', None)
11711176
return webclient.rel_path2url(get_alf_path(self.session_path)) if webclient else None
11721177

1173-
def _get_probe_info(self):
1178+
def _get_probe_info(self, revision=None):
1179+
revision = revision if revision is not None else self.revision
11741180
if self._sync is None:
11751181
timestamps = self.one.load_dataset(
1176-
self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}')
1182+
self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision)
11771183
_ = self.one.load_dataset( # this is not used here but we want to trigger the download for potential tasks
1178-
self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}')
1184+
self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision)
11791185
try:
11801186
ap_meta = spikeglx.read_meta_data(self.one.load_dataset(
11811187
self.eid, dataset='_spikeglx_*.ap.meta', collection=f'raw_ephys_data/{self.pname}'))

ibllib/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
import warnings
44

5-
__version__ = '3.3.1'
5+
__version__ = '3.4.0'
66
warnings.filterwarnings('always', category=DeprecationWarning, module='ibllib')
77

88
# if this becomes a full-blown library we should let the logging configuration to the discretion of the dev

ibllib/io/extractors/ephys_fpga.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,8 @@ def build_trials(self, sync, chmap, display=False, **kwargs):
959959

960960
# If first trial start is missing first detected FPGA event doesn't match any Bpod
961961
# starts then it's probably a mis-assigned valve or trial end event.
962-
i1 = np.any(missing_bpod_idx == 0) and not np.any(np.isclose(fpga_events['intervals_0'][0], bpod_start))
962+
i1 = (self._has_delay_initiation() and np.any(missing_bpod_idx == 0)
963+
and not np.any(np.isclose(fpga_events['intervals_0'][0], bpod_start)))
963964
# skip mis-assigned first FPGA trial start
964965
t_trial_start = np.sort(np.r_[fpga_events['intervals_0'][int(i1):], missing_bpod])
965966
ibpod = np.sort(np.r_[ibpod, missing_bpod_idx])
@@ -1178,7 +1179,8 @@ def get_bpod_event_times(self, sync, chmap, bpod_event_ttls=None, display=False,
11781179
bpod_event_intervals = self._assign_events(
11791180
bpod['times'], bpod['polarities'], bpod_event_ttls, display=display)
11801181

1181-
if 'trial_start' not in bpod_event_intervals or bpod_event_intervals['trial_start'].size == 0:
1182+
if ('trial_start' not in bpod_event_intervals or bpod_event_intervals['trial_start'].size == 0
1183+
or not self._has_delay_initiation()):
11821184
return bpod, bpod_event_intervals
11831185

11841186
# The first trial pulse is longer and often assigned to another event.
@@ -1342,6 +1344,27 @@ def sync_bpod_clock(bpod_trials, fpga_trials, sync_field):
13421344

13431345
return fcn, drift, ibpod, ifpga
13441346

1347+
def _has_delay_initiation(self) -> bool:
1348+
"""
1349+
Check if the first trial has a `delay_initiation` state.
1350+
1351+
Prior to iblrig v8.28.0, the first trial was used to handle, both, the detection of camera pulses and the
1352+
handling of the initial delay. This may cause issues with the extraction of events during the first trial.
1353+
1354+
Returns
1355+
-------
1356+
bool
1357+
True if iblrig version < 8.28.0 or the first trial has a `delay_initiation` state, False otherwise.
1358+
1359+
Notes
1360+
-----
1361+
This method only returns valid results if, both, `self.settings` and `self.bpod_extractor` are set.
1362+
"""
1363+
iblrig_version = version.parse((self.settings or {}).get("IBLRIG_VERSION", "0.0.0"))
1364+
has_delay_init = (hasattr(self, 'bpod_extractor') and 'delay_initiation' in
1365+
self.bpod_extractor.bpod_trials[0]['behavior_data']['States timestamps'])
1366+
return iblrig_version < version.parse('8.28.0') or has_delay_init
1367+
13451368

13461369
class FpgaTrialsHabituation(FpgaTrials):
13471370
"""Extract habituationChoiceWorld trial events from an NI DAQ."""

ibllib/oneibl/data_handlers.py

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from one.api import ONE
1717
from one.webclient import AlyxClient
1818
from one.util import filter_datasets
19-
from one.alf.path import add_uuid_string, session_path_parts, get_alf_path
19+
from one.alf.path import add_uuid_string, get_alf_path, ensure_alf_path
2020
from one.alf.cache import _make_datasets_df
2121
from iblutil.util import flatten, ensure_list
2222

@@ -461,8 +461,8 @@ def dataset_from_name(name, datasets):
461461
462462
Parameters
463463
----------
464-
name : str
465-
The name of the dataset.
464+
name : str, function
465+
The name of the dataset or a function to match the dataset name.
466466
datasets : list of ExpectedDataset
467467
A list of ExpectedDataset instances.
468468
@@ -475,14 +475,18 @@ def dataset_from_name(name, datasets):
475475
matches = []
476476
for dataset in datasets:
477477
if dataset.operator is None:
478-
if dataset._identifiers[2] == name:
479-
matches.append(dataset)
478+
if isinstance(name, str):
479+
if dataset._identifiers[2] == name:
480+
matches.append(dataset)
481+
else:
482+
if name(dataset._identifiers[2]):
483+
matches.append(dataset)
480484
else:
481485
matches.extend(dataset_from_name(name, dataset._identifiers))
482486
return matches
483487

484488

485-
def update_collections(dataset, new_collection, substring=None, unique=None):
489+
def update_collections(dataset, new_collection, substring=None, unique=None, exact_match=False):
486490
"""
487491
Update the collection of a dataset.
488492
@@ -497,6 +501,12 @@ def update_collections(dataset, new_collection, substring=None, unique=None):
497501
substring : str, optional
498502
An optional substring in the collection to replace with new collection(s). If None, the
499503
entire collection will be replaced.
504+
unique : bool, optional
505+
When provided, this will be used to set the `unique` attribute of the new dataset(s). If
506+
None, the `unique` attribute will be set to True if the collection does not contain
507+
wildcards.
508+
exact_match : bool
509+
If True, the collection will be replaced only if it contains `substring`.
500510
501511
Returns
502512
-------
@@ -511,7 +521,10 @@ def update_collections(dataset, new_collection, substring=None, unique=None):
511521
if revision is not None:
512522
raise NotImplementedError
513523
if substring:
514-
after = [(collection or '').replace(substring, x) or None for x in after]
524+
if exact_match and substring not in collection:
525+
after = [collection]
526+
else:
527+
after = [(collection or '').replace(substring, x) or None for x in after]
515528
if unique is None:
516529
unique = [not set(name + (x or '')).intersection('*[?') for x in after]
517530
else:
@@ -523,7 +536,7 @@ def update_collections(dataset, new_collection, substring=None, unique=None):
523536
updated &= D(name, folder, not isinstance(dataset, OptionalDataset), register, unique=unq)
524537
else:
525538
updated = copy(dataset)
526-
updated._identifiers = [update_collections(dd, new_collection, substring, unique)
539+
updated._identifiers = [update_collections(dd, new_collection, substring, unique, exact_match)
527540
for dd in updated._identifiers]
528541
return updated
529542

@@ -536,7 +549,7 @@ def __init__(self, session_path, signature, one=None):
536549
:param signature: input and output file signatures
537550
:param one: ONE instance
538551
"""
539-
self.session_path = session_path
552+
self.session_path = ensure_alf_path(session_path)
540553
self.signature = _parse_signature(signature)
541554
self.one = one
542555
self.processed = {} # Map of filepaths and their processed records (e.g. upload receipts or Alyx records)
@@ -566,7 +579,7 @@ def getData(self, one=None):
566579
dfs = [file.filter(session_datasets)[1] for file in self.signature['input_files']]
567580
return one._cache.datasets.iloc[0:0] if len(dfs) == 0 else pd.concat(dfs).drop_duplicates()
568581

569-
def getOutputFiles(self):
582+
def getOutputFiles(self, session_path=None):
570583
"""
571584
Return a data frame of output datasets found on disk.
572585
@@ -575,10 +588,11 @@ def getOutputFiles(self):
575588
pandas.DataFrame
576589
A dataset data frame of datasets on disk that were specified in signature['output_files'].
577590
"""
578-
assert self.session_path
591+
session_path = self.session_path if session_path is None else session_path
592+
assert session_path
579593
# Next convert datasets to frame
580594
# Create dataframe of all ALF datasets
581-
df = _make_datasets_df(self.session_path, hash_files=False).set_index(['eid', 'id'])
595+
df = _make_datasets_df(session_path, hash_files=False).set_index(['eid', 'id'])
582596
# Filter outputs
583597
if len(self.signature['output_files']) == 0:
584598
return pd.DataFrame()
@@ -714,7 +728,7 @@ def setUp(self, **_):
714728
_logger.warning('Space left on server is < 500GB, won\'t re-download new data')
715729
return
716730

717-
rel_sess_path = '/'.join(self.session_path.parts[-3:])
731+
rel_sess_path = self.session_path.session_path_short()
718732
target_paths = []
719733
source_paths = []
720734
for i, d in df.iterrows():
@@ -761,13 +775,13 @@ def __init__(self, session_path, signature, one=None):
761775
"""
762776
super().__init__(session_path, signature, one=one)
763777

764-
def setUp(self, **_):
778+
def setUp(self, check_hash=True, **_):
765779
"""
766780
Function to download necessary data to run tasks using ONE
767781
:return:
768782
"""
769783
df = super().getData()
770-
self.one._check_filesystem(df, check_hash=False)
784+
self.one._check_filesystem(df, check_hash=check_hash)
771785

772786
def uploadData(self, outputs, version, **kwargs):
773787
"""
@@ -843,8 +857,8 @@ def uploadData(self, outputs, version, **kwargs):
843857
"""
844858
# Set up Globus
845859
from one.remote.globus import Globus # noqa
846-
self.globus = Globus(client_name='server', headless=True)
847-
self.lab = session_path_parts(self.session_path, as_dict=True)['lab']
860+
self.globus = Globus(client_name=kwargs.pop('client_name', 'server'), headless=True)
861+
self.lab = self.session_path.lab
848862
if self.lab == 'cortexlab' and 'cortexlab' in self.one.alyx.base_url:
849863
base_url = 'https://alyx.internationalbrainlab.org'
850864
_logger.warning('Changing Alyx client to %s', base_url)
@@ -957,25 +971,30 @@ def __init__(self, session_path, signatures, one=None):
957971
super().__init__(session_path, signatures, one=one)
958972
self.patch_path = os.getenv('SDSC_PATCH_PATH', SDSC_PATCH_PATH)
959973
self.root_path = SDSC_ROOT_PATH
974+
self.linked_files = [] # List of symlinks created to run tasks
960975

961-
def setUp(self, task):
976+
def setUp(self, task, **_):
962977
"""Function to create symlinks to necessary data to run tasks."""
963978
df = super().getData()
964979

965-
SDSC_TMP = Path(self.patch_path.joinpath(task.__class__.__name__))
980+
SDSC_TMP = ensure_alf_path(self.patch_path.joinpath(task.__class__.__name__))
966981
session_path = Path(get_alf_path(self.session_path))
967982
for uuid, d in df.iterrows():
968983
file_path = session_path / d['rel_path']
969984
file_uuid = add_uuid_string(file_path, uuid)
970985
file_link = SDSC_TMP.joinpath(file_path)
971986
file_link.parent.mkdir(exist_ok=True, parents=True)
972-
try:
987+
try: # TODO append link to task attribute
973988
file_link.symlink_to(
974989
Path(self.root_path.joinpath(file_uuid)))
990+
self.linked_files.append(file_link)
975991
except FileExistsError:
976992
pass
977-
978993
task.session_path = SDSC_TMP.joinpath(session_path)
994+
# If one of the symlinked input files is also an expected output, raise here to avoid overwriting
995+
# In the future we may instead copy the data under this condition
996+
assert self.getOutputFiles(session_path=task.session_path).shape[0] == 0, (
997+
"On SDSC patcher, output files should be distinct from input files to avoid overwriting")
979998

980999
def uploadData(self, outputs, version, **kwargs):
9811000
"""

ibllib/oneibl/patcher.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def __init__(self, alyx=None, client_name='default'):
398398
self.alyx = alyx or AlyxClient()
399399
globus.Globus.__init__(self, client_name=client_name) # NB we don't init Patcher as we're not using ONE
400400

401-
def delete_dataset(self, dataset, dry=False):
401+
def delete_dataset(self, dataset, dry=False, aws_profile='ibladmin'):
402402
"""
403403
Delete a dataset off Alyx and remove file record from all Globus repositories.
404404
@@ -408,6 +408,8 @@ def delete_dataset(self, dataset, dry=False):
408408
The dataset record or ID to delete.
409409
dry : bool
410410
If true, dataset is not deleted and file paths that would be removed are returned.
411+
aws_profile : str
412+
The AWS profile name to use for S3 deletion.
411413
412414
Returns
413415
-------
@@ -448,15 +450,14 @@ def is_aws(repository_name):
448450

449451
# Remove S3 files
450452
if s3_files:
451-
cmd = ['aws', 's3', 'rm', *s3_files, '--profile', 'ibladmin']
453+
cmd = ['aws', 's3', 'rm', *s3_files, '--profile', aws_profile]
452454
if dry:
453455
cmd.append('--dryrun')
454456
if _logger.level > logging.DEBUG:
455457
log_function = _logger.error
456458
cmd.append('--only-show-errors') # Suppress verbose output
457459
else:
458460
log_function = _logger.debug
459-
cmd.append('--no-progress') # Suppress progress info, estimated time, etc.
460461
_logger.debug(' '.join(cmd))
461462
process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
462463
with process.stdout:
@@ -678,7 +679,7 @@ def patch_dataset(self, file_list, dry=False, ftp=False, force=False, **kwargs):
678679

679680
exists = self.check_datasets(file_list)
680681
if len(exists) > 0 and not force:
681-
_logger.error(f'Files: {", ".join([f.name for f in file_list])} already exist, to force set force=True')
682+
_logger.error(f'Files: {", ".join([f.name for f in file_list])} already exist, to overwrite set force=True')
682683
return
683684

684685
response = super().patch_dataset(file_list, dry=dry, repository=self.s3_repo, ftp=False, **kwargs)

ibllib/oneibl/registration.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
from one.registration import RegistrationClient, get_dataset_type
1111
from one.remote.globus import get_local_endpoint_id, get_lab_from_endpoint_id
1212
from one.webclient import AlyxClient, no_cache
13-
from one.converters import ConversionMixin, datasets2records
13+
from one.converters import datasets2records
1414
import one.alf.exceptions as alferr
15+
from one.alf.path import ensure_alf_path
1516
from one.api import ONE
1617
from iblutil.util import ensure_list
1718

@@ -133,7 +134,7 @@ def register_session_raw_data(session_path, one=None, overwrite=False, **kwargs)
133134
overwrite : bool
134135
If set to True, will patch the datasets. It will take very long. If set to False (default)
135136
will skip all already registered data.
136-
**kwargs
137+
kwargs
137138
Optional keyword arguments for one.registration.RegistrationClient.register_files.
138139
139140
Returns
@@ -553,12 +554,13 @@ def get_lab(session_path, alyx=None):
553554
one.remote.globus.get_lab_from_endpoint_id
554555
"""
555556
alyx = alyx or AlyxClient()
556-
if not (ref := ConversionMixin.path2ref(session_path)):
557+
session_path = ensure_alf_path(session_path)
558+
if not session_path.is_session_path():
557559
raise ValueError(f'Failed to parse session path: {session_path}')
558560

559-
labs = [x['lab'] for x in alyx.rest('subjects', 'list', nickname=ref['subject'])]
561+
labs = [x['lab'] for x in alyx.rest('subjects', 'list', nickname=session_path.subject)]
560562
if len(labs) == 0:
561-
raise alferr.AlyxSubjectNotFound(ref['subject'])
563+
raise alferr.AlyxSubjectNotFound(session_path.subject)
562564
elif len(labs) > 1: # More than one subject with this nickname
563565
# use local endpoint ID to find the correct lab
564566
endpoint_labs = get_lab_from_endpoint_id(alyx=alyx)

0 commit comments

Comments
 (0)