int-brain-lab
diff --git a/brainbox/io/one.py b/brainbox/io/one.py
Lines changed: 11 additions & 5 deletions
diff --git a/ibllib/__init__.py b/ibllib/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/ibllib/io/extractors/ephys_fpga.py b/ibllib/io/extractors/ephys_fpga.py
Lines changed: 25 additions & 2 deletions
diff --git a/ibllib/oneibl/data_handlers.py b/ibllib/oneibl/data_handlers.py
Lines changed: 40 additions & 21 deletions
diff --git a/ibllib/oneibl/patcher.py b/ibllib/oneibl/patcher.py
Lines changed: 5 additions & 4 deletions
diff --git a/ibllib/oneibl/registration.py b/ibllib/oneibl/registration.py
Lines changed: 7 additions & 5 deletions
@@ -808,6 +808,7 @@ class SpikeSortingLoader:
     spike_sorter: str = 'pykilosort'
     spike_sorting_path: Path = None
     _sync: dict = None
+    revision: str = None
 
     def __post_init__(self):
         # pid gets precedence
@@ -886,7 +887,7 @@ def _get_spike_sorting_collection(self, spike_sorter=None):
         _logger.debug(f"selecting: {collection} to load amongst candidates: {self.collections}")
         return collection
 
-    def load_spike_sorting_object(self, obj, *args, **kwargs):
+    def load_spike_sorting_object(self, obj, *args, revision=None, **kwargs):
         """
         Loads an ALF object
         :param obj: object name, str between 'spikes', 'clusters' or 'channels'
@@ -895,8 +896,10 @@ def load_spike_sorting_object(self, obj, *args, **kwargs):
-        :param collection: string specifiying the collection, for example 'alf/probe01/pykilosort'
+        :param collection: string specifying the collection, for example 'alf/probe01/pykilosort'
         :param kwargs: additional arguments to be passed to one.api.One.load_object
         :param missing: 'raise' (default) or 'ignore'
+        :param revision: the dataset revision to load; defaults to `self.revision` when None
         :return:
         """
+        revision = revision if revision is not None else self.revision
-        self.download_spike_sorting_object(obj, *args, **kwargs)
+        self.download_spike_sorting_object(obj, *args, revision=revision, **kwargs)
         return self._load_object(self.files[obj])
 
@@ -907,7 +910,7 @@ def get_version(self, spike_sorter=None):
         return dset[0]['version'] if len(dset) else 'unknown'
 
     def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=None, collection=None,
-                                      attribute=None, missing='raise', **kwargs):
+                                      attribute=None, missing='raise', revision=None, **kwargs):
         """
         Downloads an ALF object
         :param obj: object name, str between 'spikes', 'clusters' or 'channels'
@@ -917,8 +920,10 @@ def download_spike_sorting_object(self, obj, spike_sorter=None, dataset_types=No
         :param kwargs: additional arguments to be passed to one.api.One.load_object
         :param attribute: list of attributes to load for the object
         :param missing: 'raise' (default) or 'ignore'
+        :param revision: the dataset revision to load
         :return:
         """
+        revision = revision if revision is not None else self.revision
         if spike_sorter is None:
             spike_sorter = self.spike_sorter if self.spike_sorter is not None else 'iblsorter'
         if len(self.collections) == 0:
@@ -1170,12 +1175,13 @@ def url(self):
         webclient = getattr(self.one, '_web_client', None)
         return webclient.rel_path2url(get_alf_path(self.session_path)) if webclient else None
 
-    def _get_probe_info(self):
+    def _get_probe_info(self, revision=None):
+        revision = revision if revision is not None else self.revision
         if self._sync is None:
             timestamps = self.one.load_dataset(
-                self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}')
+                self.eid, dataset='_spikeglx_*.timestamps.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision)
             _ = self.one.load_dataset(  # this is not used here but we want to trigger the download for potential tasks
-                self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}')
+                self.eid, dataset='_spikeglx_*.sync.npy', collection=f'raw_ephys_data/{self.pname}', revision=revision)
             try:
                 ap_meta = spikeglx.read_meta_data(self.one.load_dataset(
                     self.eid, dataset='_spikeglx_*.ap.meta', collection=f'raw_ephys_data/{self.pname}'))
 
@@ -2,7 +2,7 @@
 import logging
 import warnings
 
-__version__ = '3.3.1'
+__version__ = '3.4.0'
 warnings.filterwarnings('always', category=DeprecationWarning, module='ibllib')
 
 # if this becomes a full-blown library we should let the logging configuration to the discretion of the dev
 
@@ -959,7 +959,8 @@ def build_trials(self, sync, chmap, display=False, **kwargs):
 
             # If first trial start is missing first detected FPGA event doesn't match any Bpod
             # starts then it's probably a mis-assigned valve or trial end event.
-            i1 = np.any(missing_bpod_idx == 0) and not np.any(np.isclose(fpga_events['intervals_0'][0], bpod_start))
+            i1 = (self._has_delay_initiation() and np.any(missing_bpod_idx == 0)
+                  and not np.any(np.isclose(fpga_events['intervals_0'][0], bpod_start)))
             # skip mis-assigned first FPGA trial start
             t_trial_start = np.sort(np.r_[fpga_events['intervals_0'][int(i1):], missing_bpod])
             ibpod = np.sort(np.r_[ibpod, missing_bpod_idx])
@@ -1178,7 +1179,8 @@ def get_bpod_event_times(self, sync, chmap, bpod_event_ttls=None, display=False,
         bpod_event_intervals = self._assign_events(
             bpod['times'], bpod['polarities'], bpod_event_ttls, display=display)
 
-        if 'trial_start' not in bpod_event_intervals or bpod_event_intervals['trial_start'].size == 0:
+        if ('trial_start' not in bpod_event_intervals or bpod_event_intervals['trial_start'].size == 0
+                or not self._has_delay_initiation()):
             return bpod, bpod_event_intervals
 
         # The first trial pulse is longer and often assigned to another event.
@@ -1342,6 +1344,27 @@ def sync_bpod_clock(bpod_trials, fpga_trials, sync_field):
 
         return fcn, drift, ibpod, ifpga
 
+    def _has_delay_initiation(self) -> bool:
+        """
+        Check if the first trial has a `delay_initiation` state.
+
+        Prior to iblrig v8.28.0, the first trial was used to handle both the detection of camera pulses and
+        the initial delay. This may cause issues with the extraction of events during the first trial.
+
+        Returns
+        -------
+        bool
+            True if iblrig version < 8.28.0 or the first trial has a `delay_initiation` state, False otherwise.
+
+        Notes
+        -----
+        Only valid if both `self.settings` and `self.bpod_extractor` are set; a missing version is read as '0.0.0'.
+        """
+        iblrig_version = version.parse((self.settings or {}).get("IBLRIG_VERSION", "0.0.0"))
+        has_delay_init = (hasattr(self, 'bpod_extractor') and 'delay_initiation' in
+                          self.bpod_extractor.bpod_trials[0]['behavior_data']['States timestamps'])
+        return iblrig_version < version.parse('8.28.0') or has_delay_init
+
 
 class FpgaTrialsHabituation(FpgaTrials):
     """Extract habituationChoiceWorld trial events from an NI DAQ."""
 
@@ -16,7 +16,7 @@
 from one.api import ONE
 from one.webclient import AlyxClient
 from one.util import filter_datasets
-from one.alf.path import add_uuid_string, session_path_parts, get_alf_path
+from one.alf.path import add_uuid_string, get_alf_path, ensure_alf_path
 from one.alf.cache import _make_datasets_df
 from iblutil.util import flatten, ensure_list
 
@@ -461,8 +461,8 @@ def dataset_from_name(name, datasets):
 
     Parameters
     ----------
-    name : str
-        The name of the dataset.
+    name : str, function
+        The name of the dataset or a function to match the dataset name.
     datasets : list of ExpectedDataset
         A list of ExpectedDataset instances.
 
@@ -475,14 +475,18 @@ def dataset_from_name(name, datasets):
     matches = []
     for dataset in datasets:
         if dataset.operator is None:
-            if dataset._identifiers[2] == name:
-                matches.append(dataset)
+            if isinstance(name, str):
+                if dataset._identifiers[2] == name:
+                    matches.append(dataset)
+            else:
+                if name(dataset._identifiers[2]):
+                    matches.append(dataset)
         else:
             matches.extend(dataset_from_name(name, dataset._identifiers))
     return matches
 
 
-def update_collections(dataset, new_collection, substring=None, unique=None):
+def update_collections(dataset, new_collection, substring=None, unique=None, exact_match=False):
     """
     Update the collection of a dataset.
 
@@ -497,6 +501,12 @@ def update_collections(dataset, new_collection, substring=None, unique=None):
     substring : str, optional
         An optional substring in the collection to replace with new collection(s). If None, the
         entire collection will be replaced.
+    unique : bool, optional
+        When provided, this will be used to set the `unique` attribute of the new dataset(s). If
+        None, the `unique` attribute will be set to True if neither the dataset name nor the
+        collection contains wildcards.
+    exact_match : bool
+        If True, the collection is replaced only if it contains `substring` (ignored if no substring).
 
     Returns
     -------
@@ -511,7 +521,10 @@ def update_collections(dataset, new_collection, substring=None, unique=None):
         if revision is not None:
             raise NotImplementedError
         if substring:
-            after = [(collection or '').replace(substring, x) or None for x in after]
+            if exact_match and substring not in (collection or ''):  # collection may be None
+                after = [collection]
+            else:
+                after = [(collection or '').replace(substring, x) or None for x in after]
         if unique is None:
             unique = [not set(name + (x or '')).intersection('*[?') for x in after]
         else:
@@ -523,7 +536,7 @@ def update_collections(dataset, new_collection, substring=None, unique=None):
                 updated &= D(name, folder, not isinstance(dataset, OptionalDataset), register, unique=unq)
     else:
         updated = copy(dataset)
-        updated._identifiers = [update_collections(dd, new_collection, substring, unique)
+        updated._identifiers = [update_collections(dd, new_collection, substring, unique, exact_match)
                                 for dd in updated._identifiers]
     return updated
 
@@ -536,7 +549,7 @@ def __init__(self, session_path, signature, one=None):
         :param signature: input and output file signatures
         :param one: ONE instance
         """
-        self.session_path = session_path
+        self.session_path = ensure_alf_path(session_path)
         self.signature = _parse_signature(signature)
         self.one = one
         self.processed = {}  # Map of filepaths and their processed records (e.g. upload receipts or Alyx records)
@@ -566,7 +579,7 @@ def getData(self, one=None):
         dfs = [file.filter(session_datasets)[1] for file in self.signature['input_files']]
         return one._cache.datasets.iloc[0:0] if len(dfs) == 0 else pd.concat(dfs).drop_duplicates()
 
-    def getOutputFiles(self):
+    def getOutputFiles(self, session_path=None):
         """
         Return a data frame of output datasets found on disk.
 
@@ -575,10 +588,11 @@ def getOutputFiles(self):
         pandas.DataFrame
             A dataset data frame of datasets on disk that were specified in signature['output_files'].
         """
-        assert self.session_path
+        session_path = self.session_path if session_path is None else session_path
+        assert session_path
         # Next convert datasets to frame
         # Create dataframe of all ALF datasets
-        df = _make_datasets_df(self.session_path, hash_files=False).set_index(['eid', 'id'])
+        df = _make_datasets_df(session_path, hash_files=False).set_index(['eid', 'id'])
         # Filter outputs
         if len(self.signature['output_files']) == 0:
             return pd.DataFrame()
@@ -714,7 +728,7 @@ def setUp(self, **_):
             _logger.warning('Space left on server is < 500GB, won\'t re-download new data')
             return
 
-        rel_sess_path = '/'.join(self.session_path.parts[-3:])
+        rel_sess_path = self.session_path.session_path_short()
         target_paths = []
         source_paths = []
         for i, d in df.iterrows():
@@ -761,13 +775,13 @@ def __init__(self, session_path, signature, one=None):
         """
         super().__init__(session_path, signature, one=one)
 
-    def setUp(self, **_):
+    def setUp(self, check_hash=True, **_):
         """
-        Function to download necessary data to run tasks using ONE
+        Function to download necessary data to run tasks using ONE (`check_hash` is passed to `one._check_filesystem`)
         :return:
         """
         df = super().getData()
-        self.one._check_filesystem(df, check_hash=False)
+        self.one._check_filesystem(df, check_hash=check_hash)
 
     def uploadData(self, outputs, version, **kwargs):
         """
@@ -843,8 +857,8 @@ def uploadData(self, outputs, version, **kwargs):
         """
         # Set up Globus
         from one.remote.globus import Globus # noqa
-        self.globus = Globus(client_name='server', headless=True)
-        self.lab = session_path_parts(self.session_path, as_dict=True)['lab']
+        self.globus = Globus(client_name=kwargs.pop('client_name', 'server'), headless=True)
+        self.lab = self.session_path.lab
         if self.lab == 'cortexlab' and 'cortexlab' in self.one.alyx.base_url:
             base_url = 'https://alyx.internationalbrainlab.org'
             _logger.warning('Changing Alyx client to %s', base_url)
@@ -957,25 +971,30 @@ def __init__(self, session_path, signatures, one=None):
         super().__init__(session_path, signatures, one=one)
         self.patch_path = os.getenv('SDSC_PATCH_PATH', SDSC_PATCH_PATH)
         self.root_path = SDSC_ROOT_PATH
+        self.linked_files = []  # List of symlinks created to run tasks
 
-    def setUp(self, task):
+    def setUp(self, task, **_):
         """Function to create symlinks to necessary data to run tasks."""
         df = super().getData()
 
-        SDSC_TMP = Path(self.patch_path.joinpath(task.__class__.__name__))
+        SDSC_TMP = ensure_alf_path(self.patch_path.joinpath(task.__class__.__name__))
         session_path = Path(get_alf_path(self.session_path))
         for uuid, d in df.iterrows():
             file_path = session_path / d['rel_path']
             file_uuid = add_uuid_string(file_path, uuid)
             file_link = SDSC_TMP.joinpath(file_path)
             file_link.parent.mkdir(exist_ok=True, parents=True)
-            try:
+            try:  # TODO append link to task attribute
                 file_link.symlink_to(
                     Path(self.root_path.joinpath(file_uuid)))
+                self.linked_files.append(file_link)  # only reached when the link was newly created
             except FileExistsError:
                 pass
-
         task.session_path = SDSC_TMP.joinpath(session_path)
+        # If one of the symlinked input files is also an expected output, raise here to avoid overwriting.
+        # In the future we may instead copy the data under this condition. NB: asserts are skipped under `python -O`.
+        assert self.getOutputFiles(session_path=task.session_path).shape[0] == 0, (
+            "On SDSC patcher, output files should be distinct from input files to avoid overwriting")
 
     def uploadData(self, outputs, version, **kwargs):
         """
 
@@ -398,7 +398,7 @@ def __init__(self, alyx=None, client_name='default'):
         self.alyx = alyx or AlyxClient()
         globus.Globus.__init__(self, client_name=client_name)  # NB we don't init Patcher as we're not using ONE
 
-    def delete_dataset(self, dataset, dry=False):
+    def delete_dataset(self, dataset, dry=False, aws_profile='ibladmin'):
         """
         Delete a dataset off Alyx and remove file record from all Globus repositories.
 
@@ -408,6 +408,8 @@ def delete_dataset(self, dataset, dry=False):
             The dataset record or ID to delete.
         dry : bool
             If true, dataset is not deleted and file paths that would be removed are returned.
+        aws_profile : str
+            The AWS profile name to use for S3 deletion.
 
         Returns
         -------
@@ -448,15 +450,14 @@ def is_aws(repository_name):
 
         # Remove S3 files
         if s3_files:
-            cmd = ['aws', 's3', 'rm', *s3_files, '--profile', 'ibladmin']
+            cmd = ['aws', 's3', 'rm', *s3_files, '--profile', aws_profile]
             if dry:
                 cmd.append('--dryrun')
             if _logger.level > logging.DEBUG:
                 log_function = _logger.error
                 cmd.append('--only-show-errors')  # Suppress verbose output
             else:
                 log_function = _logger.debug
-                cmd.append('--no-progress')  # Suppress progress info, estimated time, etc.
             _logger.debug(' '.join(cmd))
             process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
             with process.stdout:
@@ -678,7 +679,7 @@ def patch_dataset(self, file_list, dry=False, ftp=False, force=False, **kwargs):
 
         exists = self.check_datasets(file_list)
         if len(exists) > 0 and not force:
-            _logger.error(f'Files: {", ".join([f.name for f in file_list])} already exist, to force set force=True')
+            _logger.error(f'Files: {", ".join([f.name for f in file_list])} already exist, to overwrite set force=True')
             return
 
         response = super().patch_dataset(file_list, dry=dry, repository=self.s3_repo, ftp=False, **kwargs)
 
@@ -10,8 +10,9 @@
 from one.registration import RegistrationClient, get_dataset_type
 from one.remote.globus import get_local_endpoint_id, get_lab_from_endpoint_id
 from one.webclient import AlyxClient, no_cache
-from one.converters import ConversionMixin, datasets2records
+from one.converters import datasets2records
 import one.alf.exceptions as alferr
+from one.alf.path import ensure_alf_path
 from one.api import ONE
 from iblutil.util import ensure_list
 
@@ -133,7 +134,7 @@ def register_session_raw_data(session_path, one=None, overwrite=False, **kwargs)
     overwrite : bool
         If set to True, will patch the datasets. It will take very long. If set to False (default)
         will skip all already registered data.
-    **kwargs
+    kwargs
         Optional keyword arguments for one.registration.RegistrationClient.register_files.
 
     Returns
@@ -553,12 +554,13 @@ def get_lab(session_path, alyx=None):
     one.remote.globus.get_lab_from_endpoint_id
     """
     alyx = alyx or AlyxClient()
-    if not (ref := ConversionMixin.path2ref(session_path)):
+    session_path = ensure_alf_path(session_path)
+    if not session_path.is_session_path():
         raise ValueError(f'Failed to parse session path: {session_path}')
 
-    labs = [x['lab'] for x in alyx.rest('subjects', 'list', nickname=ref['subject'])]
+    labs = [x['lab'] for x in alyx.rest('subjects', 'list', nickname=session_path.subject)]
     if len(labs) == 0:
-        raise alferr.AlyxSubjectNotFound(ref['subject'])
+        raise alferr.AlyxSubjectNotFound(session_path.subject)
     elif len(labs) > 1:  # More than one subject with this nickname
         # use local endpoint ID to find the correct lab
         endpoint_labs = get_lab_from_endpoint_id(alyx=alyx)