diff --git a/.gitignore b/.gitignore
index 1595eb6f6..3b75d0fa9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -112,3 +112,6 @@ analysis/*
/moabb/tests/orderplot.pdf
/moabb/tests/orderplot_full.pdf
/test_context.yml
+
+# mac os x stuff
+*DS_Store*
diff --git a/README.md b/README.md
index e93ad247c..97074cbad 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# Mother of all BCI Benchmark
-
-
![](/images/M.png)
-
+
+
+
Build a comprehensive benchmark of popular BCI algorithms applied on an extensive list of freely available EEG datasets.
@@ -32,15 +32,15 @@ This document (the README file) is a hub to give you some information about the
* [Documentation](#docs)
* [Architecture and main concepts](#architecture)
-We also have a recent [arXiv preprint][link_arxiv].
+We also have a recent [paper][link_jne] in JNE.
## What are we doing?
### The problem
-* Reproducible Research in BCI has a long way to go.
-* While many BCI datasets are made freely available, researchers do not publish code, and reproducing results required to benchmark new algorithms turns out to be more tricky than it should be.
-* Performances can be significantly impacted by parameters of the preprocessing steps, toolboxes used and implementation “tricks” that are almost never reported in the literature.
+* Reproducible Research in BCI has a long way to go.
+* While many BCI datasets are made freely available, researchers do not publish code, and reproducing results required to benchmark new algorithms turns out to be more tricky than it should be.
+* Performances can be significantly impacted by parameters of the preprocessing steps, toolboxes used and implementation “tricks” that are almost never reported in the literature.
As a results, there is no comprehensive benchmark of BCI algorithm, and newcomers are spending a tremendous amount of time browsing literature to find out what algorithm works best and on which dataset.
@@ -48,7 +48,7 @@ As a results, there is no comprehensive benchmark of BCI algorithm, and newcomer
The Mother of all BCI Benchmark will:
-* Build a comprehensive benchmark of popular BCI algorithms applied on an extensive list of freely available EEG datasets.
+* Build a comprehensive benchmark of popular BCI algorithms applied on an extensive list of freely available EEG datasets.
* The code will be made available on github, serving as a reference point for the future algorithmic developments.
* Algorithms can be ranked and promoted on a website, providing a clear picture of the different solutions available in the field.
@@ -205,3 +205,4 @@ make html
[link_neurotechx_signup]: https://docs.google.com/forms/d/e/1FAIpQLSfZyzhVdOLU8_oQ4NylHL8EFoKLIVmryGXA4u7HDsZpkTryvg/viewform
[link_moabb_docs]: http://moabb.neurotechx.com/docs/index.html
[link_arxiv]: https://arxiv.org/abs/1805.06427
+[link_jne]: http://iopscience.iop.org/article/10.1088/1741-2552/aadea0/meta
diff --git a/docs/source/README.md b/docs/source/README.md
index 676ae3dbb..bce401d19 100644
--- a/docs/source/README.md
+++ b/docs/source/README.md
@@ -26,7 +26,7 @@ This document (the README file) is a hub to give you some information about the
* [Documentation](#docs)
* [Architecture and main concepts](#architecture)
-We also have a recent [arXiv preprint][link_arxiv].
+We also have a recent [paper][link_jne] in JNE.
## What are we doing?
@@ -198,3 +198,4 @@ make html
[link_neurotechx_signup]: https://docs.google.com/forms/d/e/1FAIpQLSfZyzhVdOLU8_oQ4NylHL8EFoKLIVmryGXA4u7HDsZpkTryvg/viewform
[link_moabb_docs]: http://moabb.neurotechx.com/docs/index.html
[link_arxiv]: https://arxiv.org/abs/1805.06427
+[link_jne]: http://iopscience.iop.org/article/10.1088/1741-2552/aadea0/meta
diff --git a/docs/source/conf.py b/docs/source/conf.py
index c670a598f..c8957595e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -67,9 +67,9 @@
plot_html_show_source_link = False
sphinx_gallery_conf = {
- 'examples_dirs': ['../../examples', '../../tutorials'],
- 'gallery_dirs': ['auto_examples', 'auto_tutorials'],
- 'backreferences_dir': False}
+ 'examples_dirs': ['../../examples', '../../tutorials'],
+ 'gallery_dirs': ['auto_examples', 'auto_tutorials'],
+ 'backreferences_dir': False}
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
index 2aede3ed0..ac5779ab6 100644
--- a/docs/source/datasets.rst
+++ b/docs/source/datasets.rst
@@ -24,12 +24,14 @@ Motor Imagery Datasets
MunichMI
Ofner2017
PhysionetMI
+ Schirrmeister2017
Shin2017A
Shin2017B
Weibo2014
Zhou2016
SSVEPExo
+
------------
ERP Datasets
------------
@@ -38,6 +40,12 @@ ERP Datasets
:toctree: generated/
:template: class.rst
+ bi2013a
+ BNCI2014008
+ BNCI2014009
+ BNCI2015003
+ EPFLP300
+
--------------
SSVEP Datasets
--------------
@@ -46,6 +54,8 @@ SSVEP Datasets
:toctree: generated/
:template: class.rst
+ SSVEPExo
+
------------
Base & Utils
------------
diff --git a/examples/plot_cross_session_motor_imagery.py b/examples/plot_cross_session_motor_imagery.py
index f843bc72b..6e1d44fc5 100644
--- a/examples/plot_cross_session_motor_imagery.py
+++ b/examples/plot_cross_session_motor_imagery.py
@@ -61,7 +61,7 @@
pipelines['RG + LR'] = make_pipeline(Covariances(),
TangentSpace(),
- LogisticRegression())
+ LogisticRegression(solver='lbfgs'))
##############################################################################
# Evaluation
@@ -76,7 +76,10 @@
# be overwrited if necessary.
paradigm = LeftRightImagery()
-datasets = [BNCI2014001()]
+# Because this is being auto-generated we only use 2 subjects
+dataset = BNCI2014001()
+dataset.subject_list = dataset.subject_list[:2]
+datasets = [dataset]
overwrite = False # set to True if we want to overwrite cached results
evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
suffix='examples', overwrite=overwrite)
diff --git a/examples/plot_filterbank_csp_vs_csp.py b/examples/plot_filterbank_csp_vs_csp.py
index 61f64579d..ac60e53b9 100644
--- a/examples/plot_filterbank_csp_vs_csp.py
+++ b/examples/plot_filterbank_csp_vs_csp.py
@@ -63,7 +63,10 @@
# The second is a `FilterBankLeftRightImagery` with a bank of 6 filter, ranging
# from 8 to 35 Hz.
-datasets = [BNCI2014001()]
+# Because this is being auto-generated we only use 2 subjects
+dataset = BNCI2014001()
+dataset.subject_list = dataset.subject_list[:2]
+datasets = [dataset]
overwrite = False # set to True if we want to overwrite cached results
# broadband filters
diff --git a/examples/plot_within_session_p300.py b/examples/plot_within_session_p300.py
new file mode 100644
index 000000000..7d7c21394
--- /dev/null
+++ b/examples/plot_within_session_p300.py
@@ -0,0 +1,126 @@
+"""
+===========================
+Within Session P300
+===========================
+
+This example shows how to perform a within-session analysis on a P300
+dataset (EPFLP300).
+
+We will compare two pipelines :
+
+- Riemannian Geometry
+- xDawn with Linear Discriminant Analysis
+
+We will use the P300 paradigm, which uses the AUC as metric.
+
+"""
+# Authors: Pedro Rodrigues
+#
+# License: BSD (3-clause)
+
+# getting rid of the warnings about the future (we do not care about them);
+# the filters are installed right after the imports below
+from sklearn.pipeline import make_pipeline
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
+from sklearn.base import BaseEstimator, TransformerMixin
+from pyriemann.tangentspace import TangentSpace
+from pyriemann.estimation import XdawnCovariances, Xdawn
+from moabb.evaluations import WithinSessionEvaluation
+from moabb.paradigms import P300
+from moabb.datasets import EPFLP300
+import moabb
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+warnings.simplefilter(action='ignore', category=RuntimeWarning)
+
+
+moabb.set_log_level('info')
+
+# This is an auxiliary transformer that allows one to vectorize data
+# structures in a pipeline. For instance, in the case of an X with dimensions
+# Nt x Nc x Ns, one might be interested in a new data structure with
+# dimensions Nt x (Nc.Ns)
+
+
+class Vectorizer(BaseEstimator, TransformerMixin):
+    """Stateless transformer that flattens every sample into a 1-D vector.
+
+    An input of shape ``(n_samples, d1, d2, ...)`` is reshaped to
+    ``(n_samples, d1 * d2 * ...)``; nothing is learned from the data.
+    """
+
+    def fit(self, X, y=None):
+        """Do nothing and return the transformer itself.
+
+        ``y`` is optional so that ``fit(X)`` and ``fit_transform(X)`` also
+        work when no labels are passed.
+        """
+        return self
+
+    def transform(self, X):
+        """Return ``X`` reshaped to ``(X.shape[0], -1)``."""
+        return np.reshape(X, (X.shape[0], -1))
+
+##############################################################################
+# Create pipelines
+# ----------------
+#
+# Pipelines must be a dict of sklearn pipeline transformer.
+
+
+# The classes are called 'Target' and 'NonTarget', but the evaluation function
+# uses a LabelEncoder that turns them into 1 and 0, so the encoded value of
+# 'Target' is what has to be handed to XdawnCovariances.
+labels_dict = {'Target': 1, 'NonTarget': 0}
+
+pipelines = {
+    # Riemannian geometry: xDawn covariances -> tangent space -> LDA
+    'RG + LDA': make_pipeline(
+        XdawnCovariances(nfilter=2,
+                         classes=[labels_dict['Target']],
+                         estimator='lwf',
+                         xdawn_estimator='lwf'),
+        TangentSpace(),
+        LDA(solver='lsqr', shrinkage='auto')),
+    # xDawn spatial filtering -> flattened epochs -> LDA
+    'Xdw + LDA': make_pipeline(
+        Xdawn(nfilter=2, estimator='lwf'),
+        Vectorizer(),
+        LDA(solver='lsqr', shrinkage='auto')),
+}
+
+##############################################################################
+# Evaluation
+# ----------
+#
+# We define the paradigm (P300) and use the EPFLP300 dataset, restricted to its
+# first two subjects.
+# The evaluation will return a dataframe containing a single AUC score for
+# each subject / session of the dataset, and for each pipeline.
+#
+# Results are saved into the database, so that if you add a new pipeline, it
+# will not run again the evaluation unless a parameter has changed. Results can
+# be overwritten if necessary.
+
+paradigm = P300(resample=128)
+
+# only the first two subjects of the dataset are evaluated
+dataset = EPFLP300()
+dataset.subject_list = dataset.subject_list[:2]
+datasets = [dataset]
+
+overwrite = True  # set to True if we want to overwrite cached results
+evaluation = WithinSessionEvaluation(
+    paradigm=paradigm,
+    datasets=datasets,
+    suffix='examples',
+    overwrite=overwrite)
+results = evaluation.process(pipelines)
+
+##############################################################################
+# Plot Results
+# ----------------
+#
+# Here we plot the results.
+
+fig, ax = plt.subplots(facecolor='white', figsize=[8, 4])
+
+# individual scores as jittered points, with the per-pipeline average on top
+sns.stripplot(data=results, y='score', x='pipeline', ax=ax, jitter=True,
+              alpha=.5, zorder=1, palette="Set1")
+sns.pointplot(data=results, y='score', x='pipeline', ax=ax,
+              zorder=1, palette="Set1")
+
+ax.set_ylabel('ROC AUC')
+ax.set_ylim(0.5, 1)
+
+# plt.show() runs the GUI event loop; fig.show() returns immediately, so the
+# window would disappear as soon as the script ends
+plt.show()
diff --git a/moabb/analysis/__init__.py b/moabb/analysis/__init__.py
index c0a1be732..559bacbaf 100644
--- a/moabb/analysis/__init__.py
+++ b/moabb/analysis/__init__.py
@@ -1,9 +1,13 @@
import os
+import logging
import platform
from datetime import datetime
-from moabb.analysis import plotting as plt # noqa: E501
-from moabb.analysis.results import Results # noqa: E501,F401
-from moabb.analysis.meta_analysis import find_significant_differences, compute_dataset_statistics # noqa: E501
+from moabb.analysis import plotting as plt
+from moabb.analysis.results import Results # noqa: F401
+from moabb.analysis.meta_analysis import (
+ find_significant_differences, compute_dataset_statistics) # noqa: E501
+
+log = logging.getLogger()
def analyze(results, out_path, name='analysis', plot=False):
@@ -27,20 +31,27 @@ def analyze(results, out_path, name='analysis', plot=False):
'''
# input checks #
- if type(out_path) is not str:
+ if not isinstance(out_path, str):
raise ValueError('Given out_path argument is not string')
elif not os.path.isdir(out_path):
raise IOError('Given directory does not exist')
else:
analysis_path = os.path.join(out_path, name)
+    # Only shorten pipeline names if the shortened names are still unique.
+    unique_ids = [plt._simplify_names(x) for x in results.pipeline.unique()]
+    simplify = True
+    if len(unique_ids) != len(set(unique_ids)):
+        log.warning(
+            'Pipeline names are too similar, turning off name shortening')
+        simplify = False
+
os.makedirs(analysis_path, exist_ok=True)
# TODO: no good cross-platform way of recording CPU info?
with open(os.path.join(analysis_path, 'info.txt'), 'a') as f:
dt = datetime.now()
- f.write(
- 'Date: {:%Y-%m-%d}\n Time: {:%H:%M}\n'.format(dt,
- dt))
+ f.write('Date: {:%Y-%m-%d}\n Time: {:%H:%M}\n'.format(dt, dt))
f.write('System: {}\n'.format(platform.system()))
f.write('CPU: {}\n'.format(platform.processor()))
@@ -52,5 +63,5 @@ def analyze(results, out_path, name='analysis', plot=False):
if plot:
fig, color_dict = plt.score_plot(results)
fig.savefig(os.path.join(analysis_path, 'scores.pdf'))
- fig = plt.summary_plot(P, T)
+ fig = plt.summary_plot(P, T, simplify=simplify)
fig.savefig(os.path.join(analysis_path, 'ordering.pdf'))
diff --git a/moabb/analysis/plotting.py b/moabb/analysis/plotting.py
index 955cfb51f..1e3e3f038 100644
--- a/moabb/analysis/plotting.py
+++ b/moabb/analysis/plotting.py
@@ -11,7 +11,8 @@
PIPELINE_PALETTE = sea.color_palette("husl", 6)
-sea.set(font='serif', style='whitegrid', palette=PIPELINE_PALETTE)
+sea.set(font='serif', style='whitegrid',
+ palette=PIPELINE_PALETTE, color_codes=False)
log = logging.getLogger()
@@ -72,15 +73,16 @@ def paired_plot(data, alg1, alg2):
return fig
-def summary_plot(sig_df, effect_df, p_threshold=0.05):
+def summary_plot(sig_df, effect_df, p_threshold=0.05, simplify=True):
'''Visualize significances as a heatmap with green/grey/red for significantly
higher/significantly lower.
sig_df is a DataFrame of pipeline x pipeline where each value is a p-value,
effect_df is a DF where each value is an effect size
'''
- effect_df.columns = effect_df.columns.map(_simplify_names)
- sig_df.columns = sig_df.columns.map(_simplify_names)
+ if simplify:
+ effect_df.columns = effect_df.columns.map(_simplify_names)
+ sig_df.columns = sig_df.columns.map(_simplify_names)
annot_df = effect_df.copy()
for row in annot_df.index:
for col in annot_df.columns:
diff --git a/moabb/analysis/results.py b/moabb/analysis/results.py
index 910e7c099..7222d521f 100644
--- a/moabb/analysis/results.py
+++ b/moabb/analysis/results.py
@@ -54,7 +54,7 @@ class that will abstract result storage
self.filepath = os.path.join(self.mod_dir, 'results',
paradigm_class.__name__,
evaluation_class.__name__,
- 'results{}.hdf5'.format('_'+suffix))
+ 'results{}.hdf5'.format('_' + suffix))
os.makedirs(os.path.dirname(self.filepath), exist_ok=True)
self.filepath = self.filepath
diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py
index cc7a913db..9d97887b1 100644
--- a/moabb/datasets/Weibo2014.py
+++ b/moabb/datasets/Weibo2014.py
@@ -29,7 +29,7 @@ def eeg_data_path(base_path, subject):
def get_subjects(sub_inds, sub_names, ind):
dataname = 'data{}'.format(ind)
- if not os.path.isfile(os.path.join(base_path, dataname+'.zip')):
+ if not os.path.isfile(os.path.join(base_path, dataname + '.zip')):
_fetch_file(FILES[ind], os.path.join(
base_path, dataname + '.zip'), print_destination=False)
with z.ZipFile(os.path.join(base_path, dataname + '.zip'), 'r') as f:
diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py
index 1350bc291..f7d3d4e0e 100644
--- a/moabb/datasets/__init__.py
+++ b/moabb/datasets/__init__.py
@@ -8,9 +8,12 @@
from .gigadb import Cho2017
from .alex_mi import AlexMI
from .physionet_mi import PhysionetMI
-from .bnci import (BNCI2014001, BNCI2014002, BNCI2014004, BNCI2015001,
- BNCI2015004)
+from .bnci import (BNCI2014001, BNCI2014002, BNCI2014004, BNCI2014008,
+ BNCI2014009, BNCI2015001, BNCI2015003, BNCI2015004)
from .bbci_eeg_fnirs import Shin2017A, Shin2017B
+from .schirrmeister2017 import Schirrmeister2017
+from .epfl import EPFLP300
+from .braininvaders import bi2013a
from .upper_limb import Ofner2017
from .Weibo2014 import Weibo2014
from .Zhou2016 import Zhou2016
diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py
index 5a1c6b426..ea3b7c573 100644
--- a/moabb/datasets/bnci.py
+++ b/moabb/datasets/bnci.py
@@ -178,7 +178,7 @@ def _load_data_004_2014(subject,
for r in ['T', 'E']:
url = '{u}004-2014/B{s:02d}{r}.mat'.format(u=base_url, s=subject, r=r)
filename = data_path(url, path, force_update, update_path)[0]
- raws, _ = _convert_mi(filename, ch_names, ch_types)
+ raws, _ = _convert_mi(filename, ch_names, ch_types)
sessions.extend(raws)
sessions = {'session_%d' % ii: {'run_0': run}
@@ -204,7 +204,9 @@ def _load_data_008_2014(subject,
run = loadmat(filename, struct_as_record=False, squeeze_me=True)['data']
raw, event_id = _convert_run_p300_sl(run, verbose=verbose)
- return [raw], event_id
+ sessions = {'session_0': {'run_0': raw}}
+
+ return sessions
@verbose
@@ -232,7 +234,12 @@ def _load_data_009_2014(subject,
raws.append(raw)
event_id.update(ev)
- return raws, event_id
+ sessions = {}
+ sessions['session_0'] = {}
+ for i, rawi in enumerate(raws):
+ sessions['session_0']['run_' + str(i)] = rawi
+
+ return sessions
@verbose
@@ -279,8 +286,6 @@ def _load_data_003_2015(subject,
url = '{u}003-2015/s{s:d}.mat'.format(u=base_url, s=subject)
filename = data_path(url, path, force_update, update_path)[0]
- raws = list()
- event_id = {'Target': 2, 'Non-Target': 1}
from scipy.io import loadmat
data = loadmat(filename, struct_as_record=False, squeeze_me=True)
@@ -297,7 +302,9 @@ def _load_data_003_2015(subject,
info = create_info(
ch_names=ch_names, ch_types=ch_types, sfreq=sfreq, montage=montage)
- for run in [data.train, data.test]:
+ sessions = {}
+ sessions['session_0'] = {}
+ for ri, run in enumerate([data.train, data.test]):
# flash events on the channel 9
flashs = run[9:10]
ix_flash = flashs[0] > 0
@@ -318,9 +325,9 @@ def _load_data_003_2015(subject,
eeg_data = np.r_[run[1:-2] * 1e-6, targets, flashs]
raw = RawArray(data=eeg_data, info=info, verbose=verbose)
- raws.append(raw)
- event_id.update(evd)
- return raws, event_id
+ sessions['session_0']['run_' + str(ri)] = raw
+
+ return sessions
@verbose
@@ -466,7 +473,7 @@ def _convert_mi(filename, ch_names, ch_types):
event_id = {}
data = loadmat(filename, struct_as_record=False, squeeze_me=True)
- if type(data['data']) is np.ndarray:
+ if isinstance(data['data'], np.ndarray):
run_array = data['data']
else:
run_array = [data['data']]
@@ -819,6 +826,121 @@ def __init__(self):
doi='10.1109/TNSRE.2007.906956')
+class BNCI2014008(MNEBNCI):
+    """BNCI 2014-008 P300 dataset.
+
+    Dataset from [1]_.
+
+    **Dataset description**
+
+    This dataset represents a complete record of P300 evoked potentials
+    using a paradigm originally described by Farwell and Donchin [2]_.
+    In these sessions, 8 users with amyotrophic lateral sclerosis (ALS)
+    focused on one out of 36 different characters. The objective in this
+    contest is to predict the correct character in each of the provided
+    character selection epochs.
+
+    We included in the study a total of eight volunteers, all naïve to BCI
+    training. Scalp EEG signals were recorded (g.MOBILAB, g.tec, Austria)
+    from eight channels according to 10–10 standard (Fz, Cz, Pz, Oz, P3, P4,
+    PO7 and PO8) using active electrodes (g.Ladybird, g.tec, Austria).
+    All channels were referenced to the right earlobe and grounded to the left
+    mastoid. The EEG signal was digitized at 256 Hz and band-pass filtered
+    between 0.1 and 30 Hz.
+
+    Participants were required to copy spell seven predefined words of five
+    characters each (runs), by controlling a P300 matrix speller. Rows and
+    columns on the interface were randomly intensified for 125 ms, with an
+    inter stimulus interval (ISI) of 125 ms, yielding a 250 ms lag between the
+    appearance of two stimuli (stimulus onset asynchrony, SOA).
+
+    In the first three runs (15 trials in total) EEG data was stored to
+    perform a calibration of the BCI classifier. Thus no feedback was provided
+    to the participant up to this point. A stepwise linear discriminant
+    analysis (SWLDA) was applied to the data from the three calibration runs
+    (i.e., runs 1–3) to determine the classifier weights (i.e., classifier
+    coefficients). These weights were then applied during the subsequent four
+    testing runs (i.e., runs 4–7) when participants were provided with
+    feedback.
+
+    References
+    ----------
+
+    .. [1] A. Riccio, L. Simione, F. Schettini, A. Pizzimenti, M. Inghilleri,
+           M. O. Belardinelli, D. Mattia, and F. Cincotti (2013). Attention
+           and P300-based BCI performance in people with amyotrophic lateral
+           sclerosis. Front. Hum. Neurosci., vol. 7, p. 732.
+    .. [2] L. A. Farwell and E. Donchin, Talking off the top of your head:
+           toward a mental prosthesis utilizing event-related
+           brain potentials, Electroencephalogr. Clin. Neurophysiol.,
+           vol. 70, n. 6, pp. 510–523, 1988.
+
+    """
+
+    def __init__(self):
+        # subjects are numbered 1 to 8
+        subject_ids = list(range(1, 9))
+        event_codes = {'Target': 2, 'NonTarget': 1}
+        super().__init__(
+            code='008-2014',
+            paradigm='p300',
+            subjects=subject_ids,
+            sessions_per_subject=1,
+            events=event_codes,
+            interval=[0, 1.0],
+            doi='10.3389/fnhum.2013.00732')
+
+
+class BNCI2014009(MNEBNCI):
+    """BNCI 2014-009 P300 dataset.
+
+    Dataset from [1]_.
+
+    **Dataset description**
+
+    This dataset presents a complete record of P300 evoked potentials
+    using two different paradigms: a paradigm based on the P300 Speller in
+    overt attention condition and a paradigm used in covert attention
+    condition. In these sessions, 10 healthy subjects focused on one out of 36
+    different characters. The objective was to predict the correct character
+    in each of the provided character selection epochs.
+    (Note: right now only the overt attention data is available via MOABB)
+
+    In the first interface, cues are organized in a 6×6 matrix and each
+    character is always visible on the screen and spatially separated from the
+    others. By design, no fixation cue is provided, as the subject is expected
+    to gaze at the target character. Stimulation consists in the
+    intensification of whole lines (rows or columns) of six characters.
+
+    Ten healthy subjects (10 female, mean age = 26.8 ± 5.6, table I) with
+    previous experience with P300-based BCIs attended 3 recording sessions.
+    Scalp EEG potentials were measured using 16 Ag/AgCl electrodes that
+    covered the left, right and central scalp (Fz, FCz, Cz, CPz, Pz, Oz, F3,
+    F4, C3, C4, CP3, CP4, P3, P4, PO7, PO8) per the 10-10 standard. Each
+    electrode was referenced to the linked earlobes and grounded to the
+    right mastoid. The EEG was acquired at 256 Hz, high pass- and low
+    pass-filtered with cutoff frequencies of 0.1 Hz and 20 Hz, respectively.
+    Each subject attended 4 recording sessions. During each session,
+    the subject performed three runs with each of the stimulation interfaces.
+
+    References
+    ----------
+
+    .. [1] P Aricò, F Aloise, F Schettini, S Salinari, D Mattia and F Cincotti
+           (2013). Influence of P300 latency jitter on event related potential-
+           based brain–computer interface performance. Journal of Neural
+           Engineering, vol. 11, number 3.
+
+    """
+
+    def __init__(self):
+        # NOTE(review): the description above mentions both 3 and 4 recording
+        # sessions while sessions_per_subject is 1 -- confirm against the
+        # paper and the downloaded files.
+        # subjects are numbered 1 to 10
+        subject_ids = list(range(1, 11))
+        event_codes = {'Target': 2, 'NonTarget': 1}
+        super().__init__(
+            code='009-2014',
+            paradigm='p300',
+            subjects=subject_ids,
+            sessions_per_subject=1,
+            events=event_codes,
+            interval=[0, 0.8],
+            doi='10.1088/1741-2560/11/3/035008')
+
+
class BNCI2015001(MNEBNCI):
"""BNCI 2015-001 Motor Imagery dataset.
@@ -866,6 +988,40 @@ def __init__(self):
doi='10.1109/tnsre.2012.2189584')
+class BNCI2015003(MNEBNCI):
+    """BNCI 2015-003 P300 dataset.
+
+    Dataset from [1]_.
+
+    **Dataset description**
+
+    This dataset contains recordings from 10 subjects performing a visual P300
+    task for spelling. Results were published in [1]_. Sampling frequency was
+    256 Hz and there were 8 electrodes ('Fz', 'Cz', 'P3', 'Pz', 'P4', 'PO7',
+    'Oz', 'PO8') which were referenced to the right earlobe. Each subject
+    participated in only one session. For more information, see [1]_.
+
+    References
+    ----------
+
+    .. [1] C. Guger, S. Daban, E. Sellers, C. Holzner, G. Krausz,
+           R. Carabalona, F. Gramatica, and G. Edlinger (2009). How many
+           people are able to control a P300-based brain-computer interface
+           (BCI)?. Neuroscience Letters, vol. 462, pp. 94–98.
+
+    """
+
+    def __init__(self):
+        # subjects are numbered 1 to 10
+        subject_ids = list(range(1, 11))
+        event_codes = {'Target': 2, 'NonTarget': 1}
+        super().__init__(
+            code='003-2015',
+            paradigm='p300',
+            subjects=subject_ids,
+            sessions_per_subject=1,
+            events=event_codes,
+            interval=[0, 0.8],
+            doi='10.1016/j.neulet.2009.06.045')
+
+
class BNCI2015004(MNEBNCI):
"""BNCI 2015-004 Motor Imagery dataset.
diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py
new file mode 100644
index 000000000..8db112bfb
--- /dev/null
+++ b/moabb/datasets/braininvaders.py
@@ -0,0 +1,193 @@
+import mne
+from moabb.datasets.base import BaseDataset
+from moabb.datasets import download as dl
+import os
+import glob
+import zipfile
+import yaml
+
+BI2013a_URL = 'https://zenodo.org/record/1494240/files/'
+
+
+class bi2013a(BaseDataset):
+    '''P300 dataset bi2013a from a "Brain Invaders" experiment (2013)
+    carried out at University of Grenoble Alpes.
+
+    Dataset following the setup from [1]_.
+
+    **Dataset Description**
+
+    This dataset concerns an experiment carried out at GIPSA-lab
+    (University of Grenoble Alpes, CNRS, Grenoble-INP) in 2013.
+    Principal Investigators: Erwan Vaineau, Dr. Alexandre Barachant
+    Scientific Supervisor : Dr. Marco Congedo
+    Technical Supervisor : Anton Andreev
+
+    The experiment uses the Brain Invaders P300-based Brain-Computer Interface
+    [7], which uses the Open-ViBE platform for on-line EEG data acquisition and
+    processing [1, 9]. For classification purposes the Brain Invaders
+    implements on-line Riemannian MDM classifiers [2, 3, 4, 6]. This experiment
+    features both a training-test (classical) mode of operation and a
+    calibration-less mode of operation [4, 5, 6].
+
+    The recordings concerned 24 subjects in total. Subjects 1 to 7 participated
+    in eight sessions, run in different days, subjects 8 to 24 participated in
+    one session. Each session consisted in two runs, one in a Non-Adaptive
+    (classical) and one in an Adaptive (calibration-less) mode of operation.
+    The order of the runs was randomized for each session. In both runs there
+    was a Training (calibration) phase and an Online phase, always passed in
+    this order. In the non-Adaptive run the data from the Training phase was
+    used for classifying the trials on the Online phase using the training-test
+    version of the MDM algorithm [3, 4]. In the Adaptive run, the data from the
+    training phase was not used at all, instead the classifier was initialized
+    with generic class geometric means and continuously adapted to the incoming
+    data using the Riemannian method explained in [4]. Subjects were completely
+    blind to the mode of operation and the two runs appeared to them identical.
+
+    In the Brain Invaders P300 paradigm, a repetition is composed of 12
+    flashes, of which 2 include the Target symbol (Target flashes) and 10 do
+    not (non-Target flash). Please see [7] for a description of the paradigm.
+    For this experiment, in the Training phases the number of flashes is fixed
+    (80 Target flashes and 400 non-Target flashes). In the Online phases the
+    number of Target and non-Target still are in a ratio 1/5, however their
+    number is variable because the Brain Invaders works with a fixed number of
+    game levels, however the number of repetitions needed to destroy the target
+    (hence to proceed to the next level) depends on the user’s performance
+    [4, 5]. In any case, since the classes are unbalanced, an appropriate score
+    must be used for quantifying the performance of classification methods
+    (e.g., balanced accuracy, AUC methods, etc).
+
+    Data were acquired with a Nexus (TMSi, The Netherlands) EEG amplifier:
+
+    - Sampling Frequency: 512 samples per second
+    - Digital Filter: no
+    - Electrodes: 16 wet Silver/Silver Chloride electrodes positioned at
+      FP1, FP2, F5, AFz, F6, T7, Cz, T8, P7, P3, Pz, P4, P8, O1, Oz, O2
+      according to the 10/20 international system.
+    - Reference: left ear-lobe.
+    - Ground: N/A.
+
+    Parameters
+    ----------
+    NonAdaptive : bool (default True)
+        Select the runs whose ``experimental_condition`` entry in the
+        subject's ``meta.yml`` is ``'nonadaptive'``.
+    Adaptive : bool (default False)
+        Select the runs whose ``experimental_condition`` is ``'adaptive'``.
+    Training : bool (default True)
+        Select the runs whose ``type`` entry is ``'training'``.
+    Online : bool (default False)
+        Select the runs whose ``type`` entry is ``'online'``.
+
+    A run is loaded only if both its condition and its type are selected.
+
+    References
+    ----------
+
+    .. [1] Arrouët C, Congedo M, Marvie J-E, Lamarche F, Lécuyer A, Arnaldi B
+           (2005) Open-ViBE: a 3D Platform for Real-Time Neuroscience.
+           Journal of Neurotherapy, 9(1), 3-25.
+    .. [2] Barachant A, Bonnet S, Congedo M, Jutten C (2013) Classification of
+           covariance matrices using a Riemannian-based kernel for BCI
+           applications. Neurocomputing 112, 172-178.
+    .. [3] Barachant A, Bonnet S, Congedo M, Jutten C (2012) Multi-Class Brain
+           Computer Interface, Classification by Riemannian Geometry.
+           IEEE Transactions on Biomedical Engineering 59(4), 920-928
+    .. [4] Barachant A, Congedo M (2014) A Plug & Play P300 BCI using
+           Information Geometry.
+           arXiv:1409.0107.
+    .. [5] Congedo M, Barachant A, Andreev A (2013) A New Generation of
+           Brain-Computer Interface Based on Riemannian Geometry.
+           arXiv:1310.8115.
+    .. [6] Congedo M, Barachant A, Bhatia R (2017) Riemannian Geometry for
+           EEG-based Brain-Computer Interfaces; a Primer and a Review.
+           Brain-Computer Interfaces, 4(3), 155-174.
+    .. [7] Congedo M, Goyat M, Tarrin N, Ionescu G, Rivet B, Varnet L, Rivet B,
+           Phlypo R, Jrad N, Acquadro M, Jutten C (2011) “Brain Invaders”: a
+           prototype of an open-source P300-based video game working with the
+           OpenViBE platform. Proc. IBCI Conf., Graz, Austria, 280-283.
+    .. [8] Congedo M, Korczowski L, Delorme A, Lopes da Silva F. (2016)
+           Spatio-temporal common pattern: A companion method for ERP analysis
+           in the time domain. Journal of Neuroscience Methods, 267, 74-88.
+    .. [9] Renard Y, Lotte F, Gibert G, Congedo M, Maby E, Delannoy V, Bertrand
+           O, Lécuyer A (2010) OpenViBE: An Open-Source Software Platform to
+           Design, Test and Use Brain-Computer Interfaces in Real and Virtual
+           Environments. PRESENCE : Teleoperators and Virtual Environments
+           19(1), 35-53.
+    '''
+
+    def __init__(
+            self,
+            NonAdaptive=True,
+            Adaptive=False,
+            Training=True,
+            Online=False):
+        super().__init__(
+            subjects=list(range(1, 24 + 1)),
+            sessions_per_subject='varying',
+            events=dict(Target=33285, NonTarget=33286),
+            code='Brain Invaders 2013a',
+            interval=[0, 1],
+            paradigm='p300',
+            doi='')
+
+        # run-selection flags, read by data_path()
+        self.adaptive = Adaptive
+        self.nonadaptive = NonAdaptive
+        self.training = Training
+        self.online = Online
+
+    def _get_single_subject_data(self, subject):
+        """Return the recordings of one subject as nested dicts.
+
+        The result maps ``'session_<n>'`` to ``{'run_<m>': raw}``, where
+        ``<n>`` is taken from the ``Session<n>`` folder that holds the file
+        and ``<m>`` is the part of the file name between its last underscore
+        and ``.gdf``.
+        """
+        sessions = {}
+        for file_path in self.data_path(subject):
+            session_dir = os.path.basename(os.path.dirname(file_path))
+            # Drop the literal 'Session' prefix. str.strip('Session') would
+            # instead strip any of the characters S, e, s, i, o, n from both
+            # ends of the folder name.
+            if session_dir.startswith('Session'):
+                session_number = session_dir[len('Session'):]
+            else:
+                session_number = session_dir
+            session_name = 'session_' + session_number
+
+            file_name = os.path.basename(file_path)
+            run_number = file_name.split('_')[-1].split('.gdf')[0]
+            run_name = 'run_' + run_number
+
+            raw_original = mne.io.read_raw_edf(file_path,
+                                               montage='standard_1020',
+                                               preload=True)
+
+            sessions.setdefault(session_name, {})[run_name] = raw_original
+
+        return sessions
+
+    def data_path(self, subject, path=None, force_update=False,
+                  update_path=None, verbose=None):
+        """Return the paths of the recordings selected for ``subject``.
+
+        The archive ``subject<N>.zip`` is fetched with ``dl.data_path`` and
+        extracted next to itself when the ``subject<N>`` folder is missing.
+        The subject's ``meta.yml`` is then read and only the runs matching
+        the flags given to the constructor are kept.
+
+        ``path``, ``force_update``, ``update_path`` and ``verbose`` are
+        accepted but not used by this implementation.
+
+        Raises
+        ------
+        ValueError
+            If ``subject`` is not in ``self.subject_list``.
+        """
+        if subject not in self.subject_list:
+            raise ValueError("Invalid subject number")
+
+        subject_dir = 'subject{:d}'.format(subject)
+
+        # check if has the .zip
+        url = '{:s}{:s}.zip'.format(BI2013a_URL, subject_dir)
+        path_zip = dl.data_path(url, 'BRAININVADERS')
+        # Folder containing the archive. str.strip() removes a set of
+        # characters, not a suffix, so it must not be used to cut off the
+        # archive name.
+        path_folder = os.path.dirname(path_zip)
+        subject_folder = os.path.join(path_folder, subject_dir)
+
+        # check if has to unzip
+        if not os.path.isdir(subject_folder):
+            print('unzip', path_zip)
+            with zipfile.ZipFile(path_zip, "r") as zip_ref:
+                zip_ref.extractall(path_folder)
+
+        # filter the data regarding the experimental conditions
+        meta_path = os.path.join(subject_folder, 'meta.yml')
+        with open(meta_path, 'r') as stream:
+            # safe_load only builds plain Python objects and does not need an
+            # explicit Loader argument
+            meta = yaml.safe_load(stream)
+
+        conditions = []
+        if self.adaptive:
+            conditions.append('adaptive')
+        if self.nonadaptive:
+            conditions.append('nonadaptive')
+        types = []
+        if self.training:
+            types.append('training')
+        if self.online:
+            types.append('online')
+
+        filenames = [run['filename'] for run in meta['runs']
+                     if run['experimental_condition'] in conditions
+                     and run['type'] in types]
+
+        # list the filepaths for this subject
+        subject_paths = []
+        for filename in filenames:
+            pattern = os.path.join(subject_folder, 'Session*', filename)
+            subject_paths.extend(glob.glob(pattern))
+        return subject_paths
diff --git a/moabb/datasets/epfl.py b/moabb/datasets/epfl.py
new file mode 100644
index 000000000..1d2aff0bd
--- /dev/null
+++ b/moabb/datasets/epfl.py
@@ -0,0 +1,207 @@
+import mne
+import os
+import glob
+import numpy as np
+import datetime as dt
+from moabb.datasets.base import BaseDataset
+from moabb.datasets import download as dl
+from scipy.io import loadmat
+import zipfile
+
+# Base URL the per-subject archives (subject<N>.zip) are downloaded from.
+EPFLP300_URL = 'http://documents.epfl.ch/groups/m/mm/mmspg/www/BCI/p300/'
+
+
+class EPFLP300(BaseDataset):
+ """P300 dataset from Hoffmann et al 2008.
+
+ Dataset from the paper [1]_.
+
+ **Dataset Description**
+
+ In the present work a six-choice P300 paradigm is tested using a population
+ of five disabled and four able-bodied subjects. Six different images were
+ flashed in random order with a stimulus interval of 400 ms. Users were
+ facing a laptop screen on which six im- ages were displayed. The images
+ showed a television, a telephone, a lamp, a door, a window, and a radio.
+
+ The images were flashed in random sequences, one image at a time. Each
+ flash of an image lasted for 100 ms and during the following 300 ms none of
+ the images was flashed, i.e. the interstimulus interval was 400 ms. The EEG
+ was recorded at 2048 Hz sampling rate from 32 electrodes placed at the
+ standard positions of the 10-20 international system. The system was tested
+ with five disabled and four healthy subjects. The disabled subjects were
+ all wheelchair-bound but had varying communication and limb muscle control
+ abilities (Subjects 1 to 5). In particular, Subject 5 was only able
+ to perform extremely slow and relatively uncontrolled movements with hands
+ and arms. Due to a severe hypophony and large fluctuations in the level of
+ alertness, communication with subject 5 was very difficult, which is why
+ its data is not available in this dataset. Subjects 6 to 9 were PhD
+ students recruited from our laboratory (all male, age 30 ± 2.3).
+
+ Each subject completed four recording sessions. The first two sessions were
+ performed on one day and the last two sessions on another day. For all
+ subjects the time between the first and the last session was less than two
+ weeks. Each of the sessions consisted of six runs, one run for each of the
+ six images. The duration of one run was approximately one minute and the
+ duration of one session including setup of electrodes and short breaks
+ between runs was approximately 30 minutes. One session comprised on average
+ 810 trials, and the whole data for one subject consisted on average of 3240
+ trials.
+
+ References
+ ----------
+
+ .. [1] Hoffmann, U., Vesin, J-M., Ebrahimi, T., Diserens, K., 2008.
+ An efficient P300-based brain-computer interfacefor disabled
+ subjects. Journal of Neuroscience Methods .
+ https://doi.org/10.1016/j.jneumeth.2007.03.005
+ """
+
+ def __init__(self):
+ super().__init__(
+ subjects=[1, 2, 3, 4, 6, 7, 8, 9],
+ sessions_per_subject=4,
+ events=dict(Target=2, NonTarget=1),
+ code='EPFL P300 dataset',
+ interval=[0, 1],
+ paradigm='p300',
+ doi='10.1016/j.jneumeth.2007.03.005')
+
+ def _get_single_run_data(self, file_path):
+
+ # data from the .mat
+ data = loadmat(file_path)
+ signals = data['data']
+ stimuli = data['stimuli'].squeeze()
+ events = data['events']
+ target = data['target'][0][0]
+
+ # meta-info from the readme.pdf
+ sfreq = 2048
+ ch_names = [
+ 'Fp1',
+ 'AF3',
+ 'F7',
+ 'F3',
+ 'FC1',
+ 'FC5',
+ 'T7',
+ 'C3',
+ 'CP1',
+ 'CP5',
+ 'P7',
+ 'P3',
+ 'Pz',
+ 'PO3',
+ 'O1',
+ 'Oz',
+ 'O2',
+ 'PO4',
+ 'P4',
+ 'P8',
+ 'CP6',
+ 'CP2',
+ 'C4',
+ 'T8',
+ 'FC6',
+ 'FC2',
+ 'F4',
+ 'F8',
+ 'AF4',
+ 'Fp2',
+ 'Fz',
+ 'Cz',
+ 'MA1',
+ 'MA2']
+ ch_types = ['eeg'] * 32 + ['misc'] * 2
+
+ # we have to re-reference the signals
+ # the average signal on the mastoids electrodes is used as reference
+ references = [32, 33]
+ ref = np.mean(signals[references, :], axis=0)
+ signals = signals - ref
+
+ # getting the event time in a Python standardized way
+ events_datetime = []
+ for eventi in events:
+ events_datetime.append(dt.datetime(
+ *eventi.astype(int), int(eventi[-1] * 1e3) % 1000 * 1000))
+
+ # get the indices of the stimuli
+ pos = []
+ n_trials = len(stimuli)
+ for j in range(n_trials):
+ delta_seconds = (
+ events_datetime[j] -
+ events_datetime[0]).total_seconds()
+ delta_indices = int(delta_seconds * sfreq)
+ # has to add an offset
+ pos.append(delta_indices + int(0.4 * sfreq))
+
+ # create a stimulus channel
+ stim_aux = np.copy(stimuli)
+ stim_aux[stimuli == target] = 2
+ stim_aux[stimuli != target] = 1
+ stim_channel = np.zeros(signals.shape[1])
+ stim_channel[pos] = stim_aux
+ ch_names = ch_names + ['STI']
+ ch_types = ch_types + ['stim']
+ signals = np.concatenate([signals, stim_channel[None, :]])
+
+ # create info dictionary
+ info = mne.create_info(ch_names, sfreq, ch_types, montage='biosemi32')
+ info['description'] = 'EPFL P300 dataset'
+
+ # create the Raw structure
+ raw = mne.io.RawArray(signals, info, verbose=False)
+
+ return raw
+
+ def _get_single_subject_data(self, subject):
+ """return data for a single subject"""
+
+ file_path_list = self.data_path(subject)
+ sessions = {}
+
+ for file_path in sorted(file_path_list):
+
+ session_name = 'session_' + \
+ file_path.split(os.sep)[-2].strip('session')
+
+ if session_name not in sessions.keys():
+ sessions[session_name] = {}
+
+ run_name = 'run_' + str(len(sessions[session_name]) + 1)
+ sessions[session_name][run_name] = self._get_single_run_data(
+ file_path)
+
+ return sessions
+
+ def data_path(
+ self,
+ subject,
+ path=None,
+ force_update=False,
+ update_path=None,
+ verbose=None):
+
+ if subject not in self.subject_list:
+ raise(ValueError("Invalid subject number"))
+
+ # check if has the .zip
+ url = '{:s}subject{:d}.zip'.format(EPFLP300_URL, subject)
+ path_zip = dl.data_path(url, 'EPFLP300')
+ path_folder = path_zip.strip('subject{:d}.zip'.format(subject))
+
+ # check if has to unzip
+ if not(os.path.isdir(path_folder + 'subject{:d}'.format(subject))):
+ print('unzip', path_zip)
+ zip_ref = zipfile.ZipFile(path_zip, "r")
+ zip_ref.extractall(path_folder)
+
+ # get the path to all files
+ pattern = os.path.join('subject{:d}'.format(subject), '*', '*')
+ subject_paths = glob.glob(
+ path_folder + pattern)
+
+ return subject_paths
diff --git a/moabb/datasets/fake.py b/moabb/datasets/fake.py
index e6919149c..5d59907d1 100644
--- a/moabb/datasets/fake.py
+++ b/moabb/datasets/fake.py
@@ -34,11 +34,13 @@ def _generate_raw(self):
montage = read_montage('standard_1005')
sfreq = 128
- duration = len(self.event_id)*60
+ duration = len(self.event_id) * 60
eeg_data = 2e-5 * np.random.randn(duration * sfreq, len(ch_names))
y = np.zeros((duration * sfreq))
for ii, ev in enumerate(self.event_id):
- y[((1 + 5*ii)*128)::(5*len(self.event_id)*128)] = self.event_id[ev]
+ start_idx = ((1 + 5 * ii) * 128)
+ jump = (5 * len(self.event_id) * 128)
+ y[start_idx::jump] = self.event_id[ev]
ch_types = ['eeg'] * len(ch_names) + ['stim']
ch_names = ch_names + ['stim']
diff --git a/moabb/datasets/schirrmeister2017.py b/moabb/datasets/schirrmeister2017.py
new file mode 100644
index 000000000..451ca3f56
--- /dev/null
+++ b/moabb/datasets/schirrmeister2017.py
@@ -0,0 +1,251 @@
+import logging
+# import requests
+import numpy as np
+import re
+from moabb.datasets.base import BaseDataset
+from moabb.datasets import download as dl
+import h5py
+import mne
+
+
+# Module-level logger.
+log = logging.getLogger(__name__)
+
+# Base URL of the data files; '<train|test>/<subject>.mat' is appended to it.
+GIN_URL = "https://web.gin.g-node.org/robintibor/high-gamma-dataset/raw/master/data"  # noqa
+
+
+class Schirrmeister2017(BaseDataset):
+ """High-gamma dataset discribed in Schirrmeister et al. 2017
+
+Our “High-Gamma Dataset” is a 128-electrode dataset (of which we later only use
+44 sensors covering the motor cortex, (see Section 2.7.1), obtained from 14
+healthy subjects (6 female, 2 left-handed, age 27.2 ± 3.6 (mean ± std)) with
+roughly 1000 (963.1 ± 150.9, mean ± std) four-second trials of executed
+movements divided into 13 runs per subject. The four classes of movements were
+movements of either the left hand, the right hand, both feet, and rest (no
+movement, but same type of visual cue as for the other classes). The training
+set consists of the approx. 880 trials of all runs except the last two runs,
+the test set of the approx. 160 trials of the last 2 runs. This dataset was
+acquired in an EEG lab optimized for non-invasive detection of high- frequency
+movement-related EEG components (Ball et al., 2008; Darvas et al., 2010).
+
+ Depending on the direction of a gray arrow that was shown on black back-
+ground, the subjects had to repetitively clench their toes (downward arrow),
+perform sequential finger-tapping of their left (leftward arrow) or right
+(rightward arrow) hand, or relax (upward arrow). The movements were selected
+to require little proximal muscular activity while still being complex enough
+to keep subjects in- volved. Within the 4-s trials, the subjects performed the
+repetitive movements at their own pace, which had to be maintained as long as
+the arrow was showing. Per run, 80 arrows were displayed for 4 s each, with 3
+to 4 s of continuous random inter-trial interval. The order of presentation
+was pseudo-randomized, with all four arrows being shown every four trials.
+Ideally 13 runs were performed to collect 260 trials of each movement and rest.
+The stimuli were presented and the data recorded with BCI2000 (Schalk et al.,
+2004). The experiment was approved by the ethical committee of the University
+of Freiburg.
+
+References
+----------
+
+.. [1] Schirrmeister, Robin Tibor, et al. "Deep learning with convolutional
+neural networks for EEG decoding and visualization." Human brain mapping 38.11
+(2017): 5391-5420.
+
+ """
+
+ def __init__(self):
+ super().__init__(
+ subjects=list(range(1, 15)),
+ sessions_per_subject=1,
+ events=dict(right_hand=1, left_hand=2, rest=3, feet=4),
+ code='Schirrmeister2017',
+ interval=[0, 4],
+ paradigm='imagery',
+ doi='10.1002/hbm.23730')
+
+ def data_path(self, subject, path=None, force_update=False,
+ update_path=None, verbose=None):
+ if subject not in self.subject_list:
+ raise(ValueError('Invalid subject number'))
+
+ def _url(prefix):
+ return '/'.join([GIN_URL, prefix, '{:d}.mat'.format(subject)])
+
+ return [dl.data_path(_url(t), 'SCHIRRMEISTER2017', path, force_update,
+ update_path, verbose) for t in ['train', 'test']]
+
+ def _get_single_subject_data(self, subject):
+ train, test = [BBCIDataset(path) for path in self.data_path(subject)]
+ sessions = {}
+ sessions['session_1'] = {'train': train.load(), 'test': test.load()}
+ return sessions
+
+
+class BBCIDataset(object):
+ """
+ Loader class for files created by saving BBCI files in matlab (make
+ sure to save with '-v7.3' in matlab, see
+ https://de.mathworks.com/help/matlab/import_export/mat-file-versions.html#buk6i87
+ )
+ Parameters
+ ----------
+ filename: str
+ load_sensor_names: list of str, optional
+ Also speeds up loading if you only load some sensors.
+ None means load all sensors.
+
+ Copyright Robin Schirrmeister, 2017
+ Altered by Vinay Jayaram, 2018
+ """
+
+ def __init__(self, filename, load_sensor_names=None):
+ self.__dict__.update(locals())
+ del self.self
+
+ def load(self):
+ cnt = self._load_continuous_signal()
+ cnt = self._add_markers(cnt)
+ return cnt
+
+ def _load_continuous_signal(self):
+ wanted_chan_inds, wanted_sensor_names = self._determine_sensors()
+ fs = self._determine_samplingrate()
+ with h5py.File(self.filename, 'r') as h5file:
+ samples = int(h5file['nfo']['T'][0, 0])
+ cnt_signal_shape = (samples, len(wanted_chan_inds))
+ continuous_signal = np.ones(cnt_signal_shape,
+ dtype=np.float32) * np.nan
+ for chan_ind_arr, chan_ind_set in enumerate(wanted_chan_inds):
+ # + 1 because matlab/this hdf5-naming logic
+ # has 1-based indexing
+ # i.e ch1,ch2,....
+ chan_set_name = 'ch' + str(chan_ind_set + 1)
+ # first 0 to unpack into vector, before it is 1xN matrix
+ chan_signal = h5file[chan_set_name][
+ :].squeeze() # already load into memory
+ continuous_signal[:, chan_ind_arr] = chan_signal
+ assert not np.any(
+ np.isnan(continuous_signal)), "No NaNs expected in signal"
+
+ # Assume we cant know channel type here automatically
+ ch_types = ['eeg'] * len(wanted_chan_inds)
+ info = mne.create_info(ch_names=wanted_sensor_names, sfreq=fs,
+ ch_types=ch_types)
+ # Scale to volts from microvolts, (VJ 19.6.18)
+ continuous_signal = continuous_signal * 1e-6
+ cnt = mne.io.RawArray(continuous_signal.T, info)
+ return cnt
+
+ def _determine_sensors(self):
+ all_sensor_names = self.get_all_sensors(self.filename, pattern=None)
+ if self.load_sensor_names is None:
+
+ # if no sensor names given, take all EEG-chans
+ eeg_sensor_names = all_sensor_names
+ eeg_sensor_names = filter(lambda s: not s.startswith('BIP'),
+ eeg_sensor_names)
+ eeg_sensor_names = filter(lambda s: not s.startswith('E'),
+ eeg_sensor_names)
+ eeg_sensor_names = filter(lambda s: not s.startswith('Microphone'),
+ eeg_sensor_names)
+ eeg_sensor_names = filter(lambda s: not s.startswith('Breath'),
+ eeg_sensor_names)
+ eeg_sensor_names = filter(lambda s: not s.startswith('GSR'),
+ eeg_sensor_names)
+ eeg_sensor_names = list(eeg_sensor_names)
+ assert (len(eeg_sensor_names) in set(
+ [128, 64, 32, 16])), "check this code if you have different sensors..." # noqa
+ self.load_sensor_names = eeg_sensor_names
+ chan_inds = self._determine_chan_inds(all_sensor_names,
+ self.load_sensor_names)
+ return chan_inds, self.load_sensor_names
+
+ def _determine_samplingrate(self):
+ with h5py.File(self.filename, 'r') as h5file:
+ fs = h5file['nfo']['fs'][0, 0]
+ assert isinstance(fs, int) or fs.is_integer()
+ fs = int(fs)
+ return fs
+
+ @staticmethod
+ def _determine_chan_inds(all_sensor_names, sensor_names):
+ assert sensor_names is not None
+ chan_inds = [all_sensor_names.index(s) for s in sensor_names]
+ assert len(chan_inds) == len(sensor_names), ("All"
+ "sensors"
+ "should be there.")
+ # TODO: is it possible for this to fail? the list
+ # comp fails first right?
+ assert len(set(chan_inds)) == len(chan_inds), ("No"
+ "duplicated sensors"
+ "wanted.")
+ return chan_inds
+
+ @staticmethod
+ def get_all_sensors(filename, pattern=None):
+ """
+ Get all sensors that exist in the given file.
+
+ Parameters
+ ----------
+ filename: str
+ pattern: str, optional
+ Only return those sensor names that match the given pattern.
+ Returns
+ -------
+ sensor_names: list of str
+ Sensor names that match the pattern or all
+ sensor names in the file.
+ """
+ with h5py.File(filename, 'r') as h5file:
+ clab_set = h5file['nfo']['clab'][:].squeeze()
+ all_sensor_names = [''.join(chr(c) for c in h5file[obj_ref]) for
+ obj_ref in clab_set]
+ if pattern is not None:
+ all_sensor_names = filter(
+ lambda sname: re.search(pattern, sname),
+ all_sensor_names)
+ return all_sensor_names
+
+ def _add_markers(self, cnt):
+ with h5py.File(self.filename, 'r') as h5file:
+ event_times_in_ms = h5file['mrk']['time'][:].squeeze()
+ event_classes = h5file['mrk']['event']['desc'][:].squeeze().astype(
+ np.int64)
+
+ # Check whether class names known and correct order
+ # class_name_set = h5file['nfo']['className'][:].squeeze()
+ # all_class_names = [''.join(chr(c) for c in h5file[obj_ref])
+ # for obj_ref in class_name_set]
+
+ event_times_in_samples = event_times_in_ms * cnt.info['sfreq'] / 1000.0
+ event_times_in_samples = np.uint32(np.round(event_times_in_samples))
+
+ # Check if there are markers at the same time
+ previous_i_sample = -1
+ for i_event, (i_sample, id_class) in enumerate(
+ zip(event_times_in_samples, event_classes)):
+ if i_sample == previous_i_sample:
+ info = "{:d}: ({:.0f} and {:.0f}).\n".format(i_sample,
+ event_classes[
+ i_event - 1],
+ event_classes[
+ i_event])
+ log.warning("Same sample has at least two markers.\n"
+ + info +
+ "Marker codes will be summed.")
+ previous_i_sample = i_sample
+
+ # Now create stim chan
+ stim_chan = np.zeros_like(cnt.get_data()[0])
+ for i_sample, id_class in zip(event_times_in_samples, event_classes):
+ stim_chan[i_sample] += id_class
+ info = mne.create_info(ch_names=['STI 014'],
+ sfreq=cnt.info['sfreq'],
+ ch_types=['stim'])
+ stim_cnt = mne.io.RawArray(stim_chan[None], info, verbose='WARNING')
+ cnt = cnt.add_channels([stim_cnt])
+ event_arr = [event_times_in_samples,
+ [0] * len(event_times_in_samples),
+ event_classes]
+ cnt.info['events'] = np.array(event_arr).T
+ return cnt
diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py
index 457cdc5de..a46acd7c1 100644
--- a/moabb/datasets/utils.py
+++ b/moabb/datasets/utils.py
@@ -22,7 +22,7 @@ def dataset_search(paradigm, multi_session=False, events=None,
Parameters
----------
paradigm: str
- 'imagery','p300','ssvep'
+ 'imagery', 'p300', 'ssvep'
multi_session: bool
if True only returns datasets with more than one session per subject.
@@ -55,8 +55,6 @@ def dataset_search(paradigm, multi_session=False, events=None,
if events is not None and has_all_events:
n_classes = len(events)
assert paradigm in ['imagery', 'p300', 'ssvep']
- if paradigm == 'p300':
- raise Exception('SORRY NOBDOYS GOTTEN AROUND TO THIS YET')
for type_d in dataset_list:
d = type_d()
diff --git a/moabb/paradigms/__init__.py b/moabb/paradigms/__init__.py
index c66ab22d0..a4d8b8632 100644
--- a/moabb/paradigms/__init__.py
+++ b/moabb/paradigms/__init__.py
@@ -6,5 +6,6 @@
paradigms.
"""
# flake8: noqa
+from moabb.paradigms.p300 import *
from moabb.paradigms.motor_imagery import *
from moabb.paradigms.ssvep import *
diff --git a/moabb/paradigms/motor_imagery.py b/moabb/paradigms/motor_imagery.py
index bf25e8681..95cd209a6 100644
--- a/moabb/paradigms/motor_imagery.py
+++ b/moabb/paradigms/motor_imagery.py
@@ -373,4 +373,4 @@ class FakeImageryParadigm(LeftRightImagery):
@property
def datasets(self):
- return [FakeDataset(['left_hand', 'right_hand'])]
+ return [FakeDataset(['left_hand', 'right_hand'], paradigm='imagery')]
diff --git a/moabb/paradigms/p300.py b/moabb/paradigms/p300.py
new file mode 100644
index 000000000..264224b90
--- /dev/null
+++ b/moabb/paradigms/p300.py
@@ -0,0 +1,221 @@
+"""P300 Paradigms"""
+
+import abc
+import mne
+import numpy as np
+import pandas as pd
+import logging
+
+from moabb.paradigms.base import BaseParadigm
+from moabb.datasets import utils
+from moabb.datasets.fake import FakeDataset
+
+log = logging.getLogger()
+
+
+class BaseP300(BaseParadigm):
+ """Base P300 paradigm.
+
+ Please use one of the child classes
+
+ Parameters
+ ----------
+
+ filters: list of list (defaults [[7, 35]])
+ bank of bandpass filter to apply.
+
+ events: List of str | None (default None)
+ event to use for epoching. If None, default to all events defined in
+ the dataset.
+
+ tmin: float (default 0.0)
+ Start time (in second) of the epoch, relative to the dataset specific
+ task interval e.g. tmin = 1 would mean the epoch will start 1 second
+ after the begining of the task as defined by the dataset.
+
+ tmax: float | None, (default None)
+ End time (in second) of the epoch, relative to the begining of the
+ dataset specific task interval. tmax = 5 would mean the epoch will end
+ 5 second after the begining of the task as defined in the dataset. If
+ None, use the dataset value.
+
+ channels: list of str | None (default None)
+ list of channel to select. If None, use all EEG channels available in
+ the dataset.
+
+ resample: float | None (default None)
+ If not None, resample the eeg data with the sampling rate provided.
+ """
+
+ def __init__(self, filters=([1, 24],), events=None, tmin=0.0, tmax=None,
+ channels=None, resample=None):
+ super().__init__()
+ self.filters = filters
+ self.channels = channels
+ self.events = events
+ self.resample = resample
+
+ if (tmax is not None):
+ if tmin >= tmax:
+ raise(ValueError("tmax must be greater than tmin"))
+
+ self.tmin = tmin
+ self.tmax = tmax
+
+ def is_valid(self, dataset):
+ ret = True
+ if not (dataset.paradigm == 'p300'):
+ ret = False
+
+ # check if dataset has required events
+ if self.events:
+ if not set(self.events) <= set(dataset.event_id.keys()):
+ ret = False
+
+ # we should verify list of channels, somehow
+ return ret
+
+ @abc.abstractmethod
+ def used_events(self, dataset):
+ pass
+
+ def process_raw(self, raw, dataset):
+ # find the events
+ events = mne.find_events(raw, shortest_event=0, verbose=False)
+ channels = () if self.channels is None else self.channels
+
+ # picks channels
+ picks = mne.pick_types(raw.info, eeg=True, stim=False,
+ include=channels)
+
+ # get event id
+ event_id = self.used_events(dataset)
+
+ # pick events, based on event_id
+ try:
+ events = mne.pick_events(events, include=list(event_id.values()))
+ except RuntimeError:
+ # skip raw if no event found
+ return
+
+ # get interval
+ tmin = self.tmin + dataset.interval[0]
+ if self.tmax is None:
+ tmax = dataset.interval[1]
+ else:
+ tmax = self.tmax + dataset.interval[0]
+
+ X = []
+ for bandpass in self.filters:
+ fmin, fmax = bandpass
+ # filter data
+ raw_f = raw.copy().filter(fmin, fmax, method='iir',
+ picks=picks, verbose=False)
+ # epoch data
+ epochs = mne.Epochs(raw_f, events, event_id=event_id,
+ tmin=tmin, tmax=tmax, proj=False,
+ baseline=None, preload=True,
+ verbose=False, picks=picks,
+ on_missing='ignore')
+ if self.resample is not None:
+ epochs = epochs.resample(self.resample)
+ # MNE is in V, rescale to have uV
+ X.append(1e6 * epochs.get_data())
+
+ inv_events = {k: v for v, k in event_id.items()}
+ labels = np.array([inv_events[e] for e in epochs.events[:, -1]])
+
+ # if only one band, return a 3D array, otherwise return a 4D
+ if len(self.filters) == 1:
+ X = X[0]
+ else:
+ X = np.array(X).transpose((1, 2, 3, 0))
+
+ metadata = pd.DataFrame(index=range(len(labels)))
+ return X, labels, metadata
+
+ @property
+ def datasets(self):
+ if self.tmax is None:
+ interval = None
+ else:
+ interval = self.tmax - self.tmin
+ return utils.dataset_search(paradigm='p300',
+ events=self.events,
+ interval=interval,
+ has_all_events=True)
+
+ @property
+ def scoring(self):
+ return 'roc_auc'
+
+
+class SinglePass(BaseP300):
+ """Single Bandpass filter P300
+
+ P300 paradigm with only one bandpass filter (default 1 to 24 Hz)
+
+ Parameters
+ ----------
+ fmin: float (default 1)
+ cutoff frequency (Hz) for the high pass filter
+
+ fmax: float (default 24)
+ cutoff frequency (Hz) for the low pass filter
+
+ events: List of str | None (default None)
+ event to use for epoching. If None, default to all events defined in
+ the dataset.
+
+ tmin: float (default 0.0)
+ Start time (in second) of the epoch, relative to the dataset specific
+ task interval e.g. tmin = 1 would mean the epoch will start 1 second
+ after the begining of the task as defined by the dataset.
+
+ tmax: float | None, (default None)
+ End time (in second) of the epoch, relative to the begining of the
+ dataset specific task interval. tmax = 5 would mean the epoch will end
+ 5 second after the begining of the task as defined in the dataset. If
+ None, use the dataset value.
+
+ channels: list of str | None (default None)
+ list of channel to select. If None, use all EEG channels available in
+ the dataset.
+
+ resample: float | None (default None)
+ If not None, resample the eeg data with the sampling rate provided.
+
+ """
+ def __init__(self, fmin=1, fmax=24, **kwargs):
+ if 'filters' in kwargs.keys():
+ raise(ValueError("P300 does not take argument filters"))
+ super().__init__(filters=[[fmin, fmax]], **kwargs)
+
+
+class P300(SinglePass):
+ """P300 for Target/NonTarget classification
+
+ Metric is 'roc_auc'
+
+ """
+
+ def __init__(self, **kwargs):
+ if 'events' in kwargs.keys():
+ raise(ValueError('P300 dont accept events'))
+ super().__init__(events=['Target', 'NonTarget'], **kwargs)
+
+ def used_events(self, dataset):
+ return {ev: dataset.event_id[ev] for ev in self.events}
+
+ @property
+ def scoring(self):
+ return 'roc_auc'
+
+
+class FakeP300Paradigm(P300):
+ """Fake P300 for Target/NonTarget classification.
+ """
+
+ @property
+ def datasets(self):
+ return [FakeDataset(['Target', 'NonTarget'], paradigm='p300')]
diff --git a/moabb/pipelines/utils.py b/moabb/pipelines/utils.py
index 30260ca2f..8f66b6ec1 100644
--- a/moabb/pipelines/utils.py
+++ b/moabb/pipelines/utils.py
@@ -63,8 +63,10 @@ def __init__(self, estimator, flatten=True):
def fit(self, X, y=None):
assert X.ndim == 4
- self.models = [deepcopy(self.estimator).fit(X[..., i], y)
- for i in range(X.shape[-1])]
+ self.models = [
+ deepcopy(self.estimator).fit(X[..., i], y)
+ for i in range(X.shape[-1])
+ ]
return self
def transform(self, X):
@@ -78,6 +80,7 @@ def transform(self, X):
return np.stack(out, axis=2)
def __repr__(self):
- return '{}(estimator={}, flatten={})'.format(type(self).__name__,
- self.estimator.get_params(), # noqa: E501
- self.flatten)
+ estimator_name = type(self).__name__
+ estimator_prms = self.estimator.get_params()
+ return '{}(estimator={}, flatten={})'.format(
+ estimator_name, estimator_prms, self.flatten)
diff --git a/moabb/run.py b/moabb/run.py
index 35b61ca85..a933a48cc 100755
--- a/moabb/run.py
+++ b/moabb/run.py
@@ -59,7 +59,6 @@ def parse_pipelines_from_directory(d):
spec.loader.exec_module(foo)
pipeline_configs.append(foo.PIPELINE)
-
return pipeline_configs
diff --git a/moabb/tests/analysis.py b/moabb/tests/analysis.py
index 58f4184f4..00ef9ca6a 100644
--- a/moabb/tests/analysis.py
+++ b/moabb/tests/analysis.py
@@ -85,7 +85,7 @@ def to_result_input(pnames, dsets):
class Test_Stats(unittest.TestCase):
def return_df(self, shape):
- size = shape[0]*shape[1]
+ size = shape[0] * shape[1]
data = np.arange(size).reshape(*shape)
return pd.DataFrame(data=data)
@@ -96,7 +96,7 @@ def test_wilcoxon(self):
def test_perm_exhaustive(self):
P = ma.compute_pvals_perm(self.return_df((4, 5)))
Pl = P[np.tril_indices(P.shape[0])]
- self.assertTrue(np.allclose(Pl, (1/2**4)), np.tril(P))
+ self.assertTrue(np.allclose(Pl, (1 / 2 ** 4)), np.tril(P))
def test_perm_random(self):
P = ma.compute_pvals_perm(self.return_df((18, 5)))
diff --git a/moabb/tests/datasets.py b/moabb/tests/datasets.py
index 864ca85c8..aaf0fd8ad 100644
--- a/moabb/tests/datasets.py
+++ b/moabb/tests/datasets.py
@@ -27,25 +27,28 @@ def test_fake_dataset(self):
n_subjects = 3
n_sessions = 2
n_runs = 2
- ds = FakeDataset(n_sessions=n_sessions, n_runs=n_runs,
- n_subjects=n_subjects)
- data = ds.get_data()
- # we should get a dict
- self.assertTrue(isinstance(data, dict))
+ for paradigm in ['imagery', 'p300']:
- # we get the right number of subject
- self.assertEqual(len(data), n_subjects)
+ ds = FakeDataset(n_sessions=n_sessions, n_runs=n_runs,
+ n_subjects=n_subjects, paradigm=paradigm)
+ data = ds.get_data()
- # right number of session
- self.assertEqual(len(data[1]), n_sessions)
+ # we should get a dict
+ self.assertTrue(isinstance(data, dict))
- # right number of run
- self.assertEqual(len(data[1]['session_0']), n_runs)
+ # we get the right number of subject
+ self.assertEqual(len(data), n_subjects)
- # We should get a raw array at the end
- self.assertEqual(type(data[1]['session_0']['run_0']),
- mne.io.RawArray)
+ # right number of session
+ self.assertEqual(len(data[1]), n_sessions)
+
+ # right number of run
+ self.assertEqual(len(data[1]['session_0']), n_runs)
+
+ # We should get a raw array at the end
+ self.assertEqual(type(data[1]['session_0']['run_0']),
+ mne.io.RawArray)
- # bad subject id must raise error
- self.assertRaises(ValueError, ds.get_data, [1000])
+ # bad subject id must raise error
+ self.assertRaises(ValueError, ds.get_data, [1000])
diff --git a/moabb/tests/paradigms.py b/moabb/tests/paradigms.py
index 4ee042a94..a7ccb27c9 100644
--- a/moabb/tests/paradigms.py
+++ b/moabb/tests/paradigms.py
@@ -4,7 +4,8 @@
from moabb.paradigms import (BaseSSVEP, FilterBankSSVEP, SSVEP,
LeftRightImagery, BaseMotorImagery,
FilterBankMotorImagery,
- FilterBankLeftRightImagery)
+ FilterBankLeftRightImagery,
+ BaseP300, P300)
import numpy as np
import logging
@@ -22,7 +23,7 @@ class Test_MotorImagery(unittest.TestCase):
def test_BaseImagery_paradigm(self):
paradigm = SimpleMotorImagery()
- dataset = FakeDataset()
+ dataset = FakeDataset(paradigm='imagery')
X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
# we should have all the same length
@@ -49,16 +50,128 @@ def test_BaseImagery_tmintmax(self):
def test_BaseImagery_filters(self):
# can work with filter bank
paradigm = SimpleMotorImagery(filters=[[7, 12], [12, 24]])
- dataset = FakeDataset()
+ dataset = FakeDataset(paradigm='imagery')
X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
# X must be a 4D Array
self.assertEqual(len(X.shape), 4)
self.assertEqual(X.shape[-1], 2)
+ def test_baseImagery_wrongevent(self):
# test process_raw return empty list if raw does not contain any
# selected event. cetain runs in dataset are event specific.
- dataset = FakeDataset()
+ paradigm = SimpleMotorImagery(filters=[[7, 12], [12, 24]])
+ dataset = FakeDataset(paradigm='imagery')
+ raw = dataset.get_data([1])[1]['session_0']['run_0']
+ # add something on the event channel
+ raw._data[-1] *= 10
+ self.assertIsNone(paradigm.process_raw(raw, dataset))
+ # zeros it out
+ raw._data[-1] *= 0
+ self.assertIsNone(paradigm.process_raw(raw, dataset))
+
+ def test_BaseImagery_noevent(self):
+ # Assert error if events from paradigm and dataset dont overlap
+ paradigm = SimpleMotorImagery(events=['left_hand', 'right_hand'])
+ dataset = FakeDataset(paradigm='imagery')
+ self.assertRaises(AssertionError, paradigm.get_data, dataset)
+
+ def test_LeftRightImagery_paradigm(self):
+ # with a good dataset
+ paradigm = LeftRightImagery()
+ dataset = FakeDataset(event_list=['left_hand', 'right_hand'],
+ paradigm='imagery')
+ X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
+ self.assertEqual(len(np.unique(labels)), 2)
+ self.assertEqual(list(np.unique(labels)), ['left_hand', 'right_hand'])
+
+ def test_LeftRightImagery_noevent(self):
+ # we cant pass event to this class
+ self.assertRaises(ValueError, LeftRightImagery, events=['a'])
+
+ def test_LeftRightImagery_badevents(self):
+ paradigm = LeftRightImagery()
+ # does not accept dataset with bad event
+ dataset = FakeDataset(paradigm='imagery')
+ self.assertRaises(AssertionError, paradigm.get_data, dataset)
+
+ def test_FilterBankMotorImagery_paradigm(self):
+ # can work with filter bank
+ paradigm = FilterBankMotorImagery()
+ dataset = FakeDataset(paradigm='imagery')
+ X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
+
+ # X must be a 4D Array
+ self.assertEqual(len(X.shape), 4)
+ self.assertEqual(X.shape[-1], 6)
+
+ def test_FilterBankMotorImagery_moreclassesthanevent(self):
+ self.assertRaises(AssertionError, FilterBankMotorImagery, n_classes=3,
+ events=['hands', 'feet'])
+
+ def test_FilterBankLeftRightImagery_paradigm(self):
+ # can work with filter bank
+ paradigm = FilterBankLeftRightImagery()
+ dataset = FakeDataset(event_list=['left_hand', 'right_hand'],
+ paradigm='imagery')
+ X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
+
+ # X must be a 4D Array
+ self.assertEqual(len(X.shape), 4)
+ self.assertEqual(X.shape[-1], 6)
+
+
+class SimpleP300(BaseP300): # Needed to assess BaseP300
+ def used_events(self, dataset):
+ return dataset.event_id
+
+
+class Test_P300(unittest.TestCase):
+
+ def test_BaseP300_paradigm(self):
+ paradigm = SimpleP300()
+ dataset = FakeDataset(paradigm='p300')
+ X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
+
+ # we should have all the same length
+ self.assertEqual(len(X), len(labels), len(metadata))
+ # X must be a 3D Array
+ self.assertEqual(len(X.shape), 3)
+ # labels must contain 3 values
+ self.assertEqual(len(np.unique(labels)), 3)
+
+ # metadata must have subjets, sessions, runs
+ self.assertTrue('subject' in metadata.columns)
+ self.assertTrue('session' in metadata.columns)
+ self.assertTrue('run' in metadata.columns)
+
+ # we should have only one subject in the metadata
+ self.assertEqual(np.unique(metadata.subject), 1)
+
+ # we should have two sessions in the metadata
+ self.assertEqual(len(np.unique(metadata.session)), 2)
+
+ def test_BaseImagery_tmintmax(self):
+ self.assertRaises(ValueError, SimpleMotorImagery, tmin=1, tmax=0)
+
+ def test_BaseP300_tmintmax(self):
+ self.assertRaises(ValueError, SimpleP300, tmin=1, tmax=0)
+
+ def test_BaseP300_filters(self):
+ # can work with filter bank
+ paradigm = SimpleP300(filters=[[1, 12], [12, 24]])
+ dataset = FakeDataset(paradigm='p300')
+ X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
+
+ # X must be a 4D Array
+ self.assertEqual(len(X.shape), 4)
+ self.assertEqual(X.shape[-1], 2)
+
+ def test_BaseP300_wrongevent(self):
+ # test that process_raw returns None if raw does not contain any
+ # selected event. Certain runs in the dataset are event specific.
+ paradigm = SimpleP300(filters=[[1, 12], [12, 24]])
+ dataset = FakeDataset(paradigm='p300')
raw = dataset.get_data([1])[1]['session_0']['run_0']
# add something on the event channel
raw._data[-1] *= 10
@@ -67,6 +180,26 @@ def test_BaseImagery_filters(self):
raw._data[-1] *= 0
self.assertIsNone(paradigm.process_raw(raw, dataset))
+ def test_P300_specifyevent(self):
+        # we can't pass events to this class
+ self.assertRaises(ValueError, P300, events=['a'])
+
+ def test_P300_wrongevent(self):
+ # does not accept dataset with bad event
+ paradigm = P300()
+ dataset = FakeDataset(paradigm='p300')
+ self.assertRaises(AssertionError, paradigm.get_data, dataset)
+
+    def test_P300_paradigm(self):
+        # with a good dataset
+        paradigm = P300()
+        dataset = FakeDataset(event_list=['Target', 'NonTarget'],
+                              paradigm='p300')
+        X, labels, metadata = paradigm.get_data(dataset, subjects=[1])
+        self.assertEqual(len(np.unique(labels)), 2)
+        self.assertEqual(list(np.unique(labels)),
+                         sorted(['Target', 'NonTarget']))
+
def test_BaseImagery_noevent(self):
# Assert error if events from paradigm and dataset dont overlap
paradigm = SimpleMotorImagery(events=['left_hand', 'right_hand'])
diff --git a/moabb/tests/test_pipelines/LogVar.yml b/moabb/tests/test_pipelines/LogVar.yml
index 0b6338263..cde7692a9 100644
--- a/moabb/tests/test_pipelines/LogVar.yml
+++ b/moabb/tests/test_pipelines/LogVar.yml
@@ -2,6 +2,7 @@ name: Log Variance LDA
paradigms:
- FakeImageryParadigm
+ - FakeP300Paradigm
pipeline:
- name: LogVariance
diff --git a/pipelines/FBCSP.py b/pipelines/FBCSP.py
new file mode 100644
index 000000000..d971c8e86
--- /dev/null
+++ b/pipelines/FBCSP.py
@@ -0,0 +1,19 @@
+from sklearn.svm import SVC
+from pyriemann.estimation import Covariances
+from pyriemann.spatialfilters import CSP
+from sklearn.model_selection import GridSearchCV
+from sklearn.feature_selection import SelectKBest, mutual_info_classif
+from moabb.pipelines.utils import FilterBank
+from sklearn.pipeline import make_pipeline
+import numpy as np
+
+parameters = {'C': np.logspace(-2, 2, 10)}
+clf = GridSearchCV(SVC(kernel='linear'), parameters)
+fb = FilterBank(make_pipeline(Covariances(estimator='oas'), CSP(nfilter=4)))
+pipe = make_pipeline(fb, SelectKBest(score_func=mutual_info_classif, k=10),
+ clf)
+
+# this is what will be loaded
+PIPELINE = {'name': 'FBCSP + optSVM',
+ 'paradigms': ['FilterBankMotorImagery'],
+ 'pipeline': pipe}
diff --git a/pipelines/LogVar.py b/pipelines/LogVar.py
new file mode 100644
index 000000000..4ab7ae20f
--- /dev/null
+++ b/pipelines/LogVar.py
@@ -0,0 +1,14 @@
+from sklearn.svm import SVC
+from sklearn.model_selection import GridSearchCV
+from moabb.pipelines.features import LogVariance
+from sklearn.pipeline import make_pipeline
+import numpy as np
+
+parameters = {'C': np.logspace(-2, 2, 10)}
+clf = GridSearchCV(SVC(kernel='linear'), parameters)
+pipe = make_pipeline(LogVariance(), clf)
+
+# this is what will be loaded
+PIPELINE = {'name': 'AM + optSVM',
+ 'paradigms': ['MotorImagery'],
+ 'pipeline': pipe}
diff --git a/pipelines/TSSVM.py b/pipelines/TSSVM.py
new file mode 100644
index 000000000..a5f9088b5
--- /dev/null
+++ b/pipelines/TSSVM.py
@@ -0,0 +1,15 @@
+from sklearn.svm import SVC
+from sklearn.model_selection import GridSearchCV
+from pyriemann.estimation import Covariances
+from pyriemann.tangentspace import TangentSpace
+from sklearn.pipeline import make_pipeline
+import numpy as np
+
+parameters = {'C': np.logspace(-2, 2, 10)}
+clf = GridSearchCV(SVC(kernel='linear'), parameters)
+pipe = make_pipeline(Covariances('oas'), TangentSpace(metric='riemann'), clf)
+
+# this is what will be loaded
+PIPELINE = {'name': 'TS + optSVM',
+ 'paradigms': ['MotorImagery'],
+ 'pipeline': pipe}
diff --git a/pipelines/WTRCSP.py b/pipelines/WTRCSP.py
new file mode 100644
index 000000000..1e50b3d9f
--- /dev/null
+++ b/pipelines/WTRCSP.py
@@ -0,0 +1,13 @@
+from pyriemann.estimation import Covariances
+from moabb.pipelines.csp import TRCSP
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.pipeline import make_pipeline
+
+
+pipe = make_pipeline(Covariances('scm'), TRCSP(
+ nfilter=6), LinearDiscriminantAnalysis())
+
+# this is what will be loaded
+PIPELINE = {'name': 'TRCSP + LDA',
+ 'paradigms': ['MotorImagery'],
+ 'pipeline': pipe}
diff --git a/pipelines/regCSP+shLDA.yml b/pipelines/regCSP+shLDA.yml
new file mode 100644
index 000000000..e4db6bf08
--- /dev/null
+++ b/pipelines/regCSP+shLDA.yml
@@ -0,0 +1,24 @@
+name: DLCSPauto + shLDA
+paradigms:
+ - MotorImagery
+
+citations:
+ - https://doi.org/10.1007/BF01129656
+ - https://doi.org/10.1109/MSP.2008.4408441
+
+pipeline:
+ - name: Covariances
+ from: pyriemann.estimation
+ parameters:
+ estimator: oas
+
+ - name: CSP
+ from: pyriemann.spatialfilters
+ parameters:
+ nfilter: 6
+
+ - name: LinearDiscriminantAnalysis
+ from: sklearn.discriminant_analysis
+ parameters:
+ solver: lsqr
+ shrinkage: auto
diff --git a/requirements.txt b/requirements.txt
index fb292fbb0..a34518988 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
scikit-learn
-mne
+mne >= 0.16
pyriemann
-matplotlib
-seaborn==0.8.1
+matplotlib >= 2.2
+seaborn >= 0.9.0
h5py
pandas
pyyaml
diff --git a/setup.py b/setup.py
index 5a340410f..6ce1f7674 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
setup(name='moabb',
- version='0.1.1',
+ version='0.2.1',
description='Mother of all BCI Benchmarks',
url='',
author='Alexandre Barachant, Vinay Jayaram',
diff --git a/tutorials/plot_statistical_analysis.py b/tutorials/plot_statistical_analysis.py
index 9d796046a..43934f8a1 100644
--- a/tutorials/plot_statistical_analysis.py
+++ b/tutorials/plot_statistical_analysis.py
@@ -14,7 +14,8 @@
import moabb
import matplotlib.pyplot as plt
import moabb.analysis.plotting as moabb_plt
-from moabb.analysis.meta_analysis import find_significant_differences, compute_dataset_statistics # noqa: E501
+from moabb.analysis.meta_analysis import (
+ find_significant_differences, compute_dataset_statistics) # noqa: E501
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
@@ -55,19 +56,15 @@
pipelines = {}
-pipelines['CSP + LDA'] = make_pipeline(CSP(n_components=8),
- LDA())
+pipelines['CSP + LDA'] = make_pipeline(CSP(n_components=8), LDA())
-pipelines['RG + LR'] = make_pipeline(Covariances(),
- TangentSpace(),
+pipelines['RG + LR'] = make_pipeline(Covariances(), TangentSpace(),
LogisticRegression())
-pipelines['CSP + LR'] = make_pipeline(CSP(n_components=8),
- LogisticRegression())
+pipelines['CSP + LR'] = make_pipeline(
+ CSP(n_components=8), LogisticRegression())
-pipelines['RG + LDA'] = make_pipeline(Covariances(),
- TangentSpace(),
- LDA())
+pipelines['RG + LDA'] = make_pipeline(Covariances(), TangentSpace(), LDA())
##############################################################################
# Evaluation
@@ -82,14 +79,18 @@
# be overwritten if necessary.
paradigm = LeftRightImagery()
-datasets = [BNCI2014001()]
+dataset = BNCI2014001()
+dataset.subject_list = dataset.subject_list[:4]
+datasets = [dataset]
overwrite = False # set to True if we want to overwrite cached results
-evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
- suffix='examples', overwrite=overwrite)
+evaluation = CrossSessionEvaluation(
+ paradigm=paradigm,
+ datasets=datasets,
+ suffix='examples',
+ overwrite=overwrite)
results = evaluation.process(pipelines)
-
##############################################################################
# MOABB plotting
# ----------------
@@ -110,7 +111,6 @@
fig = moabb_plt.paired_plot(results, 'CSP + LDA', 'RG + LDA')
plt.show()
-
###############################################################################
# Statistical testing and further plots
# ----------------------------------------
@@ -121,7 +121,6 @@
# other, the method find_significant_differences and the summary_plot are
# possible.
-
stats = compute_dataset_statistics(results)
P, T = find_significant_differences(stats)
@@ -132,7 +131,6 @@
fig = moabb_plt.meta_analysis_plot(stats, 'CSP + LDA', 'RG + LDA')
plt.show()
-
###############################################################################
# The summary plot shows the effect and significance related to the hypothesis
# that the algorithm on the y-axis significantly out-performed the algorithm on