Skip to content

Commit 99534a9

Browse files
author
Fabian Pedregosa
committed
Initial import.
0 parents  commit 99534a9

19 files changed

+7046
-0
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
*.pyc
2+
*.so
3+
*~
4+
.#*
5+
*.swp
6+

README.rst

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
Benchmarks for various machine learning packages
3+
==================================================
4+
5+
Collection of benchmarks comparing various python-based machine
6+
learning packages.
7+
8+
This is meant to work with the development version of the libraries
9+
scikits.learn, mlpy, pybrain, pymvpa, mdp and shogun. It might be hard
10+
to get all packages working on the same machine, but benchmarks are
11+
designed so that if something fails it will just print the exception
12+
and go to the next one.
13+
14+
To execute a benchmark, just type from the prompt::
15+
16+
$ python benchmarks/bench_$name.py
17+
18+
and you will see as output the mean and std deviation for the timing of
19+
running the benchmark 10 times with its extreme values removed.
20+
21+
References
22+
----------
23+
24+
- scikits.learn : http://scikit-learn.sourceforge.net
25+
- MDP : http://mdp-toolkit.sourceforge.net/
26+
- PyMVPA : http://pymvpa.org
27+
- MLPy : https://mlpy.fbk.eu/
28+
- Shogun: http://www.shogun-toolbox.org/
29+
- PyBrain : http://pybrain.org/
30+
31+
32+
Misc
33+
----
34+
35+
Author: Fabian Pedregosa <[email protected]>
36+
License: Simplified BSD

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/bench_elasticnet.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Benchmarks for coordinate-descent implementations of ElasticNet"""
2+
3+
#
4+
# .. Imports ..
5+
#
6+
from datetime import datetime
7+
from scikits.learn import linear_model as skl_lm
8+
from mlpy import ElasticNet as mlpy_enet
9+
from mvpa.datasets import Dataset
10+
from mvpa.clfs import glmnet as mvpa_glmnet
11+
12+
#
13+
# .. Load dataset ..
14+
#
15+
from load import load_data, bench
16+
# Announce the load, fetch the shared benchmark arrays, then report the
# two dimensions of the training matrix.
print('Loading data ...')
X, y, T = load_data()
print('Done, %s samples with %s features loaded into memory' % X.shape)
20+
21+
22+
def bench_skl():
    """Time scikits.learn's ElasticNet: fit on (X, y), then predict on T.

    The clock starts before the estimator is constructed. Returns the
    elapsed wall-clock time as a ``datetime.timedelta``.
    """
    t0 = datetime.now()
    model = skl_lm.ElasticNet(rho=0.5)
    model.fit(X, y)
    model.predict(T)
    elapsed = datetime.now() - t0
    return elapsed
31+
32+
33+
def bench_mlpy():
    """Time MLPy's ElasticNet: learn on (X, y), then predict on T.

    The clock starts before the estimator is constructed. Returns the
    elapsed wall-clock time as a ``datetime.timedelta``.
    """
    t0 = datetime.now()
    model = mlpy_enet(tau=0.5, mu=0.5)
    model.learn(X, y)
    model.pred(T)
    elapsed = datetime.now() - t0
    return elapsed
42+
43+
44+
def bench_pymvpa():
    """Time PyMVPA's GLMNET_R: build a Dataset, train, then predict on T.

    Wrapping (X, y) into a PyMVPA ``Dataset`` is part of the timed
    section. Returns the elapsed wall-clock time as a
    ``datetime.timedelta``.
    """
    t0 = datetime.now()
    dataset = Dataset(samples=X, labels=y)
    learner = mvpa_glmnet.GLMNET_R(alpha=0.5)
    learner.train(dataset)
    learner.predict(T)
    elapsed = datetime.now() - t0
    return elapsed
54+
55+
56+
if __name__ == '__main__':
57+
print __doc__
58+
print 'scikits.learn: ', bench(bench_skl)
59+
print 'MLPy: ', bench(bench_mlpy)
60+
print 'PyMVPA: ', bench(bench_pymvpa)

benchmarks/bench_kmeans.py

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""K-means clustering"""
2+
3+
import numpy as np
4+
from datetime import datetime
5+
#from shogun.Distance import EuclidianDistance
6+
#from shogun.Features import RealFeatures
7+
#from shogun.Clustering import KMeans
8+
from scikits.learn import cluster as skl_cluster
9+
from scikits.learn.cluster.k_means_ import _e_step
10+
#from pybrain.auxiliary import kmeans as pybrain_kmeans
11+
#from mlpy import Kmeans as mlpy_Kmeans
12+
from mdp.nodes import KMeansClassifier as mdp_KMeans
13+
14+
15+
#
16+
# .. Load dataset ..
17+
#
18+
from load import load_data, bench
19+
# Announce the load, fetch the shared benchmark arrays, then report the
# two dimensions of the training matrix.
print('Loading data ...')
X, y, T = load_data()
print('Done, %s samples with %s features loaded into memory' % X.shape)

# Number of clusters requested from every k-means implementation below.
k = 9
# Not referenced by any benchmark in this script.
n_components = 9
25+
26+
27+
def bench_shogun():
    """Time Shogun's KMeans: build features and a distance, then train.

    Returns the elapsed wall-clock time as a ``datetime.timedelta``.
    """
    # Imported locally (outside the timed section): the module-level
    # Shogun imports are commented out, so without these the function
    # would fail with NameError as soon as it is called.
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import KMeans

    start = datetime.now()
    # Shogun is handed the transpose of X
    # (presumably it expects one sample per column -- confirm).
    feat = RealFeatures(X.T)
    distance = EuclidianDistance(feat, feat)
    clf = KMeans(k, distance)
    clf.train()
    return datetime.now() - start
37+
38+
39+
def bench_skl():
    """Time scikits.learn's KMeans fit on X with k clusters and n_init=1.

    The clock starts before the estimator is constructed. Returns the
    elapsed wall-clock time as a ``datetime.timedelta``.
    """
    t0 = datetime.now()
    model = skl_cluster.KMeans(k=k, n_init=1)
    model.fit(X)
    elapsed = datetime.now() - t0
    return elapsed
47+
48+
49+
def bench_pybrain():
    """Time PyBrain's ``kmeanspp`` on X with k clusters.

    Returns the elapsed wall-clock time as a ``datetime.timedelta``.
    """
    # Imported locally (outside the timed section): the module-level
    # pybrain import is commented out, so without this the function
    # would fail with NameError as soon as it is called.
    from pybrain.auxiliary import kmeans as pybrain_kmeans

    start = datetime.now()
    pybrain_kmeans.kmeanspp(X, k)
    return datetime.now() - start
56+
57+
58+
def bench_mlpy():
    """Time MLPy's Kmeans: construct with k clusters and compute on X.

    Returns the elapsed wall-clock time as a ``datetime.timedelta``.
    """
    # Imported locally (outside the timed section): the module-level
    # mlpy import is commented out, so without this the function would
    # fail with NameError as soon as it is called.
    from mlpy import Kmeans as mlpy_Kmeans

    start = datetime.now()
    clf = mlpy_Kmeans(k)
    clf.compute(X)
    return datetime.now() - start
66+
67+
68+
def bench_mdp():
    """Time MDP's KMeansClassifier: construct with k and label X.

    The clock starts before the node is constructed. Returns the elapsed
    wall-clock time as a ``datetime.timedelta``.
    """
    t0 = datetime.now()
    node = mdp_KMeans(k)
    # NOTE(review): no explicit train() call precedes label() here --
    # confirm that MDP accepts this for an untrained node.
    node.label(X)
    elapsed = datetime.now() - t0
    return elapsed
76+
77+
78+
79+
if __name__ == '__main__':
80+
print __doc__
81+
# print 'Shogun: ', bench_shogun()
82+
print 'scikits.learn: ', bench_skl()
83+
# print 'pybrain: ', bench_pybrain()
84+
# print 'MLPy: ', bench_mlpy()
85+
print 'MDP: ', bench_mdp()

benchmarks/bench_knn.py

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""Various libraries classifying on k-Nearest Neighbors"""
2+
3+
#
4+
# .. Imports ..
5+
#
6+
import numpy as np
7+
from datetime import datetime
8+
9+
#
10+
# .. Load dataset ..
11+
#
12+
from load import load_data, bench
13+
# Announce the load, fetch the shared benchmark arrays, then report the
# two dimensions of the training matrix.
print('Loading data ...')
X, y, T = load_data()
print('Done, %s samples with %s features loaded into memory' % X.shape)

# Neighbourhood size passed to every k-NN implementation below.
n_neighbors = 9
18+
19+
20+
def bench_shogun():
    """Time Shogun's KNN: build features/labels, train, classify T.

    The Shogun import happens before the clock starts. Returns the
    elapsed wall-clock time as a ``datetime.timedelta``.
    """
    from shogun import Classifier, Features, Distance

    t0 = datetime.now()
    # Both X and T are handed to Shogun transposed.
    train_feat = Features.RealFeatures(X.T)
    metric = Distance.EuclidianDistance(train_feat, train_feat)
    targets = Features.Labels(y.astype(np.float64))
    query_feat = Features.RealFeatures(T.T)
    classifier = Classifier.KNN(n_neighbors, metric, targets)
    classifier.train()
    classifier.classify(query_feat).get_labels()
    elapsed = datetime.now() - t0
    return elapsed
34+
35+
36+
def bench_mdp():
    """Time MDP's KNNClassifier: train on (X, y), then label T.

    The MDP import happens before the clock starts. Returns the elapsed
    wall-clock time as a ``datetime.timedelta``.
    """
    from mdp.nodes.classifier_nodes import KNNClassifier

    t0 = datetime.now()
    node = KNNClassifier(k=n_neighbors)
    node.train(X, y)
    node.label(T)
    elapsed = datetime.now() - t0
    return elapsed
46+
47+
48+
def bench_skl():
    """Time scikits.learn's NeighborsClassifier: fit (X, y), predict T.

    The scikits.learn import happens before the clock starts. Returns
    the elapsed wall-clock time as a ``datetime.timedelta``.
    """
    from scikits.learn import neighbors

    t0 = datetime.now()
    model = neighbors.NeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(X, y)
    model.predict(T)
    elapsed = datetime.now() - t0
    return elapsed
58+
59+
60+
def bench_mlpy():
    """Time MLPy's Knn: compute on (X, y), then predict on T.

    The MLPy import happens before the clock starts. Returns the elapsed
    wall-clock time as a ``datetime.timedelta``.
    """
    from mlpy import Knn as mlpy_Knn

    t0 = datetime.now()
    model = mlpy_Knn(n_neighbors)
    model.compute(X, y)
    model.predict(T)
    elapsed = datetime.now() - t0
    return elapsed
70+
71+
72+
def bench_pymvpa():
    """Time PyMVPA's kNN: build a dataset, train, then predict on T.

    The PyMVPA imports happen before the clock starts; building the
    dataset with ``dataset_wizard`` is part of the timed section.
    Returns the elapsed wall-clock time as a ``datetime.timedelta``.
    """
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import knn as mvpa_knn

    t0 = datetime.now()
    dataset = dataset_wizard(X, y)
    learner = mvpa_knn.kNN(k=n_neighbors)
    learner.train(dataset)
    learner.predict(T)
    elapsed = datetime.now() - t0
    return elapsed
84+
85+
86+
if __name__ == '__main__':
87+
88+
# don't bother me with warnings
89+
import warnings; warnings.simplefilter('ignore')
90+
np.seterr(all='ignore')
91+
92+
print __doc__ + '\n'
93+
94+
res_shogun = bench(bench_shogun)
95+
print 'Shogun: mean %s, std %s' % (res_shogun.mean(), res_shogun.std())
96+
97+
res_mdp = bench(bench_mdp)
98+
print 'MDP: mean %s, std %s' % (res_mdp.mean(), res_mdp.std())
99+
100+
res_skl = bench(bench_skl)
101+
print 'scikits.learn: mean %s, std %s' % (res_skl.mean(), res_skl.std())
102+
103+
res_mlpy = bench(bench_mlpy)
104+
print 'MLPy: mean %s, std %s' % (res_mlpy.mean(), res_mlpy.std())
105+
106+
res_pymvpa = bench(bench_pymvpa)
107+
print 'PyMVPA: mean %s, std %s' % (res_pymvpa.mean(), res_pymvpa.std())

benchmarks/bench_lassolars.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""bench different LARS implementations"""
2+
3+
from datetime import datetime
4+
from scikits.learn import linear_model
5+
from mlpy import Lasso as mlpy_lasso
6+
from mvpa.datasets import Dataset
7+
from mvpa.clfs import lars as mvpa_lars
8+
9+
#
10+
# .. Load dataset ..
11+
#
12+
from load import load_data, bench
13+
# Announce the load, fetch the shared benchmark arrays, then report the
# two dimensions of the training matrix.
print('Loading data ...')
X, y, T = load_data()
print('Done, %s samples with %s features loaded into memory' % X.shape)
17+
18+
19+
def bench_skl():
    """Time scikits.learn's LassoLARS(alpha=0.): fit, then predict.

    Fitting uses ``normalize=False``; prediction is run on the training
    matrix X itself. Returns the elapsed wall-clock time as a
    ``datetime.timedelta``.
    """
    t0 = datetime.now()
    model = linear_model.LassoLARS(alpha=0.)
    model.fit(X, y, normalize=False)
    model.predict(X)
    elapsed = datetime.now() - t0
    return elapsed
28+
29+
30+
def bench_mlpy():
    """Time MLPy's Lasso: learn on (X, y), then predict on X.

    ``m`` is set to the number of columns of X. Returns the elapsed
    wall-clock time as a ``datetime.timedelta``.
    """
    t0 = datetime.now()
    n_columns = X.shape[1]
    model = mlpy_lasso(m=n_columns)
    model.learn(X, y)
    model.pred(X)
    elapsed = datetime.now() - t0
    return elapsed
39+
40+
41+
def bench_pymvpa():
    """Time PyMVPA's LARS: wrap (X, y) in a Dataset and train on it.

    Only training is timed: the prediction step is disabled and marked
    BROKEN below. Returns the elapsed wall-clock time as a
    ``datetime.timedelta``.
    """
    t0 = datetime.now()
    dataset = Dataset(samples=X, labels=y)
    learner = mvpa_lars.LARS()
    learner.train(dataset)
    # BROKEN
    # mvpa_pred = learner.predict(X)
    elapsed = datetime.now() - t0
    return elapsed
53+
54+
55+
56+
57+
if __name__ == '__main__':
58+
print __doc__
59+
print 'scikits.learn: ', bench(bench_skl)
60+
print 'MLPy: ', bench(bench_mlpy)
61+
print 'PyMVPA: ', bench(bench_pymvpa)

0 commit comments

Comments
 (0)