Skip to content

Commit 5b054cf

Browse files
committed
updated to updated RcppMLPACK(2) package
1 parent f63240e commit 5b054cf

2 files changed

Lines changed: 97 additions & 89 deletions

File tree

_posts/2017-02-20-using-rcppmlpack2.md

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,11 @@ A second examples shows the `NaiveBayesClassifier` class.
164164
// [[Rcpp::depends(RcppMLPACK)]]
165165

166166
// [[Rcpp::export]]
167-
arma::irowvec naiveBayesClassifier(const arma::mat& train,
168-
const arma::mat& test,
169-
const arma::irowvec& labels,
170-
const int& classes) {
171-
167+
Rcpp::List naiveBayesClassifier(const arma::mat& train,
168+
const arma::irowvec& labels,
169+
const int& classes,
170+
const Rcpp::Nullable<Rcpp::NumericMatrix>& test = R_NilValue) {
171+
172172
// MLPACK wants Row<size_t> which is an unsigned representation
173173
// that R does not have
174174
arma::Row<size_t> labelsur, resultsur;
@@ -179,57 +179,79 @@ arma::irowvec naiveBayesClassifier(const arma::mat& train,
179179
// Initialize with the default arguments.
180180
// TODO: support more arguments
181181
mlpack::naive_bayes::NaiveBayesClassifier<> nbc(train, labelsur, classes);
182+
183+
Rcpp::List return_val;
184+
if (test.isNotNull()) {
185+
arma::mat armatest = Rcpp::as<arma::mat>(test);
186+
nbc.Classify(armatest, resultsur);
182187

183-
nbc.Classify(test, resultsur);
184-
185-
arma::irowvec results = arma::conv_to<arma::irowvec>::from(resultsur);
186-
187-
return results;
188+
arma::irowvec results = arma::conv_to<arma::irowvec>::from(resultsur);
189+
return Rcpp::List::create(Rcpp::Named("means") = nbc.Means(),
190+
Rcpp::Named("variances") = nbc.Variances(),
191+
Rcpp::Named("probabilities") = nbc.Probabilities(),
192+
Rcpp::Named("classification") = results);
193+
} else {
194+
return Rcpp::List::create(Rcpp::Named("means") = nbc.Means(),
195+
Rcpp::Named("variances") = nbc.Variances(),
196+
Rcpp::Named("probabilities") = nbc.Probabilities());
197+
}
188198
}
189199
{% endhighlight %}
190200

191-
We need a quick helper function to get test data, again mimicking the unit tests:
201+
We can use the sample data included in a recent-enough version of the RcppMLPACK package:
192202

193203

194-
{% highlight cpp %}
195-
#include <RcppMLPACK.h> // MLPACK, Rcpp and RcppArmadillo
204+
{% highlight r %}
205+
library(RcppMLPACK)
206+
data(trainSet) ## data part of RcppMLPACK package (when using RcppMLPACK2 source)
207+
trainmat <- t(trainSet[, -5]) ## train data
208+
trainlab <- trainSet[, 5] ## labels
209+
naiveBayesClassifier(trainmat, trainlab, 2L) ## just model
210+
{% endhighlight %}
196211

197-
#include <mlpack/methods/naive_bayes/naive_bayes_classifier.hpp> // particular algorithm used here
198212

199-
// [[Rcpp::depends(RcppMLPACK)]]
200213

214+
<pre class="output">
215+
$means
216+
[1] 2.75000 4.00000 3.68750 2.37500 8.33333 4.66667 3.66667 2.40000
201217

202-
// [[Rcpp::export]]
203-
Rcpp::List getData(const char* trainFilename, const char* testFilename) {
204-
arma::mat trainData, testData;
205-
mlpack::data::Load(trainFilename, trainData, true); // note implicit transpose
206-
mlpack::data::Load(testFilename, testData, true);
207-
208-
// Get the labels, then remove them from data
209-
arma::rowvec trainlabels = trainData.row(trainData.n_rows -1);
210-
arma::rowvec testlabels = testData.row(testData.n_rows -1);
211-
trainData.shed_row(trainData.n_rows - 1);
212-
testData.shed_row(trainData.n_rows - 1);
213-
return(Rcpp::List::create(Rcpp::Named("trainData") = Rcpp::wrap(trainData),
214-
Rcpp::Named("testData") = Rcpp::wrap(testData),
215-
Rcpp::Named("trainlabels") = trainlabels,
216-
Rcpp::Named("testlabels") = testlabels));
217-
}
218+
$variances
219+
[1] 0.333333 0.800000 0.629167 0.383333 0.809524 3.380952 0.666667 0.400000
220+
221+
$probabilities
222+
[1] 0.516129 0.483871
223+
</pre>
224+
225+
226+
227+
{% highlight r %}
228+
testmat <- t(testSet[, -5]) ## test data
229+
testlab <- testSet[, 5]
230+
res <- naiveBayesClassifier(trainmat, trainlab, 2L, testmat) ## also classify
231+
res
218232
{% endhighlight %}
219233

220-
Now that we can fetch the data from R, and use it to call the classifier:
234+
235+
236+
<pre class="output">
237+
$means
238+
[1] 2.75000 4.00000 3.68750 2.37500 8.33333 4.66667 3.66667 2.40000
239+
240+
$variances
241+
[1] 0.333333 0.800000 0.629167 0.383333 0.809524 3.380952 0.666667 0.400000
242+
243+
$probabilities
244+
[1] 0.516129 0.483871
245+
246+
$classification
247+
[1] 0 0 0 1 1 1 1
248+
</pre>
249+
221250

222251

223252
{% highlight r %}
224-
rl <- getData("/home/edd/git/mlpack/src/mlpack/tests/data/trainSet.csv", # should add to RcppMLACK2
225-
"/home/edd/git/mlpack/src/mlpack/tests/data/testSet.csv")
226-
trainData <- rl[["trainData"]]
227-
testData <- rl[["testData"]]
228-
trainlabels <- rl[["trainlabels"]]
229-
testlabels <- rl[["testlabels"]]
230-
res <- naiveBayesClassifier(trainData, testData, trainlabels, 2)
231-
## res was a rowvector but comes back as 1-row matrix
232-
all.equal(res[1,], testlabels)
253+
## res is now a list; its fourth element holds the classification
254+
all.equal(res[[4]], testlab)
233255
{% endhighlight %}
234256

235257

@@ -238,5 +260,6 @@ all.equal(res[1,], testlabels)
238260
[1] TRUE
239261
</pre>
240262

263+
241264
As we can see, the computed classification on the test set corresponds to the expected
242265
classification in `testlab`.

src/2017-02-20-using-rcppmlpack2.Rmd

Lines changed: 33 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,11 @@ A second examples shows the `NaiveBayesClassifier` class.
150150
// [[Rcpp::depends(RcppMLPACK)]]
151151
152152
// [[Rcpp::export]]
153-
arma::irowvec naiveBayesClassifier(const arma::mat& train,
154-
const arma::mat& test,
155-
const arma::irowvec& labels,
156-
const int& classes) {
157-
153+
Rcpp::List naiveBayesClassifier(const arma::mat& train,
154+
const arma::irowvec& labels,
155+
const int& classes,
156+
const Rcpp::Nullable<Rcpp::NumericMatrix>& test = R_NilValue) {
157+
158158
// MLPACK wants Row<size_t> which is an unsigned representation
159159
// that R does not have
160160
arma::Row<size_t> labelsur, resultsur;
@@ -165,56 +165,41 @@ arma::irowvec naiveBayesClassifier(const arma::mat& train,
165165
// Initialize with the default arguments.
166166
// TODO: support more arguments
167167
mlpack::naive_bayes::NaiveBayesClassifier<> nbc(train, labelsur, classes);
168-
169-
nbc.Classify(test, resultsur);
170-
171-
arma::irowvec results = arma::conv_to<arma::irowvec>::from(resultsur);
172-
173-
return results;
174-
}
175-
```
176-
177-
We need a quick helper function to get test data, again mimicking the unit tests:
178-
179-
```{r, engine="Rcpp"}
180-
#include <RcppMLPACK.h> // MLPACK, Rcpp and RcppArmadillo
181168
182-
#include <mlpack/methods/naive_bayes/naive_bayes_classifier.hpp> // particular algorithm used here
183-
184-
// [[Rcpp::depends(RcppMLPACK)]]
185-
186-
187-
// [[Rcpp::export]]
188-
Rcpp::List getData(const char* trainFilename, const char* testFilename) {
189-
arma::mat trainData, testData;
190-
mlpack::data::Load(trainFilename, trainData, true); // note implicit transpose
191-
mlpack::data::Load(testFilename, testData, true);
192-
193-
// Get the labels, then remove them from data
194-
arma::rowvec trainlabels = trainData.row(trainData.n_rows -1);
195-
arma::rowvec testlabels = testData.row(testData.n_rows -1);
196-
trainData.shed_row(trainData.n_rows - 1);
197-
testData.shed_row(trainData.n_rows - 1);
198-
return(Rcpp::List::create(Rcpp::Named("trainData") = Rcpp::wrap(trainData),
199-
Rcpp::Named("testData") = Rcpp::wrap(testData),
200-
Rcpp::Named("trainlabels") = trainlabels,
201-
Rcpp::Named("testlabels") = testlabels));
169+
Rcpp::List return_val;
170+
if (test.isNotNull()) {
171+
arma::mat armatest = Rcpp::as<arma::mat>(test);
172+
nbc.Classify(armatest, resultsur);
173+
174+
arma::irowvec results = arma::conv_to<arma::irowvec>::from(resultsur);
175+
return Rcpp::List::create(Rcpp::Named("means") = nbc.Means(),
176+
Rcpp::Named("variances") = nbc.Variances(),
177+
Rcpp::Named("probabilities") = nbc.Probabilities(),
178+
Rcpp::Named("classification") = results);
179+
} else {
180+
return Rcpp::List::create(Rcpp::Named("means") = nbc.Means(),
181+
Rcpp::Named("variances") = nbc.Variances(),
182+
Rcpp::Named("probabilities") = nbc.Probabilities());
183+
}
202184
}
203185
```
204186

205-
Now that we can fetch the data from R, and use it to call the classifier:
187+
We can use the sample data included in a recent-enough version of the RcppMLPACK package:
206188

207189
```{r}
208-
rl <- getData("/home/edd/git/mlpack/src/mlpack/tests/data/trainSet.csv", # should add to RcppMLACK2
209-
"/home/edd/git/mlpack/src/mlpack/tests/data/testSet.csv")
210-
trainData <- rl[["trainData"]]
211-
testData <- rl[["testData"]]
212-
trainlabels <- rl[["trainlabels"]]
213-
testlabels <- rl[["testlabels"]]
214-
res <- naiveBayesClassifier(trainData, testData, trainlabels, 2)
215-
## res was a rowvector but comes back as 1-row matrix
216-
all.equal(res[1,], testlabels)
190+
library(RcppMLPACK)
191+
data(trainSet) ## data part of RcppMLPACK package (when using RcppMLPACK2 source)
192+
trainmat <- t(trainSet[, -5]) ## train data
193+
trainlab <- trainSet[, 5] ## labels
194+
naiveBayesClassifier(trainmat, trainlab, 2L) ## just model
195+
testmat <- t(testSet[, -5]) ## test data
196+
testlab <- testSet[, 5]
197+
res <- naiveBayesClassifier(trainmat, trainlab, 2L, testmat) ## also classify
198+
res
199+
## res is now a list; its fourth element holds the classification
200+
all.equal(res[[4]], testlab)
217201
```
218202

203+
219204
As we can see, the computed classification on the test set corresponds to the expected
220205
classification in `testlab`.

0 commit comments

Comments
 (0)