Skip to content

Commit 3958cf6

Browse files
committed
minor beautification
1 parent 472514d commit 3958cf6

File tree

2 files changed

+43
-89
lines changed

2 files changed

+43
-89
lines changed

_posts/2013-02-27-fast-factor-generation.md

+25-72
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,21 @@ using namespace Rcpp;
2020

2121
template <int RTYPE>
2222
IntegerVector fast_factor_template( const Vector<RTYPE>& x ) {
23-
Vector<RTYPE> levs = sort_unique(x);
24-
IntegerVector out = match(x, levs);
25-
out.attr("levels") = as<CharacterVector>(levs);
26-
out.attr("class") = "factor";
27-
return out;
23+
Vector<RTYPE> levs = sort_unique(x);
24+
IntegerVector out = match(x, levs);
25+
out.attr("levels") = as<CharacterVector>(levs);
26+
out.attr("class") = "factor";
27+
return out;
2828
}
2929

3030
// [[Rcpp::export]]
3131
SEXP fast_factor( SEXP x ) {
32-
switch( TYPEOF(x) ) {
32+
switch( TYPEOF(x) ) {
3333
case INTSXP: return fast_factor_template<INTSXP>(x);
3434
case REALSXP: return fast_factor_template<REALSXP>(x);
3535
case STRSXP: return fast_factor_template<STRSXP>(x);
36-
}
37-
return R_NilValue;
36+
}
37+
return R_NilValue;
3838
}
3939
{% endhighlight %}
4040

@@ -60,30 +60,8 @@ And a quick test:
6060

6161
{% highlight r %}
6262
library(microbenchmark)
63-
all.equal( factor( 1:10 ), fast_factor( 1:10 ) )
64-
{% endhighlight %}
65-
66-
67-
68-
<pre class="output">
69-
[1] TRUE
70-
</pre>
71-
72-
73-
74-
{% highlight r %}
75-
all.equal( factor( letters ), fast_factor( letters ) )
76-
{% endhighlight %}
77-
78-
79-
80-
<pre class="output">
81-
[1] TRUE
82-
</pre>
83-
84-
85-
86-
{% highlight r %}
63+
stopifnot(all.equal( factor( 1:10 ), fast_factor( 1:10 )))
64+
stopifnot(all.equal( factor( letters ), fast_factor( letters )))
8765
lets <- sample( letters, 1E5, replace=TRUE )
8866
microbenchmark( factor(lets), fast_factor(lets) )
8967
{% endhighlight %}
@@ -92,9 +70,9 @@ microbenchmark( factor(lets), fast_factor(lets) )
9270

9371
<pre class="output">
9472
Unit: milliseconds
95-
expr min lq median uq max
96-
1 factor(lets) 5.315 5.766 5.930 6.069 32.93
97-
2 fast_factor(lets) 1.420 1.458 1.474 1.486 28.85
73+
expr min lq median uq max neval
74+
factor(lets) 5.065 5.788 5.976 6.375 36.57 100
75+
fast_factor(lets) 1.367 1.421 1.453 1.520 2.83 100
9876
</pre>
9977

10078

@@ -114,48 +92,23 @@ we can test a couple ways of performing a `tapply`-like function:
11492
{% highlight r %}
11593
x <- rnorm(1E5)
11694
gp <- sample( 1:1000, 1E5, TRUE )
117-
all( tapply(x, gp, mean) == unlist( lapply( split(x, fast_factor(gp)), mean ) ) )
118-
{% endhighlight %}
119-
120-
121-
122-
<pre class="output">
123-
[1] TRUE
124-
</pre>
125-
126-
127-
128-
{% highlight r %}
129-
all( tapply(x, gp, mean) == unlist( lapply( split(x, gp), mean ) ) )
130-
{% endhighlight %}
131-
132-
133-
134-
<pre class="output">
135-
[1] TRUE
136-
</pre>
137-
138-
139-
140-
{% highlight r %}
141-
rbenchmark::benchmark( replications=20, order="relative",
142-
tapply(x, gp, mean),
143-
unlist( lapply( split(x, fast_factor(gp)), mean) ),
144-
unlist( lapply( split(x, gp), mean ) )
145-
)[,1:4]
95+
stopifnot(all( tapply(x, gp, mean) == unlist( lapply( split(x, fast_factor(gp)), mean ))))
96+
stopifnot(all( tapply(x, gp, mean) == unlist( lapply( split(x, gp), mean ) ) ))
97+
library(rbenchmark)
98+
benchmark(replications=20, order="relative",
99+
tapply(x, gp, mean),
100+
unlist(lapply(split(x,fast_factor(gp)),mean)),
101+
unlist(lapply(split(x,gp), mean))
102+
)[,c(1,3:4)]
146103
{% endhighlight %}
147104

148105

149106

150107
<pre class="output">
151-
test replications elapsed
152-
2 unlist(lapply(split(x, fast_factor(gp)), mean)) 20 0.200
153-
3 unlist(lapply(split(x, gp), mean)) 20 0.731
154-
1 tapply(x, gp, mean) 20 1.444
155-
relative
156-
2 1.000
157-
3 3.655
158-
1 7.220
108+
test elapsed relative
109+
2 unlist(lapply(split(x, fast_factor(gp)), mean)) 0.292 1.000
110+
3 unlist(lapply(split(x, gp), mean)) 1.042 3.568
111+
1 tapply(x, gp, mean) 2.043 6.997
159112
</pre>
160113

161114

src/2013-02-27-fast-factor-generation.Rmd

+18-17
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,21 @@ using namespace Rcpp;
1717
1818
template <int RTYPE>
1919
IntegerVector fast_factor_template( const Vector<RTYPE>& x ) {
20-
Vector<RTYPE> levs = sort_unique(x);
21-
IntegerVector out = match(x, levs);
22-
out.attr("levels") = as<CharacterVector>(levs);
23-
out.attr("class") = "factor";
24-
return out;
20+
Vector<RTYPE> levs = sort_unique(x);
21+
IntegerVector out = match(x, levs);
22+
out.attr("levels") = as<CharacterVector>(levs);
23+
out.attr("class") = "factor";
24+
return out;
2525
}
2626
2727
// [[Rcpp::export]]
2828
SEXP fast_factor( SEXP x ) {
29-
switch( TYPEOF(x) ) {
29+
switch( TYPEOF(x) ) {
3030
case INTSXP: return fast_factor_template<INTSXP>(x);
3131
case REALSXP: return fast_factor_template<REALSXP>(x);
3232
case STRSXP: return fast_factor_template<STRSXP>(x);
33-
}
34-
return R_NilValue;
33+
}
34+
return R_NilValue;
3535
}
3636
```
3737

@@ -55,8 +55,8 @@ And a quick test:
5555

5656
```{r}
5757
library(microbenchmark)
58-
all.equal( factor( 1:10 ), fast_factor( 1:10 ) )
59-
all.equal( factor( letters ), fast_factor( letters ) )
58+
stopifnot(all.equal( factor( 1:10 ), fast_factor( 1:10 )))
59+
stopifnot(all.equal( factor( letters ), fast_factor( letters )))
6060
lets <- sample( letters, 1E5, replace=TRUE )
6161
microbenchmark( factor(lets), fast_factor(lets) )
6262
```
@@ -76,13 +76,14 @@ we can test a couple ways of performing a `tapply`-like function:
7676
```{r tidy=FALSE}
7777
x <- rnorm(1E5)
7878
gp <- sample( 1:1000, 1E5, TRUE )
79-
all( tapply(x, gp, mean) == unlist( lapply( split(x, fast_factor(gp)), mean ) ) )
80-
all( tapply(x, gp, mean) == unlist( lapply( split(x, gp), mean ) ) )
81-
rbenchmark::benchmark( replications=20, order="relative",
82-
tapply(x, gp, mean),
83-
unlist( lapply( split(x, fast_factor(gp)), mean) ),
84-
unlist( lapply( split(x, gp), mean ) )
85-
)[,1:4]
79+
stopifnot(all( tapply(x, gp, mean) == unlist( lapply( split(x, fast_factor(gp)), mean ))))
80+
stopifnot(all( tapply(x, gp, mean) == unlist( lapply( split(x, gp), mean ) ) ))
81+
library(rbenchmark)
82+
benchmark(replications=20, order="relative",
83+
tapply(x, gp, mean),
84+
unlist(lapply(split(x,fast_factor(gp)),mean)),
85+
unlist(lapply(split(x,gp), mean))
86+
)[,c(1,3:4)]
8687
```
8788

8889
To be fair, tapply actually returns a 1-dimensional array rather than a vector,

0 commit comments

Comments
 (0)