@@ -20,21 +20,21 @@ using namespace Rcpp;
20
20
21
21
template <int RTYPE >
22
22
IntegerVector fast_factor_template( const Vector<RTYPE >& x ) {
23
- Vector<RTYPE > levs = sort_unique(x);
24
- IntegerVector out = match(x, levs);
25
- out.attr("levels") = as<CharacterVector >(levs);
26
- out.attr("class") = "factor";
27
- return out;
23
+ Vector<RTYPE > levs = sort_unique(x);
24
+ IntegerVector out = match(x, levs);
25
+ out.attr("levels") = as<CharacterVector >(levs);
26
+ out.attr("class") = "factor";
27
+ return out;
28
28
}
29
29
30
30
// [[ Rcpp::export]]
31
31
SEXP fast_factor( SEXP x ) {
32
- switch( TYPEOF(x) ) {
32
+ switch( TYPEOF(x) ) {
33
33
case INTSXP: return fast_factor_template<INTSXP >(x);
34
34
case REALSXP: return fast_factor_template<REALSXP >(x);
35
35
case STRSXP: return fast_factor_template<STRSXP >(x);
36
- }
37
- return R_NilValue;
36
+ }
37
+ return R_NilValue;
38
38
}
39
39
{% endhighlight %}
40
40
@@ -60,30 +60,8 @@ And a quick test:
60
60
61
61
{% highlight r %}
62
62
library(microbenchmark)
63
- all.equal( factor( 1:10 ), fast_factor( 1:10 ) )
64
- {% endhighlight %}
65
-
66
-
67
-
68
- <pre class =" output " >
69
- [1] TRUE
70
- </pre >
71
-
72
-
73
-
74
- {% highlight r %}
75
- all.equal( factor( letters ), fast_factor( letters ) )
76
- {% endhighlight %}
77
-
78
-
79
-
80
- <pre class =" output " >
81
- [1] TRUE
82
- </pre >
83
-
84
-
85
-
86
- {% highlight r %}
63
+ stopifnot(all.equal( factor( 1:10 ), fast_factor( 1:10 )))
64
+ stopifnot(all.equal( factor( letters ), fast_factor( letters )))
87
65
lets <- sample( letters, 1E5, replace=TRUE )
88
66
microbenchmark( factor(lets), fast_factor(lets) )
89
67
{% endhighlight %}
@@ -92,9 +70,9 @@ microbenchmark( factor(lets), fast_factor(lets) )
92
70
93
71
<pre class =" output " >
94
72
Unit: milliseconds
95
- expr min lq median uq max
96
- 1 factor(lets) 5.315 5.766 5.930 6.069 32.93
97
- 2 fast_factor(lets) 1.420 1.458 1.474 1.486 28.85
73
+ expr min lq median uq max neval
74
+ factor(lets) 5.065 5.788 5.976 6.375 36.57 100
75
+ fast_factor(lets) 1.367 1.421 1.453 1.520 2.83 100
98
76
</pre >
99
77
100
78
@@ -114,48 +92,23 @@ we can test a couple ways of performing a `tapply`-like function:
114
92
{% highlight r %}
115
93
x <- rnorm(1E5)
116
94
gp <- sample( 1:1000, 1E5, TRUE )
117
- all( tapply(x, gp, mean) == unlist( lapply( split(x, fast_factor(gp)), mean ) ) )
118
- {% endhighlight %}
119
-
120
-
121
-
122
- <pre class =" output " >
123
- [1] TRUE
124
- </pre >
125
-
126
-
127
-
128
- {% highlight r %}
129
- all( tapply(x, gp, mean) == unlist( lapply( split(x, gp), mean ) ) )
130
- {% endhighlight %}
131
-
132
-
133
-
134
- <pre class =" output " >
135
- [1] TRUE
136
- </pre >
137
-
138
-
139
-
140
- {% highlight r %}
141
- rbenchmark::benchmark( replications=20, order="relative",
142
- tapply(x, gp, mean),
143
- unlist( lapply( split(x, fast_factor(gp)), mean) ),
144
- unlist( lapply( split(x, gp), mean ) )
145
- )[ ,1:4]
95
+ stopifnot(all( tapply(x, gp, mean) == unlist( lapply( split(x, fast_factor(gp)), mean ))))
96
+ stopifnot(all( tapply(x, gp, mean) == unlist( lapply( split(x, gp), mean ) ) ))
97
+ library(rbenchmark)
98
+ benchmark(replications=20, order="relative",
99
+ tapply(x, gp, mean),
100
+ unlist(lapply(split(x,fast_factor(gp)),mean)),
101
+ unlist(lapply(split(x,gp), mean))
102
+ )[ ,c(1,3:4)]
146
103
{% endhighlight %}
147
104
148
105
149
106
150
107
<pre class =" output " >
151
- test replications elapsed
152
- 2 unlist(lapply(split(x, fast_factor(gp)), mean)) 20 0.200
153
- 3 unlist(lapply(split(x, gp), mean)) 20 0.731
154
- 1 tapply(x, gp, mean) 20 1.444
155
- relative
156
- 2 1.000
157
- 3 3.655
158
- 1 7.220
108
+ test elapsed relative
109
+ 2 unlist(lapply(split(x, fast_factor(gp)), mean)) 0.292 1.000
110
+ 3 unlist(lapply(split(x, gp), mean)) 1.042 3.568
111
+ 1 tapply(x, gp, mean) 2.043 6.997
159
112
</pre >
160
113
161
114
0 commit comments