Skip to content

Commit 0ea5b61

Browse files
committed
fun/pkg analysis
0 parents  commit 0ea5b61

File tree

1,563 files changed

+359098
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,563 files changed

+359098
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*~
2+
*.tar.gz

README.org

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
Database of all RcppExports.cpp files, to support the [[https://github.com/NAU-CS/RcppDeepState][RcppDeepState]]
2+
project.
3+
4+
** 29 Jan 2020
5+
6+
- [[file:packages-download.R]] downloads all CRAN packages which list Rcpp
7+
under LinkingTo.
8+
- [[file:packages-untar.R]] extracts just the RcppExports.cpp file from
9+
each package tar.gz file. (these are copied to the [[file:packages]]
10+
directory in this github repo)
11+
- [[file:input_parameter_parse.R]] was for experimenting with regex
12+
subroutines, but it only parses argument types (not functions) so it
13+
should no longer be used.
14+
- [[file:packages-parse.R]] analyzes which types are used most frequently
15+
in R packages that use Rcpp:
16+
17+
The top 10 argument types (counted over functions that take a single argument) are:
18+
19+
#+BEGIN_SRC
20+
> (top10 <- arg.counts[args==1, .(
21+
+ funs=.N,
22+
+ pkgs=length(unique(pkg.dir))
23+
+ ), by=clean.type][order(-funs)][1:10])
24+
clean.type funs pkgs
25+
1: SEXP 380 72
26+
2: Rcpp::NumericVector 330 154
27+
3: Rcpp::NumericMatrix 236 128
28+
4: arma::mat 208 102
29+
5: Rcpp::List 172 71
30+
6: std::string 159 76
31+
7: Rcpp::CharacterVector 112 51
32+
8: int 108 60
33+
9: Rcpp::IntegerVector 88 37
34+
10: double 79 44
35+
>
36+
#+END_SRC
37+
38+
If we implement =RcppDeepState_*= random generation functions for each
39+
of these ten types, then we will be able to automatically test this many
40+
functions/packages:
41+
42+
#+BEGIN_SRC
43+
> covered[, .(
44+
+ funs=.N,
45+
+ pkgs=length(unique(pkg.dir))
46+
+ )]
47+
funs pkgs
48+
1: 7702 1132
49+
>
50+
#+END_SRC
51+
52+
If we only implement the 8 easier types (the top 10 without SEXP and Rcpp::List), then we can cover this many functions/packages:
53+
54+
#+BEGIN_SRC
55+
> (some.types <- grep("SEXP|List", top10$clean.type, invert=TRUE, value=TRUE))
56+
[1] "Rcpp::NumericVector" "Rcpp::NumericMatrix" "arma::mat"
57+
[4] "std::string" "Rcpp::CharacterVector" "int"
58+
[7] "Rcpp::IntegerVector" "double"
59+
> some.covered <- arg.counts[some.types, on="clean.type"][, .(
60+
+ top10args=.N
61+
+ ), by=.(pkg.dir, funName, args)][args==top10args][order(-args)]
62+
> some.covered[, .(
63+
+ funs=.N,
64+
+ pkgs=length(unique(pkg.dir))
65+
+ )]
66+
funs pkgs
67+
1: 5838 995
68+
>
69+
#+END_SRC

input_parameter.txt

+70,941
Large diffs are not rendered by default.

input_parameter_parse.R

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
## One-off shell step that produced input_parameter.txt. It is disabled so
## that sourcing this script only reads the file that already exists.
if(FALSE){
  system("grep input_parameter packages/*/src/RcppExports.cpp > input_parameter.txt")
}

## Read the grep output, blank out everything after "//" on each line, and
## keep only the lines that still mention input_parameter afterwards.
input.vec <- readLines("input_parameter.txt")
empty.comments <- sub("//.*", "", input.vec)
removed.comments <- grep("input_parameter", empty.comments, value=TRUE)

## nc pattern for one grep line: the package directory name, then the text
## before "<" (before), the template argument (inside), an optional
## const / & suffix, and finally "::type".
## NOTE(review): "(?2)" is a PCRE subroutine call to capture group 2,
## presumably the "type" group, which is what lets nested <...> match --
## confirm against the regex that nc generates.
file.pattern <- list(
  "packages/",
  package=".*?",
  "/src/RcppExports.cpp:\\s*",
  type=list(
    before="[^<]+",
    "<\\s*",
    inside="(?2)|[^<>]+",
    ">",
    "\\s*",
    "(?:const)?",
    "\\s*",
    "&?",
    "\\s*"),
  "::type")
## Try the pattern on a single line with a nested template.
nc::capture_first_vec(
  "packages/HDclust/src/RcppExports.cpp: Rcpp::traits::input_parameter< Nullable<List> >::type rfsClust_(rfsClust_SEXP);",
  file.pattern,
  nomatch.error=FALSE,
  engine="PCRE")

## nomatch.error=FALSE: lines that do not match yield NA instead of an error.
match.dt <- nc::capture_first_vec(
  removed.comments,
  file.pattern,
  nomatch.error=FALSE,
  engine="PCRE")
## Show the subjects that did not match. match.dt has one row per element of
## removed.comments, so that vector (not the unfiltered input.vec) is the one
## its row numbers index correctly.
match.dt[, removed.comments[which(is.na(inside))] ]
## Show matches whose prefix is not exactly Rcpp::traits::input_parameter.
match.dt[before != "Rcpp::traits::input_parameter"]
37+
38+
## Normalize C++ type strings so that equivalent spellings compare equal.
##
## type.vec: character vector of type strings.
## Returns a character vector of the same length where
## - /* ... */ comments, leading and trailing whitespace, and whitespace
##   next to "<" or ">" are removed,
## - one trailing "&" and every "const" (with adjacent whitespace) are
##   removed, and runs of spaces are collapsed to a single space,
## - every bare name that occurs somewhere in type.vec as namespace::name is
##   rewritten to that qualified form.
## NOTE(review): the const rule matches the substring anywhere, so it also
## removes the letters "const" inside longer identifiers and the whitespace
## between the tokens around a const.
clean <- function(type.vec){
  ## Nothing to normalize; same guard as cleanType in packages-parse.R.
  if(length(type.vec)==0)return(type.vec)
  trans.list <- list(
    comments=function(x)gsub("/[*].*?[*]/", "", x),
    beginning=function(x)sub("^\\s+", "", x),
    end=function(x)sub("\\s+$", "", x),
    after.temp=function(x)gsub("([<>])\\s+", "\\1", x),
    before.temp=function(x)gsub("\\s+([<>])", "\\1", x),
    and=function(x)sub("&$", "", x),
    const=function(x)gsub("\\s*const\\s*", "", x),
    reps=function(x)gsub(" +", " ", x))
  ## The order matters: e.g. the trailing "&" is only at the end of the
  ## string after the trailing whitespace has been stripped.
  for(transform in trans.list){
    type.vec <- transform(type.vec)
  }
  ## Every distinct namespace::name pair seen anywhere in the input.
  rep.dt <- unique(nc::capture_all_str(
    type.vec,
    namespace="\\b[^\\s:<]+?",
    "::",
    fun="[^<\\s]+?\\b")[!is.na(fun)])
  ## seq_len, not 1:nrow: when no qualified name was found, 1:0 would
  ## iterate over c(1, 0) and rewrite the types with bogus patterns.
  for(i in seq_len(nrow(rep.dt))){
    rep.row <- rep.dt[i]
    ## The lookbehind skips occurrences that are already qualified.
    pattern <- paste0("(?<!::)\\b", rep.row$fun, "\\b")
    replace <- rep.row[, paste0(namespace, "::", fun)]
    type.vec <- gsub(pattern, replace, type.vec, perl=TRUE)
  }
  type.vec
}
64+
65+
## Add the normalized type of every matched input_parameter line.
match.dt[, cleaned.type := clean(inside)]

## For each normalized type: number of occurrences and number of distinct
## packages in which it occurs.
(count.dt <- match.dt[
  ,
  .(count=.N, packages=length(unique(package))),
  by=cleaned.type])
## First 20 rows when ranked by occurrences, then by packages.
count.dt[order(-count)][1:20]
count.dt[order(-packages)][1:20]

## Spot checks: normalized types that still contain "&", "const" or "Numeric".
grep("&", count.dt$cleaned.type, value=TRUE)
grep("const", count.dt$cleaned.type, value=TRUE)
grep("Numeric", count.dt$cleaned.type, value=TRUE)

## Types starting with Rcpp:: that have no template argument, most frequent first.
count.dt[
  grepl("^Rcpp::", cleaned.type) & !grepl("<", cleaned.type)
][order(-count)]

packages-download.R

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## Reverse dependencies of Rcpp, restricted to the LinkingTo field.
Rcpp.LinkingTo.pkgs <- devtools::revdep("Rcpp", "LinkingTo")
## Create the destination only when it is missing, so that re-running the
## script does not warn about an already existing directory.
if(!dir.exists("packages")){
  dir.create("packages")
}
## Fetch the source tarballs into packages/.
download.packages(Rcpp.LinkingTo.pkgs, "packages", type="source")

packages-parse.R

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
## Earlier pattern for one argument of a C++ prototype: a type (optionally
## with a template argument list and a const / & suffix), the argument name,
## an optional "= default", and the "," or ")" that ends the argument.
old.pattern <- local({
  ## Optional <...> part of the type; the final "?" is the quantifier.
  template.args <- nc::quantifier(
    "<\\s*",
    inside="(?1)|[^<>]+",
    ">",
    "\\s*",
    "(?:const)?",
    "\\s*",
    "&?",
    "\\s*",
    "?")
  ## Optional default value after the argument name.
  default.value <- nc::quantifier(
    "\\s*",
    "=",
    "\\s*",
    default="[^\\s,)>]+",
    "?")
  list(
    type=list(
      before="[^<,]+",
      template.args),
    "\\s+",
    name="[^\\s,)>]+",
    default.value,
    "[,)]")
})
## Try the pattern on a prototype argument list with nested templates.
nc::capture_all_str(
  "const Eigen::Matrix<double, 1, Eigen::Dynamic>& inv, const int& nerrs, foo< bar< sars > > var = 1, boost::ecuyer1988& base_rng__, std::ostream* pstream__)",
  old.pattern)
25+
26+
## Normalize C++ argument type strings so that equivalent spellings compare
## equal.
##
## type.vec: character vector of type strings.
## Returns a character vector of the same length (the input itself when it
## is empty) where
## - /* ... */ comments, leading and trailing whitespace, and whitespace
##   next to "<" or ">" are removed,
## - every "&" and every "const" (with adjacent whitespace) are removed, and
##   runs of spaces are collapsed to a single space,
## - every bare name that occurs somewhere in type.vec as namespace::name is
##   rewritten to that qualified form.
## NOTE(review): the const rule matches the substring anywhere, so it also
## removes the letters "const" inside longer identifiers and the whitespace
## between the tokens around a const.
cleanType <- function(type.vec){
  if(length(type.vec)==0)return(type.vec)
  trans.list <- list(
    comments=function(x)gsub("/[*].*?[*]/", "", x),
    beginning=function(x)sub("^\\s+", "", x),
    end=function(x)sub("\\s+$", "", x),
    after.temp=function(x)gsub("([<>])\\s+", "\\1", x),
    before.temp=function(x)gsub("\\s+([<>])", "\\1", x),
    and=function(x)gsub("&", "", x),
    const=function(x)gsub("\\s*const\\s*", "", x),
    reps=function(x)gsub(" +", " ", x))
  ## Apply the rewrites one after another, in the order listed above.
  for(transform in trans.list){
    type.vec <- transform(type.vec)
  }
  ## Every distinct namespace::name pair seen anywhere in the input.
  rep.dt <- unique(nc::capture_all_str(
    type.vec,
    namespace="\\b[^\\s:<]+?",
    "::",
    fun="[^<\\s]+?\\b")[!is.na(fun)])
  ## seq_len, not 1:nrow: when no qualified name was found, 1:0 would
  ## iterate over c(1, 0) and rewrite the types with bogus patterns.
  for(i in seq_len(nrow(rep.dt))){
    rep.row <- rep.dt[i]
    ## The lookbehind skips occurrences that are already qualified.
    pattern <- paste0("(?<!::)\\b", rep.row$fun, "\\b")
    replace <- rep.row[, paste0(namespace, "::", fun)]
    type.vec <- gsub(pattern, replace, type.vec, perl=TRUE)
  }
  type.vec
}
53+
54+
## nc pattern for one Rcpp::traits::input_parameter< T >::type declaration.
## "before" captures the text up to the first "<", "inside" the template
## argument T, followed by an optional const / & suffix and "::type".
## NOTE(review): "(?1)" is a PCRE subroutine call to capture group 1,
## presumably the "type" group, which is what lets nested <...> match --
## confirm against the regex that nc generates.
type.pattern <- list(
  type=list(
    before="[^<]+",
    "<\\s*",
    inside="(?1)|[^<>]+",
    ">",
    "\\s*", "(?:const)?",
    "\\s*", "&?",
    "\\s*"),
  "::type")
66+
67+
## Parse the RcppExports.cpp file of one package.
##
## pkg.path: package directory; src/RcppExports.cpp below it must exist
##   (normalizePath is called with mustWork=TRUE).
## Returns a list with three elements:
##   namespaces: one row per "using namespace X" match (column namespace),
##   prototypes: columns funName, commentName and prototype of every
##     exported function that matched,
##   arguments: for each funName, the type.pattern captures of its
##     Rcpp::traits::input_parameter lines; NULL when no function matched.
## Uses the global type.pattern.
parseRcppExports <- function(pkg.path){
  cpp.file <- normalizePath(
    file.path(pkg.path, "src", "RcppExports.cpp"),
    mustWork=TRUE)
  ## Drop /* ... */ comments that open and close on the same line.
  subject.vec <- gsub("/[*].*?[*]/", "", readLines(cpp.file))
  ns.dt <- nc::capture_all_str(
    subject.vec,
    "using namespace ",
    namespace="[^ ;]+")
  ## One match per exported function: the "// name" comment, the C++
  ## prototype, the RcppExport SEXP line, and the wrapper body between
  ## BEGIN_RCPP and END_RCPP.
  fun.dt <- nc::capture_all_str(
    subject.vec,
    "\n// ",
    commentName=".*",
    "\n",
    prototype=list(
      returnType=".*",
      " ",
      funName=".*?",
      "\\(",
      arguments=".*",
      "\\);\n"),
    SEXP=".*\n",
    "BEGIN_RCPP\n",
    code="(?:.*\n)*?",
    "END_RCPP")
  ## Captures for the input_parameter lines of one wrapper body, or NULL
  ## when the body declares none.
  parse.inputs <- function(code.text){
    body.lines <- strsplit(code.text, "\n")[[1]]
    uncommented <- sub("//.*", "", body.lines)
    param.lines <- grep(
      "Rcpp::traits::input_parameter", uncommented, value=TRUE)
    if(length(param.lines)==0)return(NULL)
    nc::capture_first_vec(param.lines, type.pattern)
  }
  arg.dt <- NULL
  if(nrow(fun.dt) > 0){
    arg.dt <- fun.dt[, parse.inputs(code), by=funName]
  }
  list(
    namespaces=ns.dt,
    prototypes=fun.dt[, .(funName, commentName, prototype)],
    arguments=arg.dt)
}
112+
113+
## Every extracted RcppExports.cpp, and the package directory two levels up.
RcppExports.cpp.vec <- Sys.glob(
  file.path("packages", "*", "src", "RcppExports.cpp"))
pkg.dir.vec <- dirname(dirname(RcppExports.cpp.vec))

## Parse each package; results are stored under the package directory name.
arg.dt.list <- list()
ns.dt.list <- list()
for(pkg.dir in pkg.dir.vec){
  result.list <- parseRcppExports(pkg.dir)
  ## Report packages in which no exported function was recognized.
  if(0 == nrow(result.list[["prototypes"]])){
    print(pkg.dir)
  }
  ## Only packages with at least one parsed argument are recorded, for the
  ## namespaces as well as for the arguments.
  if(!is.null(result.list[["arguments"]]) &&
     nrow(result.list[["arguments"]]) > 0){
    arg.dt.list[[pkg.dir]] <- data.table::data.table(
      pkg.dir, result.list[["arguments"]])
    ns.dt.list[[pkg.dir]] <- data.table::data.table(
      pkg.dir, result.list[["namespaces"]])
  }
}
## Stack the per-package tables.
arg.dt <- do.call(rbind, arg.dt.list)
ns.dt <- do.call(rbind, ns.dt.list)

## Normalize the captured types, then list the distinct types and namespaces.
arg.dt[, clean.type := cleanType(inside)]
print(names(table(arg.dt$clean.type)))
print(names(table(ns.dt$namespace)))
137+
## Spot checks.
## NOTE(review): "longint" looks like a type whose internal space was removed
## by the const rule in cleanType -- confirm.
arg.dt[clean.type=="longint"]
arg.dt[grepl(" ", funName)]

## Attach to every argument row the number of arguments of its function.
arg.counts <- arg.dt[
  ,
  .(args=.N),
  by=.(pkg.dir, funName)
][arg.dt, on=.(pkg.dir, funName)]
## First 10 types when ranking the single-argument functions by type.
(top10 <- arg.counts[
  args==1,
  .(funs=.N, pkgs=length(unique(pkg.dir))),
  by=clean.type
][order(-funs)][1:10])

## Functions for which every argument has one of the top10 types: the number
## of arguments with such a type equals the total number of arguments.
(covered <- arg.counts[
  top10$clean.type, on="clean.type"
][
  ,
  .(top10args=.N),
  by=.(pkg.dir, funName, args)
][args==top10args][order(-args)])
##covered[funName=="repel_boxes"]
covered[, .(funs=.N, pkgs=length(unique(pkg.dir)))]

## Same coverage computation without the types that match SEXP or List.
(some.types <- grep("SEXP|List", top10$clean.type, invert=TRUE, value=TRUE))
some.covered <- arg.counts[
  some.types, on="clean.type"
][
  ,
  .(top10args=.N),
  by=.(pkg.dir, funName, args)
][args==top10args][order(-args)]
some.covered[, .(funs=.N, pkgs=length(unique(pkg.dir)))]

packages-untar.R

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
## Extract only <pkg>/src/RcppExports.cpp from every downloaded tarball into
## packages/. The working directory is left untouched, so the "packages/..."
## relative paths used by the other scripts keep working in the same session.
tgz.vec <- Sys.glob(file.path("packages", "*.tar.gz"))
for(pkg.tar.gz in tgz.vec){
  ## Tarballs are named <pkg>_<version>.tar.gz; keep the part before "_".
  pkg.name <- sub("_.*", "", basename(pkg.tar.gz))
  RcppExports.cpp <- file.path(pkg.name, "src/RcppExports.cpp")
  ## utils::untar instead of a pasted shell command: no unquoted file names
  ## are handed to a shell.
  utils::untar(pkg.tar.gz, files=RcppExports.cpp, exdir="packages")
}

packages/ABCoptim/src/RcppExports.cpp

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
#include <Rcpp.h>
5+
6+
using namespace Rcpp;
7+
8+
// abc_cpp_
9+
List abc_cpp_(NumericVector& par, Function& fn, const NumericVector& lb, const NumericVector& ub, int FoodNumber, int limit, int maxCycle, int criter);
10+
RcppExport SEXP _ABCoptim_abc_cpp_(SEXP parSEXP, SEXP fnSEXP, SEXP lbSEXP, SEXP ubSEXP, SEXP FoodNumberSEXP, SEXP limitSEXP, SEXP maxCycleSEXP, SEXP criterSEXP) {
11+
BEGIN_RCPP
12+
Rcpp::RObject rcpp_result_gen;
13+
Rcpp::RNGScope rcpp_rngScope_gen;
14+
Rcpp::traits::input_parameter< NumericVector& >::type par(parSEXP);
15+
Rcpp::traits::input_parameter< Function& >::type fn(fnSEXP);
16+
Rcpp::traits::input_parameter< const NumericVector& >::type lb(lbSEXP);
17+
Rcpp::traits::input_parameter< const NumericVector& >::type ub(ubSEXP);
18+
Rcpp::traits::input_parameter< int >::type FoodNumber(FoodNumberSEXP);
19+
Rcpp::traits::input_parameter< int >::type limit(limitSEXP);
20+
Rcpp::traits::input_parameter< int >::type maxCycle(maxCycleSEXP);
21+
Rcpp::traits::input_parameter< int >::type criter(criterSEXP);
22+
rcpp_result_gen = Rcpp::wrap(abc_cpp_(par, fn, lb, ub, FoodNumber, limit, maxCycle, criter));
23+
return rcpp_result_gen;
24+
END_RCPP
25+
}
26+
27+
static const R_CallMethodDef CallEntries[] = {
28+
{"_ABCoptim_abc_cpp_", (DL_FUNC) &_ABCoptim_abc_cpp_, 8},
29+
{NULL, NULL, 0}
30+
};
31+
32+
RcppExport void R_init_ABCoptim(DllInfo *dll) {
33+
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
34+
R_useDynamicSymbols(dll, FALSE);
35+
}

0 commit comments

Comments
 (0)