-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjaccard_distance_function.cpp
70 lines (56 loc) · 2.08 KB
/
jaccard_distance_function.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
#include <algorithm>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <vector>
using namespace std;
using namespace arma;
using namespace Rcpp;
//' Calculate the Jaccard distance between two feature vectors
//'
//' For each vector, the indices of its `nFeat` smallest and `nFeat` largest
//' entries are collected into two index sets. A Jaccard distance
//' (1 - |intersection| / |union|) is computed between the two "smallest" sets
//' and between the two "largest" sets; the mean of both distances is returned.
//' @param x,y two vectors of features (1xn)
//' @param nFeat number of features in each set; must be at least 1 and at
//'   most the length of `x` and of `y`
//' @return distance between the two vectors, a value in [0, 1]
double distJaccard(vec x, vec y, unsigned int nFeat) {
    // nFeat == 0 would yield 0/0 (NaN); nFeat beyond the vector length would
    // make head()/tail() read out of range. Reject both explicitly.
    if (nFeat == 0 || nFeat > x.n_elem || nFeat > y.n_elem) {
        throw std::invalid_argument(
            "distJaccard(): nFeat must be between 1 and the length of x and y");
    }

    const uvec xOrder = sort_index(x, "ascend");
    const uvec yOrder = sort_index(y, "ascend");

    // Jaccard distance between two index sets. Both inputs must be sorted
    // and free of duplicates.
    const auto jaccard = [](const uvec& a, const uvec& b) {
        std::vector<uword> common;
        common.reserve(a.n_elem);
        std::set_intersection(a.begin(), a.end(),
                              b.begin(), b.end(),
                              std::back_inserter(common));
        // Inclusion-exclusion: |a union b| = |a| + |b| - |a intersect b|.
        const double unionSize = double(a.n_elem) + double(b.n_elem)
                                 - double(common.size());
        return 1.0 - double(common.size()) / unionSize;
    };

    // sort_index() orders indices by value, so every slice is re-sorted by
    // index to satisfy the sorted-range precondition of std::set_intersection.
    const uvec xLowest  = sort(xOrder.head(nFeat));
    const uvec yLowest  = sort(yOrder.head(nFeat));
    const uvec xHighest = sort(xOrder.tail(nFeat));
    const uvec yHighest = sort(yOrder.tail(nFeat));

    const double distLowest  = jaccard(xLowest, yLowest);
    const double distHighest = jaccard(xHighest, yHighest);
    return (distLowest + distHighest) / 2.0;
}
//' Calculate the Jaccard distances between all pairs of distinct rows of a matrix
//' @param A matrix of replicates (m x n), one feature vector per row
//' @param nFeat number of features in each set
//' @return vector of the m*(m-1)/2 Jaccard distances, ordered by row pair
//'   (1,2), (1,3), ..., (1,m), (2,3), ..., (m-1,m); empty when `A` has fewer
//'   than two rows
// [[Rcpp::export]]
NumericVector vecJaccardDistance(mat A, unsigned int nFeat){
    const uword nRows = A.n_rows;
    // n_rows is unsigned, so "nRows - 1" wraps around for an empty matrix;
    // guard the pair count and write the loop bound as "i + 1 < nRows".
    const uword nPairs = (nRows < 2) ? 0 : nRows * (nRows - 1) / 2;

    NumericVector distVec(nPairs);
    unsigned int idx = 0;
    for (uword i = 0; i + 1 < nRows; ++i) {
        // Rows are transposed into column vectors, as expected by distJaccard.
        const vec x = A.row(i).t();
        for (uword j = i + 1; j < nRows; ++j) {
            const vec y = A.row(j).t();
            distVec[idx] = distJaccard(x, y, nFeat);
            ++idx;
        }
    }
    return distVec;
}