33// #include <iomanip>
44#include < chrono>
55#include < thread>
6- #include < unordered_map>
6+ // #include <unordered_map>
77#include < mutex>
88#include " word2vec/word2vec.hpp"
99#include " tokens.h"
@@ -19,29 +19,38 @@ Rcpp::CharacterVector encode(std::vector<std::string> types){
1919 return (types_);
2020}
2121
22- Rcpp::NumericMatrix as_matrix (w2v::word2vec_t model) {
23-
24- std::unordered_map<std::string, std::vector<float >> m_map = model.map ();
25- std::vector<std::string> words;
26- words.reserve (m_map.size ());
27- for (auto it : m_map) {
28- words.push_back (it.first );
29- }
22+ // Rcpp::NumericMatrix as_matrix(w2v::word2vec_t model) {
23+ //
24+ // std::unordered_map<std::string, std::vector<float>> m_map = model.map();
25+ // std::vector<std::string> words;
26+ // words.reserve(m_map.size());
27+ // for(auto it : m_map) {
28+ // words.push_back(it.first);
29+ // }
30+ //
31+ // std::vector<float> mat;
32+ // mat.reserve(model.vectorSize() * words.size());
33+ // for (size_t j = 0; j < words.size(); j++) {
34+ // //auto p = model.vector(words[j]);
35+ // auto it = m_map.find(words[j]);
36+ // if (it != m_map.end()) {
37+ // //std::vector<float> vec = *p;
38+ // std::vector<float> vec = it->second;
39+ // mat.insert(mat.end(), vec.begin(), vec.end());
40+ // }
41+ // }
42+ // //std::vector<float> mat = model.trainMatrix();
43+ //
44+ // Rcpp::NumericMatrix mat_(model.vectorSize(), words.size(), mat.begin());
45+ // colnames(mat_) = encode(words);
46+ // return Rcpp::transpose(mat_);
47+ // }
3048
31- std::vector<float > mat;
32- mat.reserve (model.vectorSize () * words.size ());
33- for (size_t j = 0 ; j < words.size (); j++) {
34- // auto p = model.vector(words[j]);
35- auto it = m_map.find (words[j]);
36- if (it != m_map.end ()) {
37- // std::vector<float> vec = *p;
38- std::vector<float > vec = it->second ;
39- mat.insert (mat.end (), vec.begin (), vec.end ());
40- }
41- }
49+ Rcpp::NumericMatrix as_matrix (w2v::word2vec_t model, w2v::corpus_t corpus) {
4250
43- Rcpp::NumericMatrix mat_ (model.vectorSize (), words.size (), mat.begin ());
44- colnames (mat_) = encode (words);
51+ std::vector<float > mat = model.trainMatrix ();
52+ Rcpp::NumericMatrix mat_ (model.vectorSize (), corpus.types .size (), mat.begin ());
53+ colnames (mat_) = encode (corpus.types );
4554 return Rcpp::transpose (mat_);
4655}
4756
@@ -155,7 +164,8 @@ Rcpp::List cpp_w2v(Rcpp::List texts_,
155164 Rprintf (" ...complete\n " );
156165
157166 Rcpp::List out = Rcpp::List::create (
158- Rcpp::Named (" model" ) = as_matrix (word2vec),
167+ // Rcpp::Named("model") = as_matrix(word2vec),
168+ Rcpp::Named (" model" ) = as_matrix (word2vec, corpus),
159169 // Rcpp::Named("model") = model,
160170 // Rcpp::Named("vocabulary") = types.size(),
161171 // Rcpp::Named("success") = success,
0 commit comments