dselivanov
diff --git a/‎.Rbuildignore
+3 b/‎.Rbuildignore
+3
diff --git a/‎.clang-format
+4 b/‎.clang-format
+4
diff --git a/‎.gitignore
+1-4 b/‎.gitignore
+1-4
diff --git a/‎Makefile
+4 b/‎Makefile
+4
diff --git a/‎R/RcppExports.R
+52-52 b/‎R/RcppExports.R
+52-52
diff --git a/‎inst/include/mapped_csc.hpp
+31 b/‎inst/include/mapped_csc.hpp
+31
diff --git a/‎inst/include/mapped_csr.hpp
+38 b/‎inst/include/mapped_csr.hpp
+38
diff --git a/‎inst/include/nnls.hpp
+6-12 b/‎inst/include/nnls.hpp
+6-12
diff --git a/‎inst/include/wrmf.hpp
+17 b/‎inst/include/wrmf.hpp
+17
@@ -9,3 +9,6 @@ docs/
 ^cran-comments\.md$
 .github/
 ^\.github$
+^\.clang-format$
+^Makefile$
+extradata/
@@ -0,0 +1,4 @@
+BasedOnStyle:  Google  # start from clang-format's predefined Google style
+DerivePointerAlignment: false  # do not infer `*` / `&` placement from the file being formatted
+ColumnLimit: 90  # maximum line width, in columns
+IncludeBlocks: Preserve  # keep #include groups as written; each group is sorted on its own
@@ -6,10 +6,7 @@ src/*.o
 src/*.so
 src/*.dll
 .DS_Store
-demo/
-data/lastfm.rds
-docs/papers/
-R/draft/
+extradata/
 
 *.status
 config.log
 
@@ -0,0 +1,10 @@
+# Path to the clang-format binary; the backticks are expanded by the shell when the recipe runs.
+clang_format=`which clang-format`
+
+# `format` names a task, not a file, so it must still run if a file called `format` exists.
+.PHONY: format
+
+# Reformat in place every *.hpp and every *.cpp except the generated RcppExports.cpp.
+# The globs are quoted so that `find` receives them intact instead of the shell expanding them.
+format: $(shell find . -name "*.hpp") $(shell find . -type f \( -iname "*.cpp" ! -iname "RcppExports.cpp" \))
+	@${clang_format} -i $?
@@ -1,30 +1,6 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-check_is_seq <- function(indices) {
-    .Call(`_rsparse_check_is_seq`, indices)
-}
-
-copy_csr_rows <- function(indptr, indices, values, rows_take) {
-    .Call(`_rsparse_copy_csr_rows`, indptr, indices, values, rows_take)
-}
-
-copy_csr_rows_col_seq <- function(indptr, indices, values, rows_take, cols_take) {
-    .Call(`_rsparse_copy_csr_rows_col_seq`, indptr, indices, values, rows_take, cols_take)
-}
-
-copy_csr_arbitrary <- function(indptr, indices, values, rows_take, cols_take) {
-    .Call(`_rsparse_copy_csr_arbitrary`, indptr, indices, values, rows_take, cols_take)
-}
-
-get_ftrl_weights <- function(R_model) {
-    .Call(`_rsparse_get_ftrl_weights`, R_model)
-}
-
-ftrl_partial_fit <- function(m, y, R_model, weights, do_update = 1L, n_threads = 1L) {
-    .Call(`_rsparse_ftrl_partial_fit`, m, y, R_model, weights, do_update, n_threads)
-}
-
 fm_create_param <- function(learning_rate_w, learning_rate_v, rank, lambda_w, lambda_v, w0_R, w_R, v_R, grad_w2_R, grad_v2_R, task, intercept) {
     .Call(`_rsparse_fm_create_param`, learning_rate_w, learning_rate_v, rank, lambda_w, lambda_v, w0_R, w_R, v_R, grad_w2_R, grad_v2_R, task, intercept)
 }
@@ -57,6 +33,14 @@ is_invalid_ptr <- function(sexp_ptr) {
     .Call(`_rsparse_is_invalid_ptr`, sexp_ptr)
 }
 
+get_ftrl_weights <- function(R_model) {
+    .Call(`_rsparse_get_ftrl_weights`, R_model)
+}
+
+ftrl_partial_fit <- function(m, y, R_model, weights, do_update = 1L, n_threads = 1L) {
+    .Call(`_rsparse_ftrl_partial_fit`, m, y, R_model, weights, do_update, n_threads)
+}
+
 cpp_glove_create <- function(params) {
     .Call(`_rsparse_cpp_glove_create`, params)
 }
@@ -65,36 +49,40 @@ cpp_glove_partial_fit <- function(ptr, x_irow, x_icol, x_val, iter_order, n_thre
     .Call(`_rsparse_cpp_glove_partial_fit`, ptr, x_irow, x_icol, x_val, iter_order, n_threads)
 }
 
-csr_dense_tcrossprod <- function(x_csr_r, y_transposed, num_threads = 1L) {
-    .Call(`_rsparse_csr_dense_tcrossprod`, x_csr_r, y_transposed, num_threads)
+arma_kmeans <- function(x, k, seed_mode, n_iter, verbose, result) {
+    .Call(`_rsparse_arma_kmeans`, x, k, seed_mode, n_iter, verbose, result)
 }
 
-dense_csc_prod <- function(x_r, y_csc_r, num_threads = 1L) {
-    .Call(`_rsparse_dense_csc_prod`, x_r, y_csc_r, num_threads)
+check_is_seq <- function(indices) {
+    .Call(`_rsparse_check_is_seq`, indices)
 }
 
-als_implicit_double <- function(m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row) {
-    .Call(`_rsparse_als_implicit_double`, m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row)
+copy_csr_rows <- function(indptr, indices, values, rows_take) {
+    .Call(`_rsparse_copy_csr_rows`, indptr, indices, values, rows_take)
 }
 
-als_implicit_float <- function(m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row) {
-    .Call(`_rsparse_als_implicit_float`, m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row)
+copy_csr_rows_col_seq <- function(indptr, indices, values, rows_take, cols_take) {
+    .Call(`_rsparse_copy_csr_rows_col_seq`, indptr, indices, values, rows_take, cols_take)
 }
 
-als_explicit_double <- function(m_csc_r, X, Y, cnt_X, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row) {
-    .Call(`_rsparse_als_explicit_double`, m_csc_r, X, Y, cnt_X, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row)
+copy_csr_arbitrary <- function(indptr, indices, values, rows_take, cols_take) {
+    .Call(`_rsparse_copy_csr_arbitrary`, indptr, indices, values, rows_take, cols_take)
 }
 
-als_explicit_float <- function(m_csc_r, X_, Y_, cnt_X_, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row) {
-    .Call(`_rsparse_als_explicit_float`, m_csc_r, X_, Y_, cnt_X_, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row)
+csr_dense_tcrossprod <- function(x_csr_r, y_transposed, num_threads = 1L) {
+    .Call(`_rsparse_csr_dense_tcrossprod`, x_csr_r, y_transposed, num_threads)
 }
 
-initialize_biases_double <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE) {
-    .Call(`_rsparse_initialize_biases_double`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias)
+dense_csc_prod <- function(x_r, y_csc_r, num_threads = 1L) {
+    .Call(`_rsparse_dense_csc_prod`, x_r, y_csc_r, num_threads)
 }
 
-initialize_biases_float <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE) {
-    .Call(`_rsparse_initialize_biases_float`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias)
+top_product <- function(x, y, k, n_threads, not_recommend_r, exclude, glob_mean = 0.) {
+    .Call(`_rsparse_top_product`, x, y, k, n_threads, not_recommend_r, exclude, glob_mean)
+}
+
+c_nnls_double <- function(x, y, max_iter, rel_tol) {
+    .Call(`_rsparse_c_nnls_double`, x, y, max_iter, rel_tol)
 }
 
 rankmf_solver_double <- function(x_r, W, H, W2_grad, H2_grad, user_features_r, item_features_r, rank, n_updates, learning_rate = 0.01, gamma = 1, lambda_user = 0.0, lambda_item_positive = 0.0, lambda_item_negative = 0.0, n_threads = 1L, update_items = TRUE, loss = 0L, kernel = 0L, max_negative_samples = 50L, margin = 0.1, optimizer = 0L, report_progress = 10L) {
@@ -105,18 +93,6 @@ rankmf_solver_float <- function(x_r, W, H, W2_grad, H2_grad, user_features_r, it
     invisible(.Call(`_rsparse_rankmf_solver_float`, x_r, W, H, W2_grad, H2_grad, user_features_r, item_features_r, rank, n_updates, learning_rate, gamma, lambda_user, lambda_item_positive, lambda_item_negative, n_threads, update_items, loss, kernel, max_negative_samples, margin, optimizer, report_progress))
 }
 
-top_product <- function(x, y, k, n_threads, not_recommend_r, exclude, glob_mean = 0.) {
-    .Call(`_rsparse_top_product`, x, y, k, n_threads, not_recommend_r, exclude, glob_mean)
-}
-
-arma_kmeans <- function(x, k, seed_mode, n_iter, verbose, result) {
-    .Call(`_rsparse_arma_kmeans`, x, k, seed_mode, n_iter, verbose, result)
-}
-
-c_nnls_double <- function(x, y, max_iter, rel_tol) {
-    .Call(`_rsparse_c_nnls_double`, x, y, max_iter, rel_tol)
-}
-
 omp_thread_count <- function() {
     .Call(`_rsparse_omp_thread_count`)
 }
@@ -133,3 +109,27 @@ deep_copy <- function(x) {
     .Call(`_rsparse_deep_copy`, x)
 }
 
+als_explicit_double <- function(m_csc_r, X, Y, cnt_X, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row) {
+    .Call(`_rsparse_als_explicit_double`, m_csc_r, X, Y, cnt_X, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row)
+}
+
+als_explicit_float <- function(m_csc_r, X_, Y_, cnt_X_, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row) {
+    .Call(`_rsparse_als_explicit_float`, m_csc_r, X_, Y_, cnt_X_, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row)
+}
+
+als_implicit_double <- function(m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row) {
+    .Call(`_rsparse_als_implicit_double`, m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row)
+}
+
+als_implicit_float <- function(m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row) {
+    .Call(`_rsparse_als_implicit_float`, m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row)
+}
+
+initialize_biases_double <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE) {
+    .Call(`_rsparse_initialize_biases_double`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias)
+}
+
+initialize_biases_float <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE) {
+    .Call(`_rsparse_initialize_biases_float`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias)
+}
+
@@ -0,0 +1,45 @@
+#ifndef MAPPED_CSC_HPP
+#define MAPPED_CSC_HPP
+
+#include <stddef.h>
+#include <armadillo>
+#include <cstdint>
+
+// View over a sparse matrix held in three caller-supplied arrays (row indices, column
+// pointers, values). Only the pointers are stored: the class allocates nothing and
+// declares no destructor, so the arrays stay owned by whoever supplied them.
+// NOTE(review): presumably col_ptrs has n_cols + 1 entries and row_indices / values have
+// nnz entries each, as in the usual compressed-column layout - confirm against callers.
+template <typename T>
+class MappedCSC {
+ public:
+  // Empty view: zero dimensions, zero entries, null array pointers. Defined here because
+  // a declaration without a body would fail at link time as soon as it was used.
+  MappedCSC()
+      : n_rows(0),
+        n_cols(0),
+        nnz(0),
+        row_indices(nullptr),
+        col_ptrs(nullptr),
+        values(nullptr) {}
+  // Wraps existing arrays without copying them; the arguments are stored as given.
+  MappedCSC(arma::uword n_rows, arma::uword n_cols, size_t nnz, arma::uword* row_indices,
+            arma::uword* col_ptrs, T* values)
+      : n_rows(n_rows),
+        n_cols(n_cols),
+        nnz(nnz),
+        row_indices(row_indices),
+        col_ptrs(col_ptrs),
+        values(values) {}
+  const arma::uword n_rows;  // number of rows
+  const arma::uword n_cols;  // number of columns
+  const size_t nnz;          // number of stored entries
+  arma::uword* row_indices;  // row index of each stored entry
+  arma::uword* col_ptrs;     // per-column offsets into row_indices / values
+  T* values;                 // stored entry values
+};
+
+using dMappedCSC = MappedCSC<double>;
+using fMappedCSC = MappedCSC<float>;
+
+#endif /* MAPPED_CSC_HPP */
@@ -0,0 +1,58 @@
+#ifndef MAPPED_CSR_HPP
+#define MAPPED_CSR_HPP
+
+#include <stddef.h>
+#include <armadillo>
+#include <cstdint>
+#include <utility>
+
+// View over a sparse matrix held in three caller-supplied arrays (column indices, row
+// pointers, values). Only the pointers are stored: the class allocates nothing and
+// declares no destructor, so the arrays stay owned by whoever supplied them.
+// Entries of row i occupy positions row_ptrs[i] .. row_ptrs[i + 1] - 1 of col_indices
+// and values (see get_row).
+template <typename T>
+class MappedCSR {
+ public:
+  // Empty view: zero dimensions, zero entries, null array pointers. Defined here because
+  // a declaration without a body would fail at link time as soon as it was used.
+  MappedCSR()
+      : n_rows(0),
+        n_cols(0),
+        nnz(0),
+        col_indices(nullptr),
+        row_ptrs(nullptr),
+        values(nullptr) {}
+  // Wraps existing arrays without copying them; the arguments are stored as given.
+  MappedCSR(arma::uword n_rows, arma::uword n_cols, size_t nnz, arma::uword* col_indices,
+            arma::uword* row_ptrs, T* values)
+      : n_rows(n_rows),
+        n_cols(n_cols),
+        nnz(nnz),
+        col_indices(col_indices),
+        row_ptrs(row_ptrs),
+        values(values) {}
+  const arma::uword n_rows;  // number of rows
+  const arma::uword n_cols;  // number of columns
+  const size_t nnz;          // number of stored entries
+  arma::uword* col_indices;  // column index of each stored entry
+  arma::uword* row_ptrs;     // per-row offsets into col_indices / values
+  T* values;                 // stored entry values
+  // Returns the column indices and the values of row i as an (indices, values) pair.
+  // i is not range-checked; row_ptrs[i] and row_ptrs[i + 1] are read as given.
+  // NOTE(review): both vectors are first built directly on the underlying arrays, but
+  // the pair is then copy-constructed from const locals, so the result presumably holds
+  // independent copies rather than views - confirm this is what callers expect.
+  std::pair<arma::uvec, arma::Col<T>> get_row(const arma::uword i) const {
+    const arma::uword p1 = this->row_ptrs[i];
+    const arma::uword p2 = this->row_ptrs[i + 1];
+    const arma::uvec idx = arma::uvec(&this->col_indices[p1], p2 - p1, false, true);
+    const arma::Col<T> values = arma::Col<T>(&this->values[p1], p2 - p1, false, true);
+    return (std::pair<arma::uvec, arma::Col<T>>(idx, values));
+  }
+};
+
+using dMappedCSR = MappedCSR<double>;
+using fMappedCSR = MappedCSR<float>;
+
+#endif /* MAPPED_CSR_HPP */
@@ -3,11 +3,9 @@
 #define EPS 1e-16
 
 template <class T>
-arma::Col<T> scd_ls_update(const arma::Mat<T> &XtX,
-                   arma::Col<T> &mu,
-                   arma::uword max_iter,
-                   double rel_tol,
-                   const arma::Col<T> &initial) {
+arma::Col<T> scd_ls_update(const arma::Mat<T>& XtX, arma::Col<T>& mu,
+                           arma::uword max_iter, double rel_tol,
+                           const arma::Col<T>& initial) {
   arma::Col<T> res = initial;
   T rel_diff, old_value, new_value, diff;
   const arma::Col<T> XtX_diag = XtX.diag();
@@ -16,7 +14,7 @@ arma::Col<T> scd_ls_update(const arma::Mat<T> &XtX,
     for (auto k = 0; k < XtX.n_cols; k++) {
       old_value = res(k);
       new_value = old_value - mu(k) / XtX_diag(k);
-      if(new_value < 0) new_value = 0;
+      if (new_value < 0) new_value = 0;
       diff = new_value - old_value;
       if (diff != 0) {
         res(k) = new_value;
@@ -31,12 +29,8 @@ arma::Col<T> scd_ls_update(const arma::Mat<T> &XtX,
 }
 
 template <class T>
-arma::Col<T> c_nnls(const arma::Mat<T> &X,
-                    const arma::Col<T> &y,
-                    const arma::Col<T> &init,
-                    arma::uword max_iter,
-                    double rel_tol) {
-
+arma::Col<T> c_nnls(const arma::Mat<T>& X, const arma::Col<T>& y,
+                    const arma::Col<T>& init, arma::uword max_iter, double rel_tol) {
   arma::Mat<T> Xt = X.t();
   arma::Mat<T> XtX = Xt * X;
   // for stability: avoid divided by 0
 
@@ -0,0 +1,29 @@
+#ifndef WRMF_HPP
+#define WRMF_HPP
+
+#include <armadillo>
+#include "mapped_csc.hpp"
+#include "mapped_csr.hpp"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+// NOTE(review): no use of GRAIN_SIZE is visible here; presumably the chunk size for
+// splitting work across threads - confirm at the call sites.
+#define GRAIN_SIZE 100
+
+// Integer codes naming a solver.
+// NOTE(review): presumably the values taken by the `solver` argument of the als_*
+// routines - confirm.
+#define CHOLESKY 0
+#define CONJUGATE_GRADIENT 1
+#define SEQ_COORDINATE_WISE_NNLS 2
+
+// NOTE(review): presumably the iteration cap and tolerance for the coordinate-wise
+// NNLS solver (SCD_*) and the tolerance for conjugate gradient (CG_TOL) - confirm.
+#define SCD_MAX_ITER 10000
+#define SCD_TOL 1e-4
+#define CG_TOL 1e-10
+
+#endif /* WRMF_HPP */