re-arrange omp parallel region to make more efficient memory allocations (#75)

dselivanov · web-flow · commit 873db8474c09 · 2023-04-18T08:10:23.000+04:00
* re-arrange omp parallel region to make more efficient memory allocations. Related to #72 * optimize R code, avoid double work in transform * ignore bench files * update github actions * fix accidentally introduced segfault * run CI only for master * update readme * update NEWS * simplify r cmd check options
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -14,3 +14,4 @@ docs/
 extradata/
 revdep/
 ^CRAN-SUBMISSION$
+bench/
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -1,28 +1,45 @@
-# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
-# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches:
-      - master
+    branches: [master]
   pull_request:
-    branches:
-      - master
+    branches: [master]
 
 name: R-CMD-check
 
 jobs:
   R-CMD-check:
-    runs-on: macOS-latest
+    runs-on:  ubuntu-latest
+
+    name: (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {r: 'devel'}
+          # minimal required R version
+          - {r: '3.6.0'}
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
     steps:
-      - uses: actions/checkout@v2
-      - uses: r-lib/actions/setup-r@v1
-      - name: Install dependencies
-        run: |
-          install.packages(c("remotes", "rcmdcheck", "Matrix"))
-          remotes::install_deps(dependencies = TRUE)
-        shell: Rscript {0}
-      - name: Check
-        run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
-        shell: Rscript {0}
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: ${{ matrix.config.r }}
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck, any::Matrix
+          needs: check
+
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          upload-snapshots: true
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -1,47 +1,31 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches:
-      - master
+    branches: [master]
   pull_request:
-    branches:
-      - master
+    branches: [master]
 
 name: test-coverage
 
 jobs:
   test-coverage:
-    runs-on: macOS-latest
+    runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
-    steps:
-
-      - uses: actions/checkout@v2
-
-      - uses: r-lib/actions/setup-r@master
-
-      - uses: r-lib/actions/setup-pandoc@master
 
-      - name: Query dependencies
-        run: |
-          install.packages('remotes')
-          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
-          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
-        shell: Rscript {0}
+    steps:
+      - uses: actions/checkout@v3
 
-      - name: Cache R packages
-        uses: actions/cache@v1
+      - uses: r-lib/actions/setup-r@v2
         with:
-          path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
+          use-public-rspm: true
 
-      - name: Install dependencies
-        run: |
-          install.packages(c("remotes", "Matrix"))
-          remotes::install_deps(dependencies = TRUE)
-          remotes::install_cran("covr")
-        shell: Rscript {0}
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::covr, any::Matrix
+          needs: coverage
 
       - name: Test coverage
-        run: covr::codecov()
+        run: covr::codecov(quiet = FALSE)
         shell: Rscript {0}
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@ autom4te.cache
 src/Makevars
 revdep
 .Rprofile
+bench/
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# rsparse dev
+- faster WRMF solver see #72, #75
+- updated github actions
+
 # rsparse 0.5.1 (2022-09-11)
 - update `configure` script, thanks to @david-cortes, see #73
 - minor fixes in WRMF
diff --git a/R/model_WRMF.R b/R/model_WRMF.R
@@ -180,10 +180,15 @@ WRMF = R6::R6Class(
           RhpcBLASctl::blas_set_num_threads(blas_threads_keep)
         })
       }
-
+      logger$debug("converting input user-item matrix")
       c_ui = MatrixExtra::as.csc.matrix(x)
+      # c_ui = as(x, "CsparseMatrix")
+      logger$debug("pre-processing input")
       c_ui = private$preprocess(c_ui)
-      c_iu = MatrixExtra::t_shallow(MatrixExtra::as.csr.matrix(x))
+      logger$debug("creating item-user matrix")
+      c_iu = MatrixExtra::t_shallow(MatrixExtra::as.csr.matrix(c_ui))
+      # c_iu = t(c_ui)
+      logger$debug("created item-user matrix")
       # store item_ids in order to use them in predict method
       private$item_ids = colnames(c_ui)
 
@@ -195,7 +200,7 @@ WRMF = R6::R6Class(
       n_user = nrow(c_ui)
       n_item = ncol(c_ui)
 
-      logger$trace("initializing U")
+      logger$debug("initializing U")
       if (private$precision == "double") {
         private$U = large_rand_matrix(private$rank, n_user)
         # for item biases
@@ -210,7 +215,7 @@ WRMF = R6::R6Class(
       }
 
       if (is.null(self$components)) {
-
+        logger$debug("initializing components")
         if (private$solver_code == 1L) { ### <- cholesky
           if (private$precision == "double") {
             self$components = matrix(0, private$rank, n_item)
@@ -331,6 +336,7 @@ WRMF = R6::R6Class(
 
         loss_prev_iter = loss
       }
+      logger$debug("solver finished")
 
       if (private$precision == "double")
         data.table::setattr(self$components, "dimnames", list(NULL, colnames(x)))
@@ -341,12 +347,16 @@ WRMF = R6::R6Class(
       rank_ = ifelse(private$with_user_item_bias, private$rank - 1L, private$rank)
       ridge = fl(diag(x = private$lambda, nrow = rank_, ncol = rank_))
       XX = if (private$with_user_item_bias) self$components[-1L, , drop = FALSE] else self$components
+
+      RhpcBLASctl::blas_set_num_threads(RhpcBLASctl::get_num_cores())
       private$XtX = tcrossprod(XX) + ridge
+      RhpcBLASctl::blas_set_num_threads(1)
 
       # call extra transform to ensure results from transform() and fit_transform()
       # are the same (due to avoid_cg, etc)
       # this adds some extra computation, but not a big deal though
-      self$transform(x)
+      # self$transform(x)
+      private$transform_(c_iu, ...)
     },
     # project new users into latent user space - just make ALS step given fixed items matrix
     #' @description create user embeddings for new input
@@ -366,6 +376,41 @@ WRMF = R6::R6Class(
         x = MatrixExtra::t_shallow(x)
       }
 
+      x = private$preprocess(x)
+
+      if (self$global_bias != 0. && private$feedback == "explicit")
+        x@x = x@x - self$global_bias
+
+      private$transform_(x, ...)
+    }
+  ),
+  #### private -----
+  private = list(
+    solver_code = NULL,
+    cg_steps = NULL,
+    scorers = NULL,
+    lambda = NULL,
+    dynamic_lambda = FALSE,
+    rank = NULL,
+    non_negative = NULL,
+    cnt_u = NULL,
+    # user factor matrix = rank * n_users
+    U = NULL,
+    # item factor matrix = rank * n_items
+    I = NULL,
+    # preprocess - transformation of input matrix before passing it to ALS
+    # for example we can scale each row or apply log() to values
+    # this is essentially "confidence" transformation from WRMF article
+    preprocess = NULL,
+    feedback = NULL,
+    precision = NULL,
+    XtX = NULL,
+    solver = NULL,
+    with_user_item_bias = NULL,
+    with_global_bias = NULL,
+    init_user_item_bias = NULL,
+    transform_ = function(x, ...) {
+      logger$debug('starting transform')
       if (private$feedback == "implicit" ) {
         logger$trace("WRMF$transform(): calling `RhpcBLASctl::blas_set_num_threads(1)` (to avoid thread contention)")
         blas_threads_keep = RhpcBLASctl::blas_get_num_procs()
@@ -375,11 +420,6 @@ WRMF = R6::R6Class(
           RhpcBLASctl::blas_set_num_threads(blas_threads_keep)
         })
       }
-
-      x = private$preprocess(x)
-      if (self$global_bias != 0. && private$feedback == "explicit")
-        x@x = x@x - self$global_bias
-
       if (private$precision == "double") {
         res = matrix(0, nrow = private$rank, ncol = ncol(x))
       } else {
@@ -389,7 +429,7 @@ WRMF = R6::R6Class(
       if (private$with_user_item_bias) {
         res[1, ] = if(private$precision == "double") 1.0 else float::fl(1.0)
       }
-
+      logger$debug('starting transform solver')
       loss = private$solver(
         x,
         self$components,
@@ -399,42 +439,17 @@ WRMF = R6::R6Class(
         cnt_X = private$cnt_u,
         avoid_cg = TRUE
       )
+      logger$debug('finished transform solver')
 
       res = t(res)
 
       if (private$precision == "double")
         setattr(res, "dimnames", list(colnames(x), NULL))
       else
         setattr(res@Data, "dimnames", list(colnames(x), NULL))
-
+      logger$debug('finished transform')
       res
     }
-  ),
-  #### private -----
-  private = list(
-    solver_code = NULL,
-    cg_steps = NULL,
-    scorers = NULL,
-    lambda = NULL,
-    dynamic_lambda = FALSE,
-    rank = NULL,
-    non_negative = NULL,
-    cnt_u = NULL,
-    # user factor matrix = rank * n_users
-    U = NULL,
-    # item factor matrix = rank * n_items
-    I = NULL,
-    # preprocess - transformation of input matrix before passing it to ALS
-    # for example we can scale each row or apply log() to values
-    # this is essentially "confidence" transformation from WRMF article
-    preprocess = NULL,
-    feedback = NULL,
-    precision = NULL,
-    XtX = NULL,
-    solver = NULL,
-    with_user_item_bias = NULL,
-    with_global_bias = NULL,
-    init_user_item_bias = NULL
   )
 )
 
@@ -465,7 +480,9 @@ als_implicit = function(
     } else {
       XX = X
     }
+    RhpcBLASctl::blas_set_num_threads(RhpcBLASctl::get_num_cores())
     XtX = tcrossprod(XX) + ridge
+    RhpcBLASctl::blas_set_num_threads(1)
   }
   if (is.null(global_bias_base)) {
     global_bias_base = numeric()
diff --git a/README.md b/README.md
@@ -11,13 +11,6 @@
 
 We've paid some attention to the implementation details - we try to avoid data copies, utilize multiple threads via OpenMP and use SIMD where appropriate. Package **allows to work on datasets with millions of rows and millions of columns**.
 
-
-### Support 
-
-Please reach us if you need **commercial support** - [hello@rexy.ai](mailto:hello@rexy.ai).
-
-
-
 # Features
 
 ### Classification/Regression
diff --git a/inst/include/wrmf_implicit.hpp b/inst/include/wrmf_implicit.hpp