Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WiP] SRHTs #122

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2872d1f
init-commit, initial `TrigSkOp` structure
aryamanjeendgar Oct 1, 2024
7103a00
Cleaning
aryamanjeendgar Oct 1, 2024
5c1572d
Commiting old changes for documenting
aryamanjeendgar Oct 7, 2024
a604909
`A <- (\Pi D) A`, for `ColMajor` A
aryamanjeendgar Oct 9, 2024
1caaffe
`A <- (\Pi H D) A` for `A: (mxn)` and `ColMajor`
aryamanjeendgar Oct 9, 2024
30c32f3
{L|R}`RowMajor`, {R}`RowMajor`, implicit padding
aryamanjeendgar Oct 9, 2024
e601309
All variations of $H_n$ implemented
aryamanjeendgar Oct 10, 2024
7b8b134
Cleaned code with `fht_dispatch`
aryamanjeendgar Oct 10, 2024
5f72995
Generalizing `applyDiagonalRademacher`
aryamanjeendgar Oct 10, 2024
3bd2361
Generalized `permuteRowsToTop`
aryamanjeendgar Oct 10, 2024
eec1813
Added `permuteColsToLeft` && `rmiget`
aryamanjeendgar Oct 10, 2024
10470bb
Removed old `TrigSkOp` code
aryamanjeendgar Oct 10, 2024
b2aabf8
First review
aryamanjeendgar Oct 12, 2024
bc33741
Some stylistic cleanups
aryamanjeendgar Oct 13, 2024
348bf05
Templating numeric precision in FHT implementation
aryamanjeendgar Oct 13, 2024
f13086b
Corrected ommited templating + `int->int64_t`
aryamanjeendgar Oct 13, 2024
884f96e
First version of tests, VERY UNSTABLE
aryamanjeendgar Oct 16, 2024
018acd8
Hadamard && Diag tests working
aryamanjeendgar Oct 17, 2024
0cfdcf1
Final cleanups, all tests working
aryamanjeendgar Oct 17, 2024
591522f
Added in invSRHT tests
aryamanjeendgar Oct 17, 2024
8b04df1
Cleaning up comments and stylistic changes
aryamanjeendgar Oct 17, 2024
6b7101c
Review
aryamanjeendgar Oct 23, 2024
ea2c068
Merge branch 'BallisticLA:main' into SRHT
aryamanjeendgar Oct 23, 2024
0bcd76e
`HadamardMixingOp`, `lmiget|rmiget` -> `miget`
aryamanjeendgar Oct 25, 2024
6da1463
Removed comments
aryamanjeendgar Oct 25, 2024
6ade616
BIG, removes Eigen-test dep. && review
aryamanjeendgar Nov 4, 2024
09ea44d
Simplifying correctness test for `diag`
aryamanjeendgar Nov 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
315 changes: 315 additions & 0 deletions RandBLAS/trig_skops.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
#include "RandBLAS/base.hh"
#include "RandBLAS/exceptions.hh"
#include "RandBLAS/random_gen.hh"
#include <RandBLAS/sparse_skops.hh>

#include <Random123/philox.h>
#include <blas.hh>
#include <lapack.hh>
#include <omp.h>
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

#include <iostream>
#include <stdio.h>
#include <stdexcept>
#include <string>
#include <tuple>
#include <random>
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

#include <math.h>
#include <typeinfo>

#define MAX(a, b) (((a) < (b)) ? (b) : (a))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

namespace RandBLAS {

// =============================================================================
/// WARNING: None of the following functions or overloads thereof are part of the
/// public API
///
void generateRademacherVector_r123(std::vector<int64_t> &buff, uint32_t key_seed, uint32_t ctr_seed, int n) {
typedef r123::Philox2x32 RNG;
// std::vector<int64_t> rademacherVector(n);

// Use OpenMP to parallelize the Rademacher vector generation
#pragma omp parallel
{
// Each thread has its own RNG instance
RNG rng;

RNG::key_type k = {{key_seed + omp_get_thread_num()}}; // Unique key for each thread
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

// Thread-local counter
RNG::ctr_type c;

// Parallel for loop
#pragma omp for
for (int i = 0; i < n; ++i) {
// Set the counter for each random number (unique per thread)
c[0] = ctr_seed + i; // Ensure the counter is unique across threads

// Generate a 2x32-bit random number using the Philox generator
RNG::ctr_type r = rng(c, k);

// Convert the random number into a float in [0, 1) using u01fixedpt
float randValue = r123::u01fixedpt<float>(r.v[0]);

// Convert the float into a Rademacher entry (-1 or 1)
buff[i] = randValue < 0.5 ? -1 : 1;
}
}
}

std::vector<int64_t> generateRademacherVector_parallel(int64_t n) {
std::vector<int64_t> rademacherVec(n);

#pragma omp parallel
{
// Each thread gets its own random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::bernoulli_distribution bernoulli(0.5);

#pragma omp for
for (int64_t i = 0; i < n; ++i) {
rademacherVec[i] = bernoulli(gen) ? 1 : -1;
}
}

return rademacherVec;
}

template<typename T>
void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A) {
std::vector<int64_t> diag = generateRademacherVector_parallel(cols);

for(int col=0; col < cols; col++) {
if(diag[col] > 0)
continue;
blas::scal(rows, diag[col], &A[col * rows], 1);
}
}

template<typename T>
void applyDiagonalRademacher(int64_t rows, int64_t cols, T* A, std::vector<int64_t> &diag) {
//NOTE: Only considers sketching from the left + ColMajor format as of now
for(int col=0; col < cols; col++) {
if(diag[col] > 0)
continue;
blas::scal(rows, diag[col], &A[col * rows], 1);
}
}

// `applyDiagonalRademacher`should have a similar API as `sketch_general`
// Will be called inside of `lskget`
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved
// B <- alpha * diag(Rademacher) * op(A) + beta * B
// `alpha`, `beta` aren't needed and shape(A) == shape(B)
// lda == ldb
template<typename T>
void applyDiagonalRademacher(
blas::Layout layout,
blas::Op opA, // taken from `sketch_general/lskget`
int64_t n, // everything is the same size as `op(A)`: m-by-n
int64_t m,
std::vector<int64_t> &diag,
const T* A, // The data matrix that won't be modified
// int64_t lda,
T* B // The destination matrix
// int64_t ldb
) {
// B <- alpha * diag(Rademacher) * op(A) + beta * B
// `alpha`, `beta` aren't needed
// && shape(A) == shape(B) && lda == ldb && shape({A | B}) = m-by-n
//NOTE: Use `layout` and `opA` for working with RowMajor data
int64_t lda = m;
int64_t ldb = m;

//NOTE: Should the copy be made inside of `applyDiagonalRademacher` or
// should it be inside of `lskget`?
lapack::lacpy(lapack::MatrixType::General, m, n, A, lda, B, ldb);

applyDiagonalRademacher(m, n, B, diag);
}


// `permuteRowsToTop` should just take in `B` as an input
// `B` will be passed in by the user to `sketch_general/lskget` and `permuteRowsToTop` will take in
// the `B` that has been modified by `applyRademacherDiagonal`
// Will also be called inside of `lskget`
template<typename T>
void permuteRowsToTop(int rows, int cols, std::vector<int64_t> selectedRows, T* B, int ldb) {
//NOTE: There should be a similar `permuteColsToLeft`
int top = 0; // Keeps track of the topmost unselected row

// Compute `r` internally
// int64_t r = 100;
// std::vector<int64_t> selectedRows = generateRademacherVector_parallel(r);

for (int selected : selectedRows) {
if (selected != top) {
// Use BLAS swap to swap the entire rows
// Swapping row 'selected' with row 'top'
blas::swap(cols, &B[selected], ldb, &B[top], ldb);
}
top++;
}
}

enum class TrigDistName: char {
Fourier = 'F',

// ---------------------------------------------------------------------------

Hadamard = 'H'
};

struct TrigDist {
const int64_t n_rows;
const int64_t n_cols;

int64_t dim_short;
int64_t dim_long;
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

const TrigDistName family;

TrigDist(
int64_t n_rows,
int64_t n_cols,
TrigDistName tn = TrigDistName::Hadamard
) : n_rows(n_rows), n_cols(n_cols), family(tn) {
dim_short = MIN(n_rows, n_cols);
dim_long = MAX(n_rows, n_cols);
}
};

template<typename T, SignedInteger sint_t, typename RNG = r123::Philox4x32>
struct TrigSkOp {
using generator = RNG;
using state_type = RNGState<RNG>;
using buffer_type = T;

const int64_t n_rows;
const int64_t n_cols;
int64_t dim_short;
int64_t dim_long;
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved
// int64_t n_sampled;

const TrigDist dist;


const RNGState<RNG> seed_state;

RNGState<RNG> next_state;

const blas::Layout layout;
const bool sketchFromLeft = true;
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved
bool known_filled = false;

T* DiagonalRademacher = nullptr;
T* SampledRows = nullptr;
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

TrigSkOp(
TrigDist dist,
RNGState<RNG> const &state,
bool known_filled = true
) : n_rows(dist.n_rows), n_cols(dist.n_cols), dist(dist), seed_state(state), known_filled(known_filled), dim_short(dist.dim_short), dim_long(dist.dim_long){
// Memory for Rademacher diagonal gets allocated here
// Number of rows/cols to be sampled gets decided here
// i.e. `n_sampled` gets set
if(sketchFromLeft)
DiagonalRademacher = new T[n_rows];
else
DiagonalRademacher = new T[n_cols];
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

SampledRows = new sint_t[dim_short];
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

//TODO: Logic to compute the number of samples that we require `r`
// this can be shown to depend on the maximal row norm of the data matrix
//NOTE: Do not have access to this in the data-oblivious regime --- how
// do we get access?
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved
};

TrigSkOp(
TrigDistName family,
int64_t n_rows,
int64_t n_cols,
uint32_t key
) : n_rows(n_rows), n_cols(n_cols), dist(TrigDist{n_rows, n_cols, family}), seed_state() {};

~TrigSkOp();
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved
};

// Populates the Rademacher diagonal entries and decides the rows/cols to
// be sampled at the end
template<typename T, typename RNG, SignedInteger sint_t>
RandBLAS::RNGState<RNG> fill_trig(
TrigSkOp<T, RNG> &Tr
) {
/**
* Will do the work of filling in the diagonal Rademacher entries
* and selecting the rows/cols to be sampled
*/

auto [ctr, key] = Tr.seed_state;

// Fill in the Rademacher diagonal
if(Tr.sketchFromLeft)
generateRademacherVector_r123(Tr.DiagonalRademacher, key, ctr, Tr.n_rows);
else
generateRademacherVector_r123(Tr.DiagonalRademacher, key, ctr, Tr.n_cols);
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

//NOTE: Select the rows/cols to be sampled --- use the `repeated_fisher_yates` function
int64_t r = Tr.dim_short;
int64_t d = Tr.dim_long;
aryamanjeendgar marked this conversation as resolved.
Show resolved Hide resolved

std::vector<sint_t> idxs_minor(r); // Placeholder
std::vector<T> vals(r); // Placeholder

RandBLAS::repeated_fisher_yates<T, RNG, sint_t>(
Tr.seed_state,
r, // Number of samples (vec_nnz)
d, // Total number of elements (dim_major)
1, // Single sample round (dim_minor)
Tr.SampledRows, // Holds the required output
idxs_minor.data(), // Placeholder
vals.data() // Placeholder
);
}
}


namespace RandBLAS::trig {

// Performs the actual application of the fast trigonometric transform
// Only called after calling `fill_trig`
template <typename T, typename RNG>
inline void lskget(
blas::Layout layout,
blas::Op opS,
blas::Op opA,
int64_t d, // B is d-by-n
int64_t n, // op(A) is m-by-n
int64_t m, // op(S) is d-by-m
T alpha,
TrigSkOp<T, RNG> &Tr,
int64_t ro_s,
int64_t co_s,
const T* A, // data-matrix
int64_t lda,
T beta,
T* B, // output matrix
int64_t ldb
) {
if (!Tr.known_filled)
fill_trig(Tr);

// Applying the diagonal transformation
applyDiagonalRademacher(layout, opA, n, m, Tr.DiagonalRademacher, A, B);
// applyDiagonalRademacher(m, n, A, Tr.DiagonalRademacher);

//TODO: Apply the Hadamard transform

//... and finally permute the rows
permuteRowsToTop(m, n, Tr.SampledRows, B, ldb);
}
}
Loading