Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,50 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
}
}

// Generate an F16 mask in which randomly placed blocks are set to -INF.
//
// Every element is first drawn uniformly from [min, max). Then rectangular blocks of
// up to 128 (along ne0) x 16 (along ne1) elements are overwritten with -INFINITY at
// random positions; blocks are clipped at the ne0/ne1 boundaries and may overlap.
// The number of blocks is chosen so that they would cover about 10% of the elements
// if none of them overlapped or were clipped.
//
// tensor: destination tensor, must be of type GGML_TYPE_F16 (asserted)
// min/max: bounds of the uniform distribution used for the non-masked values
static void init_tensor_kq_mask(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
    GGML_ASSERT(tensor->type == GGML_TYPE_F16);

    // keep the dimensions 64-bit: narrowing them to 32 bits could truncate large
    // dimensions and overflow the element count / flat offsets computed below
    GGML_TENSOR_LOCALS(int64_t, ne, tensor, ne);

    const int64_t n_elements = ne0*ne1*ne2*ne3;

    std::vector<float>       data_f32(n_elements);
    std::vector<ggml_fp16_t> data_f16(n_elements);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(min, max);

    for (float & v : data_f32) {
        v = dis(gen);
    }

    // block size
    const int64_t blck0 = 128;
    const int64_t blck1 = 16;

    // number of INF blocks
    const int64_t n_inf_blocks = static_cast<int64_t>(0.1*n_elements/(blck0*blck1));

    // uniform position in [0, n) drawn from the seeded generator instead of querying
    // std::random_device for every draw; only called when n_inf_blocks > 0, which
    // implies that all dimensions are non-zero
    const auto rand_pos = [&gen](int64_t n) {
        return std::uniform_int_distribution<int64_t>(0, n - 1)(gen);
    };

    for (int64_t b = 0; b < n_inf_blocks; b++) {
        const int64_t p3 = rand_pos(ne3);
        const int64_t p2 = rand_pos(ne2);
        const int64_t p1 = rand_pos(ne1);
        const int64_t p0 = rand_pos(ne0);

        for (int64_t i1 = 0; i1 < blck1 && p1 + i1 < ne1; i1++) {
            // flat offset of the first masked element in row (p1 + i1)
            const int64_t idx = p3*ne2*ne1*ne0 + p2*ne1*ne0 + (p1 + i1)*ne0 + p0;

            for (int64_t i0 = 0; i0 < blck0 && p0 + i0 < ne0; i0++) {
                data_f32[idx + i0] = -INFINITY;
            }
        }
    }

    ggml_fp32_to_fp16_row(data_f32.data(), data_f16.data(), n_elements);

    ggml_backend_tensor_set(tensor, data_f16.data(), 0, data_f16.size()*sizeof(ggml_fp16_t));
}

static std::vector<float> tensor_to_float(const ggml_tensor * t) {
std::vector<float> tv;
tv.reserve(ggml_nelements(t));
Expand Down Expand Up @@ -5104,6 +5148,8 @@ struct test_flash_attn_ext : public test_case {
if (strcmp(t->name, "s") == 0) {
// make the sink values more noticeable in order to trigger a test failure when the implementation is wrong
init_tensor_uniform(t, -10.0f, 10.0f);
} else if (strcmp(t->name, "m") == 0) {
init_tensor_kq_mask(t);
} else {
init_tensor_uniform(t);
}
Expand Down
Loading