Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions projects/rocprofiler-compute/sample/rocflop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ __global__ void matmul_fp32_throughput(float* inputs, vec4<float>* outputs, int
}
#endif // !defined(__gfx906__)

// SMFMAC (Sparse MFMA) instructions are only available on gfx90a and later (not on gfx906 or gfx908)
#if !defined(__gfx906__) && !defined(__gfx908__)
// SMFMAC (Sparse MFMA) instructions are only available on gfx940 and later (not on gfx906, gfx908, or gfx90a)
#if !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__)
__global__ void sparse_matmul_fp16_throughput(vec4<float16>* input0, vec8<float16>* input1, vec4<float>* outputs, int count)
{
int grid_size = gridDim.x * blockDim.x;
Expand Down Expand Up @@ -149,7 +149,7 @@ __global__ void sparse_matmul_fp16_throughput(vec4<float16>* input0, vec8<float1

outputs[tid] = accum0 + accum1 + accum2 + accum3;
}
#endif // !defined(__gfx906__) && !defined(__gfx908__)
#endif // !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__)

void HIP_CALL(hipError_t err)
{
Expand Down Expand Up @@ -322,7 +322,7 @@ template<typename matT, typename accumT> double matmul_throughput_test(int devic
}
#endif // !defined(__gfx906__)

#if !defined(__gfx906__) && !defined(__gfx908__)
#if !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__)
template<typename matT, typename accumT> double sparse_matmul_throughput_test(int device, int count, int runs = 1)
{
const int wave_size = 64;
Expand Down Expand Up @@ -376,7 +376,7 @@ template<typename matT, typename accumT> double sparse_matmul_throughput_test(in

return flops;
}
#endif // !defined(__gfx906__) && !defined(__gfx908__)
#endif // !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__)

struct Result {
int device = -1;
Expand Down Expand Up @@ -480,17 +480,17 @@ Result run_tests(int device, int runs, uint32_t mask)
}
#endif

#if !defined(__gfx906__) && !defined(__gfx908__)
#if !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__)
if(mask & SMATRIX_FP16) {
// SMFMAC only available on gfx90a (MI200) and later, not on gfx906 or gfx908
if(arch.major == 0x9 && (arch.minor > 0x4 || (arch.minor == 0 && arch.rev >= 0xa))) {
// SMFMAC only available on gfx940 (MI300) and later, not on gfx906, gfx908, or gfx90a
if(arch.major == 0x9 && arch.minor >= 0x4) {
res.smfmac_fp16 = sparse_matmul_throughput_test<float16, float>(device, 4096, runs);
} else {
res.smfmac_fp16 = 0;
}
}
#else
// SMFMAC not available when compiling for gfx906 or gfx908
// SMFMAC not available when compiling for gfx906, gfx908, or gfx90a
if(mask & SMATRIX_FP16) {
res.smfmac_fp16 = 0;
}
Expand Down