Skip to content

Commit bc79770

Browse files
authored
Rename oneMKL to oneMath and improve the lesson/exercise (#387)
Change the directory name, file names, and all references to oneMKL to the new name oneMath. Replace the global queue::wait calls with proper synchronisation, which is automatic in case of buffers and explicitly passing sycl::events into APIs with USM.
1 parent 51b5012 commit bc79770

14 files changed

+1982
-286
lines changed

Code_Exercises/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,5 @@ add_subdirectory(Local_Memory_Tiling)
6666
add_subdirectory(Work_Group_Sizes)
6767
add_subdirectory(Matrix_Transpose)
6868
add_subdirectory(Functors)
69-
add_subdirectory(OneMKL_gemm)
69+
add_subdirectory(oneMath_gemm)
7070
add_subdirectory(More_SYCL_Features)

Code_Exercises/OneMKL_gemm/CMakeLists.txt

-22
This file was deleted.

Code_Exercises/OneMKL_gemm/README.md

-27
This file was deleted.
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#[[
2+
SYCL Academy (c)
3+
4+
SYCL Academy is licensed under a Creative Commons Attribution-ShareAlike 4.0
5+
International License.
6+
7+
You should have received a copy of the license along with this work. If not,
8+
see <http://creativecommons.org/licenses/by-sa/4.0/>.
9+
]]
10+
11+
add_sycl_executable(oneMath_gemm source_onemath_usm_gemm)
12+
add_sycl_executable(oneMath_gemm source_onemath_buffer_gemm)
13+
14+
target_link_libraries(oneMath_gemm_source_onemath_usm_gemm PUBLIC -lonemath)
15+
target_link_libraries(oneMath_gemm_source_onemath_buffer_gemm PUBLIC -lonemath)
16+
if(SYCL_ACADEMY_ENABLE_SOLUTIONS)
17+
add_sycl_executable(oneMath_gemm solution_onemath_usm_gemm)
18+
add_sycl_executable(oneMath_gemm solution_onemath_buffer_gemm)
19+
20+
target_link_libraries(oneMath_gemm_solution_onemath_usm_gemm PUBLIC -lonemath)
21+
target_link_libraries(oneMath_gemm_solution_onemath_buffer_gemm PUBLIC -lonemath)
22+
endif()

Code_Exercises/oneMath_gemm/README.md

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# SYCL Academy
2+
3+
## Exercise 11: Using the oneMath library for matrix multiplication
4+
---
5+
6+
In this exercise you will learn how to use the API of the oneMath library and
7+
perform a matrix multiplication using the GEMM routines.
8+
9+
The source code provides a template to perform GEMM using oneMath's USM/buffer
10+
API. Please refer to the API here:
11+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
12+
13+
---
14+
## Exercise `oneMath_usm_gemm/source.cpp`
15+
16+
The `source_*.cpp` files already include all the code to create input matrices and
17+
compute a reference result serially on host. The exercise is to fill in the
18+
sections marked with "TODO" comments to perform GEMM on a device using oneMath.
19+
20+
## Build and execution hints
21+
22+
To run the example: `./oneMath_gemm_solution_onemath_usm_gemm` (or) `./oneMath_gemm_source_onemath_usm_gemm`
23+
To verify with cuBLAS debug info, `export CUBLAS_LOGINFO_DBG=1` and `export CUBLAS_LOGDEST_DBG=stdout`
24+
25+
For DevCloud via JupyterLab follow these [instructions](../devcloudJupyter.md).
26+
27+
For DPC++: [instructions](../dpcpp.md).
28+
29+
For AdaptiveCpp: [instructions](../adaptivecpp.md).

Code_Exercises/OneMKL_gemm/solution_onemkl_buffer_gemm.cpp Code_Exercises/oneMath_gemm/solution_onemath_buffer_gemm.cpp

+24-22
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,20 @@
77
You should have received a copy of the license along with this
88
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
99
10-
SYCL Quick Reference
10+
Quick Reference
1111
~~~~~~~~~~~~~~~~~~~~
1212
13-
// oneMKL APIs:
14-
https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm
13+
oneMath execution model:
14+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture
1515
16-
// DGEMM:
17-
https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html
16+
oneMath GEMM API:
17+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
1818
1919
*/
2020

2121
#include <iostream>
2222
#include <limits>
23-
#include <oneapi/mkl/blas.hpp>
23+
#include <oneapi/math.hpp>
2424
#include <random>
2525

2626
#include <sycl/sycl.hpp>
@@ -62,12 +62,12 @@ int VerifyResult(sycl::host_accessor<T, 1>& c_A, T* c_B) {
6262
//////////////////////////////////////////////////////////////////////////////////////////
6363

6464
void print_device_info(sycl::queue& Q) {
65-
std::string sycl_dev_name, sycl_runtime, sycl_driver;
65+
std::string sycl_dev_name, sycl_dev_version, sycl_driver;
6666
sycl_dev_name = Q.get_device().get_info<sycl::info::device::name>();
6767
sycl_driver = Q.get_device().get_info<sycl::info::device::driver_version>();
68-
sycl_runtime = Q.get_device().get_info<sycl::info::device::version>();
69-
std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v"
70-
<< sycl_runtime.c_str()
68+
sycl_dev_version = Q.get_device().get_info<sycl::info::device::version>();
69+
std::cout << "Running on " << sycl_dev_name.c_str()
70+
<< ", version: " << sycl_dev_version.c_str()
7171
<< ", driver version: " << sycl_driver.c_str() << std::endl;
7272
}
7373

@@ -117,28 +117,30 @@ int main() {
117117
}
118118
}
119119

120-
// Create a SYCL in-order queue targetting GPU device
121-
sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}};
120+
// Create a SYCL queue
121+
sycl::queue Q;
122122
// Prints some basic info related to the hardware
123123
print_device_info(Q);
124124

125-
// TODO: Allocate memory on device, (using sycl::malloc_device APIs)
126-
// Creating 1D buffers for matrices which are bound to host memory array
125+
// Create 1D buffers for matrices which are bound to host memory arrays
127126
sycl::buffer<T, 1> a{A.data(), sycl::range<1>{M * N}};
128127
sycl::buffer<T, 1> b{B.data(), sycl::range<1>{N * P}};
129128
sycl::buffer<T, 1> c{C_host.data(), sycl::range<1>{M * P}};
130129

131-
// TODO: Use oneMKL GEMM USM API
132-
oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
133-
oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
134-
oneapi::mkl::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b,
135-
ldB, a, ldA, beta, c, ldC);
136-
Q.wait();
130+
// Use oneMath GEMM buffer API
131+
oneapi::math::transpose transA = oneapi::math::transpose::nontrans;
132+
oneapi::math::transpose transB = oneapi::math::transpose::nontrans;
133+
oneapi::math::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b,
134+
ldB, a, ldA, beta, c, ldC);
135+
136+
// Host accessor ensures synchronisation: a read operation on the accessor
137+
// will wait until all kernels writing to buffer "c" have finished executing and
138+
// then copy the data back to host
137139
sycl::host_accessor C_device{c};
138140

139-
// Verify results from oneMKL APIs
141+
// Verify results from oneMath
140142
int result = 0;
141-
std::cout << "Verify results between OneMKL & Serial: ";
143+
std::cout << "Verify results between oneMath & serial: ";
142144
result = VerifyResult(C_device, C_host.data());
143145

144146
return result;

Code_Exercises/OneMKL_gemm/solution_onemkl_usm_gemm.cpp Code_Exercises/oneMath_gemm/solution_onemath_usm_gemm.cpp

+35-28
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,20 @@
77
You should have received a copy of the license along with this
88
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
99
10-
SYCL Quick Reference
10+
Quick Reference
1111
~~~~~~~~~~~~~~~~~~~~
1212
13-
// oneMKL APIs:
14-
https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm
13+
oneMath execution model:
14+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture
1515
16-
// DGEMM:
17-
https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html
16+
oneMath GEMM API:
17+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
1818
1919
*/
2020

2121
#include <iostream>
2222
#include <limits>
23-
#include <oneapi/mkl/blas.hpp>
23+
#include <oneapi/math.hpp>
2424
#include <random>
2525

2626
#include <sycl/sycl.hpp>
@@ -62,12 +62,12 @@ int VerifyResult(T* c_A, T* c_B) {
6262
//////////////////////////////////////////////////////////////////////////////////////////
6363

6464
void print_device_info(sycl::queue& Q) {
65-
std::string sycl_dev_name, sycl_runtime, sycl_driver;
65+
std::string sycl_dev_name, sycl_dev_version, sycl_driver;
6666
sycl_dev_name = Q.get_device().get_info<sycl::info::device::name>();
6767
sycl_driver = Q.get_device().get_info<sycl::info::device::driver_version>();
68-
sycl_runtime = Q.get_device().get_info<sycl::info::device::version>();
69-
std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v"
70-
<< sycl_runtime.c_str()
68+
sycl_dev_version = Q.get_device().get_info<sycl::info::device::version>();
69+
std::cout << "Running on " << sycl_dev_name.c_str()
70+
<< ", version: " << sycl_dev_version.c_str()
7171
<< ", driver version: " << sycl_driver.c_str() << std::endl;
7272
}
7373

@@ -117,35 +117,42 @@ int main() {
117117
}
118118
}
119119

120-
// Create a SYCL in-order queue targetting GPU device
121-
sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}};
120+
// Create a SYCL queue
121+
sycl::queue Q;
122122
// Prints some basic info related to the hardware
123123
print_device_info(Q);
124124

125-
// TODO: Allocate memory on device, (using sycl::malloc_device APIs)
125+
// Allocate memory on device, (using sycl::malloc_device APIs)
126126
T* a = sycl::malloc_device<T>((M * N), Q);
127127
T* b = sycl::malloc_device<T>((N * P), Q);
128128
T* c = sycl::malloc_device<T>((M * P), Q);
129-
Q.memcpy(a, A.data(), sizeof(T) * M * N);
130-
Q.memcpy(b, B.data(), sizeof(T) * N * P);
131-
132-
// TODO: Use oneMKL GEMM USM API
133-
oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
134-
oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
135-
oneapi::mkl::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b,
136-
ldB, a, ldA, beta, c,
137-
ldC); // row-major
138-
129+
sycl::event eventCopyA = Q.memcpy(a, A.data(), sizeof(T) * M * N);
130+
sycl::event eventCopyB = Q.memcpy(b, B.data(), sizeof(T) * N * P);
131+
132+
// Use oneMath GEMM USM API
133+
oneapi::math::transpose transA = oneapi::math::transpose::nontrans;
134+
oneapi::math::transpose transB = oneapi::math::transpose::nontrans;
135+
// Pass the synchronisation events to ensure GEMM starts after inputs are
136+
// fully copied to the device
137+
sycl::event eventGEMM = oneapi::math::blas::column_major::gemm(
138+
Q, transA, transB, n, m, k, alpha, b, ldB, a, ldA, beta, c, ldC,
139+
{eventCopyA, eventCopyB}); // row-major
140+
141+
// Copy the results from device to host for verification
139142
std::vector<T> C_device(M * P);
140-
Q.memcpy(C_device.data(), c, sizeof(T) * M * P);
141-
Q.wait();
143+
// Pass the synchronisation event for the copy to wait until GEMM is finished
144+
sycl::event eventCopyC =
145+
Q.memcpy(C_device.data(), c, sizeof(T) * M * P, eventGEMM);
146+
147+
// Wait for the copy to finish
148+
eventCopyC.wait();
142149

143-
// Verify results from oneMKL APIs
150+
// Verify results from oneMath
144151
int result = 0;
145-
std::cout << "Verify results between OneMKL & Serial: ";
152+
std::cout << "Verify results between oneMath & serial: ";
146153
result = VerifyResult(C_device.data(), C_host.data());
147154

148-
// TODO: Free memory from device
155+
// Free memory from device
149156
sycl::free(a, Q);
150157
sycl::free(b, Q);
151158
sycl::free(c, Q);

Code_Exercises/OneMKL_gemm/source_onemkl_buffer_gemm.cpp Code_Exercises/oneMath_gemm/source_onemath_buffer_gemm.cpp

+16-17
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,20 @@
77
You should have received a copy of the license along with this
88
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
99
10-
SYCL Quick Reference
10+
Quick Reference
1111
~~~~~~~~~~~~~~~~~~~~
1212
13-
// oneMKL APIs:
14-
https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm
13+
oneMath execution model:
14+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture
1515
16-
// DGEMM:
17-
https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html
16+
oneMath GEMM API:
17+
https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm
1818
1919
*/
2020

2121
#include <iostream>
2222
#include <limits>
23-
#include <oneapi/mkl/blas.hpp>
23+
#include <oneapi/math.hpp>
2424
#include <random>
2525

2626
#include <sycl/sycl.hpp>
@@ -62,12 +62,12 @@ int VerifyResult(sycl::host_accessor<T, 1>& c_A, T* c_B) {
6262
//////////////////////////////////////////////////////////////////////////////////////////
6363

6464
void print_device_info(sycl::queue& Q) {
65-
std::string sycl_dev_name, sycl_runtime, sycl_driver;
65+
std::string sycl_dev_name, sycl_dev_version, sycl_driver;
6666
sycl_dev_name = Q.get_device().get_info<sycl::info::device::name>();
6767
sycl_driver = Q.get_device().get_info<sycl::info::device::driver_version>();
68-
sycl_runtime = Q.get_device().get_info<sycl::info::device::version>();
69-
std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v"
70-
<< sycl_runtime.c_str()
68+
sycl_dev_version = Q.get_device().get_info<sycl::info::device::version>();
69+
std::cout << "Running on " << sycl_dev_name.c_str()
70+
<< ", version: " << sycl_dev_version.c_str()
7171
<< ", driver version: " << sycl_driver.c_str() << std::endl;
7272
}
7373

@@ -117,22 +117,21 @@ int main() {
117117
}
118118
}
119119

120-
// Create a SYCL in-order queue targetting GPU device
121-
sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}};
120+
// Create a SYCL queue
121+
sycl::queue Q;
122122
// Prints some basic info related to the hardware
123123
print_device_info(Q);
124124

125-
// TODO: Allocate memory on device
126-
// Creating 1D buffers for matrices which are bound to host memory array
125+
// TODO: Create 1D buffers for matrices which are bound to host memory arrays
127126

128-
// TODO: Use oneMKL GEMM USM API
127+
// TODO: Use oneMath GEMM buffer API
129128

130129
// TODO: Copy the results from device to host for verification
131130
// Hint: Use sycl::host_accessor
132131

133-
// Verify results from oneMKL APIs
132+
// Verify results from oneMath
134133
int result = 0;
135-
std::cout << "Verify results between OneMKL & Serial: ";
134+
std::cout << "Verify results between oneMath & serial: ";
136135
// TODO: Uncomment the following line verify the results
137136
// result = VerifyResult(C_device, C_host);
138137

0 commit comments

Comments
 (0)