Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ Please install libnuma package:
git checkout <latest-tag>
# Please make sure torch is installed when run python example
mkdir build && cd build
# Note: gcc-13 or higher is required
cmake ..
# If you see the error "numa.h: No such file or directory", install the libnuma development package first, then build with "CPATH=$CONDA_PATH/include/:$CPATH make -j".
make -j
```
- Using `python setup.py`
Expand Down
2 changes: 2 additions & 0 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ docker run -it \
git checkout <latest-tag>
# 如果使用python示例,请确保已经安装torch。
mkdir build && cd build
# 注意使用gcc-13及以上版本
cmake ..
# 若遇到错误 "numa.h: No such file or directory",需要先安装 numa 开发包,然后使用 "CPATH=$CONDA_PATH/include/:$CPATH make -j" 完成编译
make -j
```
- 使用 `python setup.py`
Expand Down
4 changes: 2 additions & 2 deletions cmake/xdnn.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ include(ExternalProject)

# cmake-format: off
ExternalProject_Add(xdnn_lib
URL https://github.com/intel/xFasterTransformer/releases/download/IntrinsicGemm/xdnn_v1.5.7.tar.gz
URL_HASH MD5=6cad71df05ef120e058bce28a0a478a8
URL https://github.com/intel/xFasterTransformer/releases/download/IntrinsicGemm/xdnn_v1.5.9.tar.gz
URL_HASH MD5=3aa9cd15df3eb2a7a1c178f3edcf9d37
TIMEOUT 120
SOURCE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/xdnn
CONFIGURE_COMMAND ""
Expand Down
14 changes: 7 additions & 7 deletions src/utils/matmul_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -524,12 +524,12 @@ class MMHelper {

// E4M3
else if constexpr (std::is_same_v<WeiT, e4m3_t>) {
int amx_rows = (int)((K + 15) / 16) * 16;
int amx_cols = (int)((N + 63) / 64) * 64;
if (!weight.isShadow()) weight.Resize(amx_rows, amx_cols);
memset(weight.Data(), 0, sizeof(e4m3_t) * amx_rows * amx_cols);
int packBlkSize = 32;
size_t pack_size = xdnn_small_amx_sgemm_bf16f8bf16_packb_size(K, N, packBlkSize);
if (!weight.isShadow()) weight.Resize((pack_size + N - 1) / N, N);
memset(weight.Data(), 0, sizeof(e4m3_t) * pack_size);
xdnn_small_amx_sgemm_bf16f8bf16_packb(trans, N, K, (const XDNN_E4M3 *)src.Data(), src.Stride(),
(XDNN_E4M3 *)weight.Data(), 64);
(XDNN_E4M3 *)weight.Data(), packBlkSize);
}
}

Expand Down Expand Up @@ -691,7 +691,7 @@ class MMHelper {

// E4M3
else if constexpr (std::is_same_v<WeiT, e4m3_t>) {
if (M <= 16) {
if (true) {
assert(blockSize == 128);
if (lds == -1) lds = (K + 127) / 128;
GEMMVERBOSE("xdnn_gemm_bf16f8bf16_compute",
Expand Down Expand Up @@ -1509,7 +1509,7 @@ class MMHelper {

// E4M3
else if constexpr (std::is_same_v<WeiT, e4m3_t>) {
if (M <= 16) {
if (true) {
assert(blockSize == 128);
if (lds == -1) lds = (K + 127) / 128;
GEMMVERBOSE("xdnn_gemm_bf16f8bf16_compute_residential",
Expand Down