Skip to content

Commit 7397664

Browse files
committed
Init.
1 parent d646a0b commit 7397664

30 files changed

+290
-209
lines changed

tests/python_tests/helpers/param_config.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,25 @@ def parametrize(**kwargs: any):
286286
parameters_string = ",".join(parameters)
287287
parameter_values = _params_solve_dependencies(**kwargs)
288288

289+
def generate_id(value_tuple):
290+
"""Generate readable test IDs from parameter values."""
# Result is one string: a "<param>:<value>" fragment per (name, value) pair, joined with "-".
291+
parts = []
292+
# `parameters` is not defined in this function; it is the same name the surrounding code joins
# into `parameters_string`. zip() pairs names with values positionally and stops at the shorter one.
for param, value in zip(parameters, value_tuple):
293+
# Checked first, so an InputOutputFormat is always rendered as "<input name>-><output name>",
# regardless of whether it also exposes a `name` or `value` attribute.
if isinstance(value, InputOutputFormat):
294+
param_value = f"{value.input_format.name}->{value.output_format.name}"
295+
# Any other object exposing `.name` is labelled by that attribute
# (presumably enum members such as the test option enums — TODO confirm).
elif hasattr(value, "name"):
296+
param_value = value.name
297+
# No `.name`, but a `.value` attribute: use its string form.
elif hasattr(value, "value"):
298+
param_value = str(value.value)
299+
# Last resort: plain str() of the value itself.
else:
300+
param_value = str(value)
301+
parts.append(f"{param}:{param_value}")
302+
return "-".join(parts)
303+
304+
ids = [generate_id(values) for values in parameter_values]
305+
289306
def decorator(test_function):
290-
return pytest.mark.parametrize(parameters_string, parameter_values)(
307+
return pytest.mark.parametrize(parameters_string, parameter_values, ids=ids)(
291308
test_function
292309
)
293310

tests/python_tests/perf_eltwise_binary_sfpu.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,12 @@ def test_perf_eltwise_binary_sfpu_add_top_row(
266266
"DestAccumulation.No is not supported for SfpuAddTopRow on Blackhole"
267267
)
268268

269+
# Skip Float32 with DestAccumulation.Yes - format inference converts to Tf32 which ADD_TOP_ROW doesn't support
270+
if formats.input_format == DataFormat.Float32 and dest_acc == DestAccumulation.Yes:
271+
pytest.skip(
272+
"Float32 with DestAccumulation.Yes is not supported for SfpuAddTopRow (gets converted to Tf32)"
273+
)
274+
269275
unpack_to_dest = (
270276
formats.input_format.is_32_bit() and dest_acc == DestAccumulation.No
271277
)

tests/python_tests/perf_eltwise_unary_sfpu.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
],
4949
mathop=[
5050
MathOperation.Reciprocal,
51+
MathOperation.Rsqrt,
5152
MathOperation.Sqrt,
5253
MathOperation.Silu,
5354
MathOperation.Gelu,

tests/python_tests/test_eltwise_unary_sfpu.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
MathOperation.ReluMax,
6262
MathOperation.ReluMin,
6363
],
64-
dest_acc=[DestAccumulation.No, DestAccumulation.Yes],
64+
dest_acc=[DestAccumulation.No],
6565
)
6666
def test_eltwise_unary_sfpu_float(
6767
formats: list[InputOutputFormat],
@@ -107,6 +107,13 @@ def test_eltwise_unary_sfpu_float(
107107
):
108108
pytest.skip(reason="This combination is not supported on BH architecture")
109109

110+
# Skip Float16_b->Float16 and Bfp8_b->Float16 with DestAccumulation.No
111+
if dest_acc == DestAccumulation.No and formats.output_format == DataFormat.Float16:
112+
if formats.input_format in [DataFormat.Float16_b, DataFormat.Bfp8_b]:
113+
pytest.skip(
114+
reason=f"{formats.input_format.name}->Float16 with DestAccumulation.No is not currently supported"
115+
)
116+
110117
if (
111118
approx_mode == ApproximationMode.Yes
112119
and mathop in [MathOperation.Exp, MathOperation.Exp2, MathOperation.Elu]
@@ -187,6 +194,11 @@ def eltwise_unary_sfpu(
187194
input_dimensions,
188195
)
189196

197+
# If dest_acc is off, we unpack Float32 into 16-bit format in src registers (later copied over in dest reg for SFPU op)
198+
# unpack_to_dest=(
199+
# formats.input_format.is_32_bit() and dest_acc == DestAccumulation.No
200+
# )
201+
190202
configuration = TestConfig(
191203
test_name,
192204
formats,
@@ -207,10 +219,7 @@ def eltwise_unary_sfpu(
207219
tile_count_res=tile_cnt_A,
208220
),
209221
dest_acc=dest_acc,
210-
# If dest_acc is off, we unpack Float32 into 16-bit format in src registers (later copied over in dest reg for SFPU op)
211-
unpack_to_dest=(
212-
formats.input_format.is_32_bit() and dest_acc == DestAccumulation.Yes
213-
),
222+
unpack_to_dest=False,
214223
)
215224

216225
res_from_L1 = configuration.run(workers_tensix_coordinates)

tests/sources/eltwise_binary_sfpu_perf.cpp

Lines changed: 44 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616

1717
// Globals
1818
// Globals
19-
uint32_t unp_cfg_context = 0;
20-
uint32_t pack_sync_tile_dst_ptr = 0;
21-
uint32_t math_sync_tile_dst_index = 0;
22-
static constexpr int MAX_TILES_DEST = is_fp32_dest_acc_en ? 4 : 8;
19+
uint32_t unp_cfg_context = 0;
20+
uint32_t pack_sync_tile_dst_ptr = 0;
21+
uint32_t math_sync_tile_dst_index = 0;
22+
static constexpr ckernel::DstSync DST_SYNC = ckernel::DstSync::SyncHalf;
2323

2424
#ifdef LLK_TRISC_UNPACK
2525

@@ -28,6 +28,9 @@ static constexpr int MAX_TILES_DEST = is_fp32_dest_acc_en ? 4 : 8;
2828

2929
void run_kernel(const volatile struct RuntimeParams* params)
3030
{
31+
const int MAX_TILES_DEST =
32+
is_fp32_dest_acc_en ? (BIT32_DEST_REGISTER_HALF_SIZE / (params->num_faces * FACE_R_DIM)) : (DEST_REGISTER_HALF_SIZE / (params->num_faces * FACE_R_DIM));
33+
3134
{
3235
ZONE_SCOPED("INIT")
3336

@@ -79,11 +82,14 @@ void run_kernel(const volatile struct RuntimeParams* params)
7982

8083
void run_kernel(const volatile struct RuntimeParams* params)
8184
{
85+
const int MAX_TILES_DEST =
86+
is_fp32_dest_acc_en ? (BIT32_DEST_REGISTER_HALF_SIZE / (params->num_faces * FACE_R_DIM)) : (DEST_REGISTER_HALF_SIZE / (params->num_faces * FACE_R_DIM));
87+
8288
{
8389
ZONE_SCOPED("INIT")
8490

8591
_llk_math_eltwise_unary_datacopy_init_<DataCopyType::A2D, is_fp32_dest_acc_en>(params->num_faces, formats.math);
86-
_llk_math_pack_sync_init_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
92+
_llk_math_pack_sync_init_<DST_SYNC, is_fp32_dest_acc_en>();
8793
_llk_math_hw_configure_(formats.math, formats.math);
8894

8995
_llk_math_eltwise_binary_sfpu_init_<SfpuType::add1>();
@@ -98,12 +104,15 @@ void run_kernel(const volatile struct RuntimeParams* params)
98104
{
99105
for (int loop = 0; loop < params->LOOP_FACTOR; ++loop)
100106
{
101-
for (int i = 0; i < params->TILE_CNT; ++i)
107+
for (int block_start = 0; block_start < params->TILE_CNT; block_start += MAX_TILES_DEST)
102108
{
103-
// Only perform synchronization with unpacker, it does not copy
104-
// the data when unpack_to_dest is true - as data is already in dest.
105-
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DstSync::SyncHalf, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
106-
i, formats.math, formats.math);
109+
int block_tiles = std::min(params->TILE_CNT - block_start, MAX_TILES_DEST);
110+
111+
for (int block_tile = 0; block_tile < block_tiles; ++block_tile)
112+
{
113+
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DST_SYNC, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
114+
block_tile, formats.math, formats.math);
115+
}
107116
}
108117
}
109118
}
@@ -128,11 +137,11 @@ void run_kernel(const volatile struct RuntimeParams* params)
128137

129138
for (int block_tile = 0; block_tile < block_tiles; ++block_tile)
130139
{
131-
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DstSync::SyncHalf, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
132-
block_start + block_tile, formats.math, formats.math);
140+
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DST_SYNC, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
141+
block_tile, formats.math, formats.math);
133142
}
134143

135-
_llk_math_dest_section_done_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
144+
_llk_math_dest_section_done_<DST_SYNC, is_fp32_dest_acc_en>();
136145
}
137146
}
138147
}
@@ -152,8 +161,8 @@ void run_kernel(const volatile struct RuntimeParams* params)
152161
/* iterations*/ params->num_faces);
153162
}
154163

155-
_llk_math_wait_for_dest_available_<DstSync::SyncHalf>();
156-
_llk_math_dest_section_done_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
164+
_llk_math_wait_for_dest_available_<DST_SYNC>();
165+
_llk_math_dest_section_done_<DST_SYNC, is_fp32_dest_acc_en>();
157166
}
158167
}
159168
}
@@ -170,14 +179,13 @@ void run_kernel(const volatile struct RuntimeParams* params)
170179

171180
for (int block_tile = 0; block_tile < block_tiles; ++block_tile)
172181
{
173-
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DstSync::SyncHalf, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
174-
block_start + block_tile, formats.math, formats.math);
182+
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DST_SYNC, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
183+
block_tile, formats.math, formats.math);
175184
}
176185
}
177186

178-
_llk_math_eltwise_binary_sfpu_start_<DstSync::SyncHalf>(/* dst_index */ block_start);
179-
180-
test_utils::call_binary_sfpu_operation<APPROX_MODE, SFPU_BINARY_OPERATION, ITERATIONS>(block_start, formats.math);
187+
_llk_math_eltwise_binary_sfpu_start_<DST_SYNC>(/* dst_index */ 0);
188+
test_utils::call_binary_sfpu_operation<APPROX_MODE, SFPU_BINARY_OPERATION, ITERATIONS, formats.math>();
181189

182190
_llk_math_eltwise_binary_sfpu_done_();
183191
}
@@ -191,22 +199,21 @@ void run_kernel(const volatile struct RuntimeParams* params)
191199
{
192200
int block_tiles = std::min(params->TILE_CNT - block_start, MAX_TILES_DEST);
193201

194-
_llk_math_wait_for_dest_available_<DstSync::SyncHalf>();
202+
_llk_math_wait_for_dest_available_<DST_SYNC>();
195203

196204
// Copy from srcA to dest
197205
for (int block_tile = 0; block_tile < block_tiles; ++block_tile)
198206
{
199-
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DstSync::SyncHalf, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
200-
block_start + block_tile, formats.math, formats.math);
201-
}
202-
203-
// Start SFPU binary operation
204-
_llk_math_eltwise_binary_sfpu_start_<DstSync::SyncHalf>(/* dst_index */ block_start);
207+
_llk_math_eltwise_unary_datacopy_<DataCopyType::A2D, DST_SYNC, is_fp32_dest_acc_en, BroadcastType::NONE, unpack_to_dest>(
208+
block_tile, formats.math, formats.math);
205209

206-
test_utils::call_binary_sfpu_operation<APPROX_MODE, SFPU_BINARY_OPERATION, ITERATIONS>(block_start, formats.math);
210+
// Start SFPU binary operation
211+
_llk_math_eltwise_binary_sfpu_start_<DST_SYNC>(/* dst_index */ block_tile);
212+
test_utils::call_binary_sfpu_operation<APPROX_MODE, SFPU_BINARY_OPERATION, ITERATIONS, formats.math>();
213+
_llk_math_eltwise_binary_sfpu_done_();
214+
}
207215

208-
_llk_math_eltwise_binary_sfpu_done_();
209-
_llk_math_dest_section_done_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
216+
_llk_math_dest_section_done_<DST_SYNC, is_fp32_dest_acc_en>();
210217
}
211218
}
212219
}
@@ -223,6 +230,9 @@ void run_kernel(const volatile struct RuntimeParams* params)
223230

224231
void run_kernel(const volatile struct RuntimeParams* params)
225232
{
233+
const int MAX_TILES_DEST =
234+
is_fp32_dest_acc_en ? (BIT32_DEST_REGISTER_HALF_SIZE / (params->num_faces * FACE_R_DIM)) : (DEST_REGISTER_HALF_SIZE / (params->num_faces * FACE_R_DIM));
235+
226236
{
227237
ZONE_SCOPED("INIT")
228238

@@ -235,7 +245,7 @@ void run_kernel(const volatile struct RuntimeParams* params)
235245
_llk_pack_init_<false, false>(formats.pack_dst, FACE_R_DIM, params->num_faces);
236246
#endif
237247
// Initialize destination for packing
238-
_llk_pack_dest_init_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
248+
_llk_pack_dest_init_<DST_SYNC, is_fp32_dest_acc_en>();
239249

240250
PROFILER_SYNC();
241251
}
@@ -252,7 +262,7 @@ void run_kernel(const volatile struct RuntimeParams* params)
252262

253263
for (int block_tile = 0; block_tile < block_tiles; block_tile++)
254264
{
255-
_llk_pack_<DstSync::SyncHalf, is_fp32_dest_acc_en>(block_tile, PERF_ADDRESS(PERF_OUTPUT, block_start + block_tile));
265+
_llk_pack_<DST_SYNC, is_fp32_dest_acc_en, /* untilize */ false>(block_tile, PERF_ADDRESS(PERF_OUTPUT, block_tile));
256266
}
257267
}
258268
}
@@ -268,9 +278,9 @@ void run_kernel(const volatile struct RuntimeParams* params)
268278
_llk_packer_wait_for_math_done_();
269279
for (int block_tile = 0; block_tile < block_tiles; block_tile++)
270280
{
271-
_llk_pack_<DstSync::SyncHalf, is_fp32_dest_acc_en>(block_tile, PERF_ADDRESS(PERF_OUTPUT, block_start + block_tile));
281+
_llk_pack_<DST_SYNC, is_fp32_dest_acc_en, /* untilize */ false>(block_tile, PERF_ADDRESS(PERF_OUTPUT, block_tile));
272282
}
273-
_llk_pack_dest_section_done_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
283+
_llk_pack_dest_section_done_<DST_SYNC, is_fp32_dest_acc_en>();
274284
}
275285
}
276286
}

0 commit comments

Comments
 (0)