Skip to content

Commit 3e7877f

Browse files
committed
Consolidate all of the duplicated ZEROACC loops into a single helper
1 parent d2bbb7b commit 3e7877f

File tree

1 file changed

+32
-112
lines changed

1 file changed

+32
-112
lines changed

tt_llk_blackhole/llk_lib/llk_math_eltwise_binary.h

Lines changed: 32 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,30 @@ inline void eltwise_binary_reuse_dest_as_src()
5151
}
5252
}
5353

54+
// Helper to run the eltwise binary loop with optional dest reuse and face clearing
55+
// Consolidates the loop pattern that was previously repeated in each broadcast/fidelity branch (DRY)
56+
template <bool is_fp32_dest_acc_en, EltwiseBinaryReuseDestType binary_reuse_dest>
57+
inline void eltwise_binary_reuse_dest_helper_func(
58+
const uint32_t loop_count, const uint32_t face_base_offset, const bool clear_fp32_dst_acc, const uint dst_index)
59+
{
60+
#pragma GCC unroll 0
61+
for (std::uint32_t face_num = 0; face_num < loop_count; face_num++)
62+
{
63+
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
64+
65+
// Clear DEST face-by-face when reusing dest as source
66+
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
67+
{
68+
constexpr uint32_t ZERO_ACC_MODE = p_zeroacc::CLR_16;
69+
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
70+
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
71+
TT_ZEROACC(ZERO_ACC_MODE, clear_fp32, 0, ADDR_MOD_1, (buffer_base + get_dest_index_in_faces(dst_index, face_base_offset + face_num)));
72+
}
73+
74+
ckernel_template::run();
75+
}
76+
}
77+
5478
template <
5579
EltwiseBinaryType eltwise_binary_type,
5680
BroadcastType src_b_bcast_type,
@@ -61,8 +85,7 @@ template <
6185
inline void _llk_math_eltwise_binary_(const std::uint32_t num_faces, uint dst_index, const bool clear_fp32_dst_acc)
6286
{
6387
LLK_ASSERT(num_faces == 1 || num_faces == 2 || num_faces == 4, "num_faces must be 1, 2, or 4");
64-
constexpr bool high_fidelity = (NUM_FIDELITY_PHASES > 0);
65-
constexpr uint32_t ZERO_ACC_MODE = p_zeroacc::CLR_16;
88+
constexpr bool high_fidelity = (NUM_FIDELITY_PHASES > 0);
6689

6790
math::set_dst_write_addr<DstTileShape::Tile32x32, UnpackDestination::SrcRegs>(dst_index);
6891

@@ -111,125 +134,22 @@ inline void _llk_math_eltwise_binary_(const std::uint32_t num_faces, uint dst_in
111134
if constexpr (src_b_bcast_type == BroadcastType::COL)
112135
{
113136
// The MOP for col broadcast only does 2 outer loops, so B must be cleared manually and the MOP run twice to cover the full tile size
114-
constexpr uint32_t outerloop = (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE) ? 2 : 1;
115-
if constexpr (high_fidelity)
116-
{
117-
#pragma GCC unroll 0
118-
for (std::uint32_t face_num = 0; face_num < 2; face_num++)
119-
{
120-
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
121-
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
122-
{
123-
// We clear the DEST face-by-face, given the DEST base, tile index and face index
124-
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
125-
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
126-
TT_ZEROACC(
127-
ZERO_ACC_MODE, clear_fp32, 0, ADDR_MOD_1, (buffer_base + get_dest_index_in_faces(dst_index, (0 + face_num)))); // Clear faces 0 & 1
128-
}
129-
ckernel_template::run();
130-
}
131-
}
132-
else
133-
{
134-
#pragma GCC unroll 0
135-
for (std::uint32_t face_num = 0; face_num < outerloop; face_num++)
136-
{
137-
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
138-
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
139-
{
140-
// We clear the DEST face-by-face, given the DEST base, tile index and face index
141-
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
142-
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
143-
TT_ZEROACC(
144-
ZERO_ACC_MODE, clear_fp32, 0, ADDR_MOD_1, (buffer_base + get_dest_index_in_faces(dst_index, (0 + face_num)))); // Clear faces 0 & 1
145-
}
146-
ckernel_template::run();
147-
}
148-
}
137+
constexpr uint32_t outerloop = (high_fidelity) ? 2 : ((binary_reuse_dest != EltwiseBinaryReuseDestType::NONE) ? 2 : 1);
138+
eltwise_binary_reuse_dest_helper_func<is_fp32_dest_acc_en, binary_reuse_dest>(outerloop, 0, clear_fp32_dst_acc, dst_index);
149139
TTI_SETRWC(p_setrwc::CLR_B, 0, 0, 0, 0, 0);
140+
150141
if (num_faces == 4)
151142
{
152-
if constexpr (high_fidelity)
153-
{
154-
#pragma GCC unroll 0
155-
for (std::uint32_t face_num = 0; face_num < 2; face_num++)
156-
{
157-
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
158-
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
159-
{
160-
// We clear the DEST face-by-face, given the DEST base, tile index and face index
161-
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
162-
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
163-
TT_ZEROACC(
164-
ZERO_ACC_MODE,
165-
clear_fp32,
166-
0,
167-
ADDR_MOD_1,
168-
(buffer_base + get_dest_index_in_faces(dst_index, (2 + face_num)))); // Clear faces 2 & 3
169-
}
170-
ckernel_template::run();
171-
}
172-
}
173-
else
174-
{
175-
#pragma GCC unroll 0
176-
for (std::uint32_t face_num = 0; face_num < outerloop; face_num++)
177-
{
178-
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
179-
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
180-
{
181-
// We clear the DEST face-by-face, given the DEST base, tile index and face index
182-
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
183-
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
184-
TT_ZEROACC(
185-
ZERO_ACC_MODE,
186-
clear_fp32,
187-
0,
188-
ADDR_MOD_1,
189-
(buffer_base + get_dest_index_in_faces(dst_index, (2 + face_num)))); // Clear faces 2 & 3
190-
}
191-
ckernel_template::run();
192-
}
193-
}
143+
eltwise_binary_reuse_dest_helper_func<is_fp32_dest_acc_en, binary_reuse_dest>(outerloop, 2, clear_fp32_dst_acc, dst_index);
194144
TTI_SETRWC(p_setrwc::CLR_B, 0, 0, 0, 0, 0);
195145
}
196146
}
197147
else
198148
{
199149
// Row broadcast and no broadcast behave similarly
200-
const uint32_t outerloop = (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE) ? num_faces : 1;
201-
if constexpr (high_fidelity)
202-
{
203-
#pragma GCC unroll 0
204-
for (std::uint32_t face_num = 0; face_num < num_faces; face_num++)
205-
{
206-
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
207-
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
208-
{
209-
// We clear the DEST face-by-face, given the DEST base, tile index and face index
210-
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
211-
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
212-
TT_ZEROACC(ZERO_ACC_MODE, clear_fp32, 0, ADDR_MOD_1, (buffer_base + get_dest_index_in_faces(dst_index, face_num)));
213-
}
214-
ckernel_template::run();
215-
}
216-
}
217-
else
218-
{
219-
#pragma GCC unroll 0
220-
for (std::uint32_t face_num = 0; face_num < outerloop; face_num++)
221-
{
222-
eltwise_binary_reuse_dest_as_src<binary_reuse_dest>();
223-
if constexpr (binary_reuse_dest != EltwiseBinaryReuseDestType::NONE)
224-
{
225-
// We clear the DEST face-by-face, given the DEST base, tile index and face index
226-
int clear_fp32 = is_fp32_dest_acc_en && clear_fp32_dst_acc ? 1 : 0;
227-
auto buffer_base = is_fp32_dest_acc_en && clear_fp32_dst_acc ? get_dest_buffer_base_32b() : get_dest_buffer_base_16b();
228-
TT_ZEROACC(ZERO_ACC_MODE, clear_fp32, 0, ADDR_MOD_1, (buffer_base + get_dest_index_in_faces(dst_index, face_num)));
229-
}
230-
ckernel_template::run();
231-
}
232-
}
150+
const uint32_t outerloop = (high_fidelity) ? num_faces : ((binary_reuse_dest != EltwiseBinaryReuseDestType::NONE) ? num_faces : 1);
151+
eltwise_binary_reuse_dest_helper_func<is_fp32_dest_acc_en, binary_reuse_dest>(outerloop, 0, clear_fp32_dst_acc, dst_index);
152+
233153
if constexpr (src_b_bcast_type == BroadcastType::SCALAR)
234154
{
235155
TTI_SETRWC(p_setrwc::CLR_B, 0, 0, 0, 0, p_setrwc::SET_D);

0 commit comments

Comments
 (0)