Skip to content

Commit edf8757

Browse files
authored
frequency increased to 237MHz, throughput increased by 5% (#1)
1 parent 59de226 commit edf8757

File tree

7 files changed

+106
-74
lines changed

7 files changed

+106
-74
lines changed

spmv/libfpga/common.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,18 @@
1212
#include <iomanip>
1313
#endif
1414

15+
#ifdef __SYNTHESIS__
16+
#include "utils/x_hls_utils.h" // for reg() function
17+
#else
18+
#ifndef REG_FOR_SW_EMU
19+
#define REG_FOR_SW_EMU
20+
// Software (non-synthesis) stand-in for reg(): returns its argument unchanged.
// This definition is only compiled when __SYNTHESIS__ is not defined; in the
// synthesis branch of the surrounding #ifdef, "utils/x_hls_utils.h" is included
// instead, which (per the comment on that include) supplies reg().
// The REG_FOR_SW_EMU guard around it keeps the template from being defined twice.
template<typename T>
21+
T reg(T in) {
22+
return in;
23+
}
24+
#endif
25+
#endif
26+
1527
//-------------------------------------------------------------------------
1628
// overlay configurations
1729
//-------------------------------------------------------------------------

spmv/libfpga/pe.h

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,6 @@
66

77
#include "common.h"
88

9-
#ifdef __SYNTHESIS__
10-
#include "utils/x_hls_utils.h" // for reg() function
11-
#else
12-
#ifndef REG_FOR_SW_EMU
13-
#define REG_FOR_SW_EMU
14-
template<typename T>
15-
T reg(T in) {
16-
return in;
17-
}
18-
#endif
19-
#endif
20-
219
#ifndef __SYNTHESIS__
2210
// #define PE_LINE_TRACING
2311
#endif
@@ -53,6 +41,7 @@ void ufixed_pe_process(
5341
#pragma HLS pipeline II=1
5442
#pragma HLS dependence variable=output_buffer inter false
5543
#pragma HLS dependence variable=ifwq intra true
44+
#pragma HLS dependence variable=ifwq inter false
5645
bool valid = false;
5746
UPDATE_PLD_T pld;
5847
if(input.read_nb(pld)) {
@@ -70,33 +59,33 @@ void ufixed_pe_process(
7059
valid = false;
7160
}
7261

62+
IN_FLIGHT_WRITE ifwq_new_entry;
63+
IDX_T bank_addr = pld.row_idx / pack_size;
64+
VAL_T incr = pld.mat_val * pld.vec_val;
65+
VAL_T q = output_buffer[bank_addr];
66+
VAL_T q_fwd = ((bank_addr == ifwq[0].addr) && ifwq[0].valid) ? ifwq[0].value :
67+
((bank_addr == ifwq[1].addr) && ifwq[1].valid) ? ifwq[1].value :
68+
((bank_addr == ifwq[2].addr) && ifwq[2].valid) ? ifwq[2].value :
69+
((bank_addr == ifwq[3].addr) && ifwq[3].valid) ? ifwq[3].value :
70+
((bank_addr == ifwq[4].addr) && ifwq[4].valid) ? ifwq[4].value :
71+
q;
72+
VAL_T new_q = q_fwd + incr;
73+
#pragma HLS bind_op variable=new_q op=add impl=dsp latency=0
74+
VAL_T new_q_reg = reg(new_q); // force a register after addition
75+
ifwq_new_entry.addr = bank_addr;
76+
ifwq_new_entry.value = new_q;
77+
ifwq_new_entry.valid = valid;
78+
7379
if (valid) {
74-
IDX_T bank_addr = pld.row_idx / pack_size;
75-
VAL_T incr = pld.mat_val * pld.vec_val;
76-
VAL_T q = output_buffer[bank_addr];
77-
VAL_T q_fwd = ((bank_addr == ifwq[0].addr) && ifwq[0].valid) ? ifwq[0].value :
78-
((bank_addr == ifwq[1].addr) && ifwq[1].valid) ? ifwq[1].value :
79-
((bank_addr == ifwq[2].addr) && ifwq[2].valid) ? ifwq[2].value :
80-
((bank_addr == ifwq[3].addr) && ifwq[3].valid) ? ifwq[3].value :
81-
((bank_addr == ifwq[4].addr) && ifwq[4].valid) ? ifwq[4].value :
82-
q;
83-
VAL_T new_q = q_fwd + incr;
84-
#pragma HLS bind_op variable=new_q op=add impl=dsp latency=0
85-
VAL_T new_q_reg = reg(new_q); // force a register after addition
8680
output_buffer[bank_addr] = new_q_reg;
87-
ifwq[4] = ifwq[3];
88-
ifwq[3] = ifwq[2];
89-
ifwq[2] = ifwq[1];
90-
ifwq[1] = ifwq[0];
91-
ifwq[0] = (IN_FLIGHT_WRITE){true, bank_addr, new_q};
92-
} else {
93-
ifwq[4] = ifwq[3];
94-
ifwq[3] = ifwq[2];
95-
ifwq[2] = ifwq[1];
96-
ifwq[1] = ifwq[0];
97-
ifwq[0] = (IN_FLIGHT_WRITE){false, 0, 0};
9881
}
9982

83+
ifwq[4] = ifwq[3];
84+
ifwq[3] = ifwq[2];
85+
ifwq[2] = ifwq[1];
86+
ifwq[1] = ifwq[0];
87+
ifwq[0] = ifwq_new_entry;
88+
10089
}
10190
}
10291

spmv/libfpga/shuffle.h

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
unsigned long long iter_cnt = 0;
1717
#endif
1818

19-
const unsigned ARBITER_LATENCY = 5;
19+
const unsigned ARBITER_LATENCY = 7;
2020
//------------------------------------------------------------
2121
// arbiters (2 overloads)
2222
//------------------------------------------------------------
@@ -176,11 +176,38 @@ void arbiter_1p(
176176
// #endif
177177
}
178178

179+
//------------------------------------------------------------
180+
// crossbar
181+
//------------------------------------------------------------
182+
// Crossbar: forwards selected input payloads to the per-lane output streams.
// For every output lane OLid in [0, num_lanes), in[select[OLid]] is written to
// output_lanes[OLid] only when both out_valid[OLid] and in_valid[select[OLid]]
// are set; otherwise nothing is written to that lane during this call.
//   in_valid     - one bit per input lane, tested at index select[OLid]
//   out_valid    - one bit per output lane, tested at index OLid
//   select       - for each output lane, the index of the input lane to read
//   in           - payloads, indexed by input lane
//   output_lanes - destination streams, one per output lane
// NOTE(review): select[OLid] is used unchecked as an index into in_valid and in;
// presumably the caller guarantees select[OLid] < num_lanes - confirm.
template<typename PayloadT, unsigned num_lanes>
183+
void crossbar(
184+
ap_uint<num_lanes> in_valid,
185+
ap_uint<num_lanes> out_valid,
186+
unsigned select[num_lanes],
187+
PayloadT in[num_lanes],
188+
hls::stream<PayloadT> output_lanes[num_lanes]
189+
) {
190+
// Disabled directives kept for reference: standalone pipelining / fixed latency.
// #pragma HLS pipeline II=1
191+
// #pragma HLS latency min=8 max=8
192+
// Request that the HLS tool inline this function into its caller.
#pragma HLS inline
193+
for (unsigned OLid = 0; OLid < num_lanes; OLid++) {
194+
// Request unrolling of the per-output-lane loop.
#pragma HLS unroll
195+
// Write only when this output lane is enabled AND its selected input is valid.
if (out_valid[OLid]) {
196+
if (in_valid[select[OLid]]) {
197+
output_lanes[OLid].write(in[select[OLid]]);
198+
}
199+
}
200+
}
201+
}
202+
203+
204+
//------------------------------------------------------------
205+
// shuffler core: works on 1 partition
206+
//------------------------------------------------------------
179207
// shuffler states
180208
#define SF_WORKING 0 // normal working state
181209
#define SF_ENDING 1 // flushing the remaining packets in the arbiter
182210

183-
// shuffler core: works on 1 partition
184211
template<typename PayloadT, unsigned num_lanes>
185212
void shuffler_core(
186213
// fifos
@@ -226,9 +253,8 @@ void shuffler_core(
226253
loop_shuffle_pipeline:
227254
while (!loop_exit) {
228255
#pragma HLS pipeline II=1
229-
#pragma HLS dependence variable=resend inter RAW true distance=6
230-
#pragma HLS dependence variable=payload_resend inter RAW true distance=6
231-
// #pragma HLS dependence variable=in_addr inter RAW true distance=6
256+
#pragma HLS dependence variable=resend inter RAW true distance=9
257+
#pragma HLS dependence variable=payload_resend inter RAW true distance=9
232258

233259
// Fetch stage (F)
234260
// #ifndef __SYNTHESIS__
@@ -251,7 +277,8 @@ void shuffler_core(
251277
if (resend[ILid]) {
252278
valid[ILid] = 1;
253279
payload[ILid] = payload_resend[ILid];
254-
} else if (fetch_complete[ILid]) {
280+
} else
281+
if (fetch_complete[ILid]) {
255282
valid[ILid] = 0;
256283
payload[ILid] = (PayloadT){0,0,0,0};
257284
} else {
@@ -310,25 +337,26 @@ void shuffler_core(
310337
// ------- end of A stage
311338

312339
// crossbar stage (C)
313-
for (unsigned OLid = 0; OLid < num_lanes; OLid++) {
314-
#pragma HLS unroll
315-
if (xbar_valid[OLid]) {
316-
if (valid[xbar_sel[OLid]]) {
317-
output_lanes[OLid].write(payload[xbar_sel[OLid]]);
318-
}
319-
}
320-
// #ifndef __SYNTHESIS__
321-
// if (line_tracing_shuffle_core) {
322-
// if (state == SF_WORKING) {
323-
// std::cout << " Shuffle core: OLane " << OLid << ", "
324-
// << "sel: " << xbar_sel[OLid] << ", "
325-
// << "payload: " << payload[xbar_sel[OLid]] << ", "
326-
// << "p-valid:" << (valid[xbar_sel[OLid]] ? "x" : ".") << ", "
327-
// << "x-valid: " << (xbar_valid[OLid] ? "x" : ".") << std::endl;
328-
// }
329-
// }
330-
// #endif
331-
}
340+
crossbar<PayloadT, num_lanes>(
341+
valid,
342+
xbar_valid,
343+
xbar_sel,
344+
payload_resend,
345+
output_lanes
346+
);
347+
#ifndef __SYNTHESIS__
348+
// if (line_tracing_shuffle_core) {
349+
// for (unsigned OLid = 0; OLid < num_lanes; OLid++) {
350+
// if (state == SF_WORKING) {
351+
// std::cout << " Shuffle core: OLane " << OLid << ", "
352+
// << "sel: " << xbar_sel[OLid] << ", "
353+
// << "payload: " << payload_resend[xbar_sel[OLid]] << ", "
354+
// << "p-valid:" << (valid[xbar_sel[OLid]] ? "x" : ".") << ", "
355+
// << "x-valid: " << (xbar_valid[OLid] ? "x" : ".") << std::endl;
356+
// }
357+
// }
358+
// }
359+
#endif
332360
// ------- end of C stage
333361

334362
// line tracing for debug

spmv/libfpga/stream_utils.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ void axis_duplicate(
1414
while (!exit) {
1515
#pragma HLS pipeline II=1
1616
VEC_AXIS_T pkt = in.read();
17+
VEC_AXIS_T pkt_reg = reg(reg(pkt));
18+
VEC_AXIS_T pkt_replicas[N];
1719
for (unsigned k = 0; k < N; k++) {
1820
#pragma HLS unroll
19-
out[k].write(pkt);
21+
pkt_replicas[k] = reg(pkt_reg);
22+
out[k].write(pkt_replicas[k]);
2023
}
2124
exit = (pkt.user == EOS);
2225
}

unit_test_wrapper/makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
TARGET := hw_emu
22
UNIT := pe_tb
3-
IMPL := float
3+
IMPL := fixed
44

55
DEVICE = /opt/xilinx/platforms/xilinx_u280_xdma_201920_3/xilinx_u280_xdma_201920_3.xpfm
66

unit_tests/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
PROJ_ROOT = ..
2-
IMPL := float
2+
IMPL := fixed
33

44
ifeq ($(IMPL), float)
55
LIBFPGA_DIR = $(PROJ_ROOT)/spmv-fp/libfpga

unit_tests/test_pe.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -295,17 +295,17 @@ bool test_random() {
295295
int main(int argc, char ** argv) {
296296
bool passed = true;
297297
// passed = passed && test_nodep();
298-
// passed = passed && test_dep(1);
299-
// passed = passed && test_dep(2);
300-
// passed = passed && test_dep(3);
301-
// passed = passed && test_dep(4);
302-
// passed = passed && test_dep(5);
303-
// passed = passed && test_dep(6);
304-
// passed = passed && test_dep(7);
305-
passed = passed && test_dep(8);
306-
passed = passed && test_dep(9);
307-
passed = passed && test_dep(10);
308-
passed = passed && test_dep(11);
298+
passed = passed && test_dep(1);
299+
passed = passed && test_dep(2);
300+
passed = passed && test_dep(3);
301+
passed = passed && test_dep(4);
302+
passed = passed && test_dep(5);
303+
passed = passed && test_dep(6);
304+
passed = passed && test_dep(7);
305+
// passed = passed && test_dep(8);
306+
// passed = passed && test_dep(9);
307+
// passed = passed && test_dep(10);
308+
// passed = passed && test_dep(11);
309309
passed = passed && test_random();
310310

311311
std::cout << (passed ? "===== All Test Passed! =====" : "===== Test FAILED! =====") << std::endl;

0 commit comments

Comments
 (0)