// 111
// :Def: Critical Path :Def: Latency [of an action in a sequential circuit]
`default_nettype none
/// Behavioral multiplier: the product is written with the * operator.
//
//  w:     width in bits of each operand; the product is 2*w bits wide.
//  prod:  cand * plier.
module mult_behav_1
  #( int w = 16 )
   ( output uwire [2*w-1:0] prod,
     input uwire [w-1:0] cand, plier );

   // Multiplication is commutative, so operand order does not matter.
   assign prod = plier * cand;

endmodule
/// Behavioral w-bit adder.  The sum is truncated to w bits (no carry out).
//
//  Note that the vectors are numbered w down to 1 rather than w-1 down to 0.
module carry_prop_adder
  #( int w = 16 )
   ( output uwire [w:1] s,
     input uwire [w:1] a, b );

   assign s = b + a;

endmodule
/// Combinational multiplier built from a linear chain of w adders.
//
//  Adder number r adds the partial product for multiplier bit r to the
//  running sum coming out of adder r-1.
module mult_linear
  #( int w = 16 )
   ( output logic [2*w-1:0] prod,
     input uwire [w-1:0] plier, cand );

   // b[r] is the running sum entering stage r; b[w] is the final product.
   uwire [2*w-1:0] b[w:0];

   assign prod = b[w];
   assign b[0] = 0;

   for ( genvar r = 0; r < w; r++ ) begin

      // Partial product: the multiplicand shifted to position r, or zero.
      uwire [2*w-1:0] pp = plier[r] ? cand << r : 0;

      carry_prop_adder #(2*w) a( b[r+1], pp, b[r] );

   end

endmodule
/// mult_linear with registers on its inputs and on its output.
//
//  Operands are captured on one positive clock edge; the product of the
//  previously captured operands is captured into prod on the same edge.
module mult_linear_clk
  #( int w = 16 )
   ( output logic [2*w-1:0] prod,
     input uwire [w-1:0] cand, plier,
     input uwire clk );

   // Registered copies of the operands, and the combinational product.
   logic [w-1:0] candcpy, pliercpy;
   uwire [2*w-1:0] p;

   mult_linear #(w) ml(p, candcpy, pliercpy);

   always_ff @( posedge clk ) begin
      prod <= p;
      pliercpy <= plier;
      candcpy <= cand;
   end

endmodule
/// Binary full adder: {c,s} is the two-bit sum a + b + cin.
module bfa
   ( output uwire c, s,
     input uwire a, b, cin );

   // Sum bit: odd parity of the three inputs.
   assign s = a ^ b ^ cin;

   // Carry bit: set when at least two of the inputs are 1.
   assign c = a & b | a & cin | b & cin;

endmodule
/// Binary half adder: {c,s} is the two-bit sum a + cin.
module bha
   ( output uwire c, s,
     input uwire a, cin );

   assign s = a ^ cin;
   assign c = a & cin;

endmodule
/// Combinational wa-bit by wb-bit multiplier built only from bfa instances.
//
//  There is one row of adders per bit of b.  Row ib adds the partial
//  product for b[ib] to the sum of row ib+1 shifted left by one bit, so the
//  rows are chained from ib = wb-1 down to ib = 0.
module mult_bfas
#( int wa = 5, wb = wa )
( output uwire [wa+wb-1:0] prod,
input uwire [wa-1:0] a,
input uwire [wb-1:0] b );
// s[ib] is the wa-bit sum produced by row ib.  Row ib reads s[ib+1], so
// s[wb] is the all-zero input to the last row.
uwire [wb:0][wa-1:0] s;
assign s[wb] = wa'(0);
// The sum of row 0 provides the low wa bits of the product.
assign prod[wa-1:0] = s[0];
// c2 is the carry chain of the adders producing the upper wb product bits.
uwire [wb:-1] c2;
assign c2[-1] = 0;
for ( genvar ib = 0; ib < wb; ib++ ) begin
// c: ripple carries within this row;  pp: partial product for b[ib].
uwire [wa-1:0] c, pp = b[ib] ? a : 0;
// Nothing is shifted into bit 0 of a row, so it needs no adder.
assign s[ib][0] = pp[0];
assign c[0] = 0;
// Bit ia of the row: pp[ia] plus bit ia-1 of the next row's sum plus carry.
for ( genvar ia = 1; ia < wa; ia++ )
bfa B( c[ia], s[ib][ia], s[ib+1][ia-1], pp[ia], c[ia-1] );
// Product bit wa+ib: the bit shifted out of s[ib+1] plus this row's carry
// out plus the carry from the adder for product bit wa+ib-1.
bfa my_bfa( c2[ib], prod[wa+ib], s[ib+1][wa-1], c[wa-1], c2[ib-1] );
end
endmodule
/// Recursive (tree) combinational multiplier.
//
//  wa, wb:  widths of operands a and b.
//  wp:      width of the product port; defaults to wa + wb.
//
//  Operand a is split into a low part of wn = wa/2 bits and a high part of
//  the remaining wa - wn bits.  Each part is multiplied by b using a
//  recursive instantiation and the two sub-products are added.  The high
//  part is sized wa - wn (not wn) so that odd values of wa are handled
//  correctly; for even wa the two sizes are the same.
module mult_tree
  #( int wa = 16, int wb = wa, int wp = wa + wb )
   ( output uwire [wp-1:0] prod,
     input uwire [wa-1:0] a,
     input uwire [wb-1:0] b );

   if ( wa == 1 ) begin

      // Base case: a one-bit multiplier selects b or zero.
      assign prod = a ? b : 0;

   end else begin

      // Width of the low and high parts of a.
      localparam int wn = wa / 2;
      localparam int wh = wa - wn;

      // Widths of the products of the low and high parts with b.
      localparam int wx = wb + wn;
      localparam int wxh = wb + wh;

      uwire [wx-1:0] prod_lo;
      uwire [wxh-1:0] prod_hi;

      mult_tree #(wn,wb) mlo( prod_lo, a[wn-1:0], b );
      mult_tree #(wh,wb) mhi( prod_hi, a[wa-1:wn], b );

      // The high sub-product carries a weight of 2^wn.
      assign prod = prod_lo + ( prod_hi << wn );

   end

endmodule
/// Return the smaller of a and b (a when they are equal).
function int min( int a, int b );
   if ( a <= b ) return a;
   return b;
endfunction
/// Recursive (tree) combinational multiplier that adds sub-products using
/// explicit bfa and bha instances.
//
//  wa, wb:  widths of operands a and b.
//  wp:      width of the product port; defaults to wa + wb.  The widths of
//           the sub-products are limited using min() so that they are no
//           wider than what fits in wp bits.
module mult_tree_bfas
#( int wa = 16, int wb = wa, int wp = wa + wb )
( output uwire [wp-1:0] prod,
input uwire [wa-1:0] a,
input uwire [wb-1:0] b );
if ( wa == 1 ) begin
// Base case: a one-bit multiplier selects b or zero.
assign prod = a ? b : 0;
end else begin
// Split a into a low part of wa_lo bits and a high part of wa_hi bits.
// The *_full values are the widths of the untruncated sub-products.
localparam int wa_lo = wa / 2;
localparam int wp_lo_full = wb + wa_lo;
localparam int wa_hi = wa - wa_lo;
localparam int wp_hi_full = wb + wa_hi;
// Sub-product widths actually used.  The high sub-product is placed
// starting at bit wa_lo, so at most wp - wa_lo of its bits are needed.
localparam int wp_lo = min( wp_lo_full, wp );
localparam int wp_hi = min( wp_hi_full, wp - wa_lo );
uwire [wp_lo-1:0] prod_lo;
uwire [wp_hi-1:0] prod_hi;
// Number of bits of b passed to the high-part multiplier.
localparam int wb_hi = min( wb, wp_hi );
mult_tree_bfas #(wa_lo,wb, wp_lo)
mlo( prod_lo, a[wa_lo-1:0], b );
mult_tree_bfas #(wa_hi,wb_hi,wp_hi)
mhi( prod_hi, a[wa-1:wa_lo], b[wb_hi-1:0] );
// The low wa_lo product bits come directly from the low sub-product.
assign prod[wa_lo-1:0] = prod_lo[wa_lo-1:0];
// Ripple carry chain; c[i] is the carry out of bit position i.
uwire c[wp-1:wa_lo-1];
assign c[wa_lo-1] = 0;
// Positions at which both sub-products have a bit: use full adders.
// Note that the instance name b hides port b inside the loop body.
for ( genvar i=wa_lo; i<wp_lo; i++ )
bfa b(c[i], prod[i], prod_lo[i], prod_hi[i-wa_lo], c[i-1] );
// Positions at which only the high sub-product has a bit: use half adders.
for ( genvar i=wp_lo; i<wp_hi+wa_lo; i++ )
bha b(c[i], prod[i], prod_hi[i-wa_lo], c[i-1] );
// Any product bits above both sub-products are set to zero.
localparam int wz = wp - wp_hi - wa_lo;
if ( wz > 0 ) assign prod[wp-1 : wp_hi+wa_lo] = 0;
end
endmodule
`ifdef XXX
Module Name Area Period Period Total Init.
Target Actual Latency Interv
mult_bfas_wa8 54394 200000 6116 6116 6116
mult_bfas_wa16 225365 200000 13812 13812 13812
mult_bfas_wa32 920712 200000 29204 29204 29204
mult_bfas_wa64 3616401 200000 60949 60949 60949
mult_tori_bfas_wa8 64685 200000 5786 5786 5786
mult_tori_bfas_wa16 279167 200000 11750 11750 11750
mult_tori_bfas_wa32 1148780 200000 23256 23256 23256
mult_tori_bfas_wa64 3699504 200000 50712 50712 50712
mult_tree_wa8 53168 200000 6131 6131 6131
mult_tree_wa16 224139 200000 13046 13046 13046
mult_tree_wa32 919486 200000 26875 26875 26875
mult_tree_wa64 3615079 200000 55494 55494 55494
mult_behav_1_w8 53168 200000 6062 6062 6062
mult_behav_1_w16 215672 200000 13551 13551 13551
mult_behav_1_w32 764479 200000 26485 26485 26485
mult_behav_1_w64 2891012 200000 52637 52637 52637
mult_bfas_wa8 145104 1 2878 2878 2878
mult_bfas_wa16 510180 1 6319 6319 6319
mult_bfas_wa32 1968611 1 12733 12733 12733
mult_bfas_wa64 7601789 1 25830 25830 25830
mult_tori_bfas_wa8 148649 1 2443 2443 2443
mult_tori_bfas_wa16 503217 1 4908 4908 4908
mult_tori_bfas_wa32 2502944 1 9204 9204 9204
mult_tori_bfas_wa64 8862465 1 14525 14525 14525
mult_tree_wa8 130322 1 2184 2184 2184
mult_tree_wa16 534195 1 3129 3129 3129
mult_tree_wa32 1951056 1 4166 4166 4166
mult_tree_wa64 7283978 1 5320 5320 5320
mult_behav_1_w8 131694 1 2182 2182 2182
mult_behav_1_w16 501798 1 3106 3106 3106
mult_behav_1_w32 1582738 1 4310 4310 4310
mult_behav_1_w64 5488252 1 5272 5272 5272
`endif
/// Sequential multiplier with a start / ready handshake.
//
//  start:  when 1 at a positive clock edge, clears ready, the accumulator,
//          and the bit position.
//  ready:  set to 1, and prod loaded from the accumulator, at a positive
//          edge at which start is 0 and all w bit positions have been
//          processed.
module mult_seq_sr #( int w = 16 )
( output logic [2*w-1:0] prod,
output logic ready,
input uwire [w-1:0] cand, plier,
input uwire start, clk);
// pos must be able to hold the value w, which marks completion.
localparam int wlog = $clog2(w+1);
logic [wlog-1:0] pos;
logic [2*w-1:0] accum;
// pp is a temporary (assigned with a blocking assignment below).
logic [w-1:0] pp;
always_ff @( posedge clk )
if ( start ) begin
ready <= 0;
accum <= 0;
pos <= 0;
end else if ( pos != w ) begin
// Add the partial product for multiplier bit pos to the accumulator.
pp = plier[pos] ? cand : 0;
accum <= ( pp << pos ) + accum;
pos <= pos + 1;
end else begin
// All w positions processed: publish the result.
ready <= 1;
prod <= accum;
end
endmodule
/// Free-running sequential multiplier: one partial product is added per
/// clock cycle and a new product is written every w cycles.
//
//  w:  operand width; must be a power of 2 because the bit-position
//      counter relies on wrapping from w-1 to 0.
//
//  The operands are read directly from the ports in every cycle, so they
//  need to be held steady while a product is being computed.
module mult_seq #( int w = 16 )
   ( output logic [2*w-1:0] prod,
     input uwire [w-1:0] cand, plier,
     input uwire clk);

   localparam int wlog = $clog2(w);

   // The check below is simulation-only code, so it is hidden from
   // synthesis using the translate_off / translate_on pragmas.
   //
   // cadence translate_off
   initial if ( w != 1 << wlog ) $fatal(1,"Size must be a power of 2.");
   // cadence translate_on

   bit [wlog-1:0] pos;
   logic [2*w-1:0] accum;

   always_ff @( posedge clk ) begin

      // At position 0 the accumulator holds the sum of all w partial
      // products from the previous pass: copy it out and start over.
      if ( pos == 0 ) begin
         prod = accum;
         accum = 0;
      end

      // Add the partial product for multiplier bit pos.
      if ( plier[pos] ) accum += cand << pos;

      // Wraps to 0 after w-1.
      pos++;

   end

endmodule
/// Free-running sequential multiplier that uses an explicitly instantiated
/// adder (carry_prop_adder) rather than the + operator.
//
//  The bit position pos is incremented on every positive clock edge and
//  wraps, since it is wlog bits wide.
module mult_seq_ga #( int w = 16 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] cand, plier,
input uwire clk );
localparam int wlog = $clog2(w);
bit [wlog-1:0] pos;
bit [2*w-1:0] accum;
// sum is always accum plus the partial product for the current position.
uwire [2*w-1:0] sum;
uwire [2*w-1:0] pp = plier[pos] ? cand << pos : 0;
carry_prop_adder #(2*w) ga( sum, accum, pp );
always_ff @( posedge clk ) pos <= pos + 1;
always_ff @( posedge clk )
if ( pos == 0 ) begin
// At position 0 the adder output (accum plus the partial product for
// bit 0) is captured as the product and the accumulator is cleared.
prod = sum;
accum = 0;
end else begin
// At other positions the adder output becomes the new accumulator value.
accum = sum;
end
endmodule
/// Sequential multiplier with a start / ready handshake in which the
/// accumulator is shifted right each step, so that the multiplier bit being
/// examined is always accum[0] and the multiplicand is always added at the
/// same position.
//
//  start:  when 1 at a positive clock edge, clears ready and pos and loads
//          plier into the accumulator.
//  ready:  set to 1, and prod loaded from the accumulator, at a positive
//          edge at which start is 0 and w steps have been performed.
module mult_seq_stream_sr #( int w = 16 )
( output logic [2*w-1:0] prod,
output logic ready,
input uwire [w-1:0] cand, plier,
input uwire start, clk);
// pos must be able to hold the value w, which marks completion.
localparam int wlog = $clog2(w+1);
logic [wlog-1:0] pos;
logic [2*w-1:0] accum;
// pp is a temporary (assigned with a blocking assignment below).
logic [w-1:0] pp;
always_ff @( posedge clk )
if ( start ) begin
ready <= 0;
accum <= plier;
pos <= 0;
end else if ( pos != w ) begin
// The low accumulator bit is the multiplier bit for this step.
pp = accum[0] ? cand : 0;
// Shift the accumulator right by one and add the partial product at bit
// position w-1.  (Binary minus binds tighter than <<: the shift is w-1.)
accum <= ( pp << w-1 ) + ( accum >> 1 );
pos <= pos + 1;
end else begin
// All w steps done: publish the result.
ready <= 1;
prod <= accum;
end
endmodule
// 000000010111111
// 111111
/// Free-running sequential multiplier in which the accumulator is shifted
/// right each cycle; the low half initially holds the multiplier and the
/// multiplicand is always added into the upper half.
//
//  A new product is copied to prod each time pos is 0, which happens once
//  every w cycles.
module mult_seq_stream #( int w = 16 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] cand, plier,
input uwire clk);
localparam int wlog = $clog2(w);
bit [wlog-1:0] pos;
logic [2*w-1:0] accum;
always_ff @( posedge clk ) begin
// pp is one bit wider than cand; its upper bit is always zero.
logic [w:0] pp;
if ( pos == 0 ) begin
// Publish the finished product and load the next multiplier.
prod = accum;
accum = plier;
pos = w - 1;
end else begin
pos--;
end
// The low accumulator bit is the multiplier bit for this step.
pp = accum[0] ? { 1'b0, cand } : 0;
// Add pp to the upper half using a w+1 bit sum, and shift the whole
// accumulator right by one by dropping bit 0 of the lower half.
accum = { { 1'b0, accum[2*w-1:w] } + pp, accum[w-1:1] };
end
endmodule
`ifdef XXX
Module Name Area Period Period Total Init.
Target Actual Latency Interv
mult_behav_1_w8 53168 1000 6062 6062 6062
mult_behav_1_w16 215672 1000 13551 13551 13551
mult_behav_1_w32 764479 1000 26485 26485 26485
mult_behav_1_w64 2891332 1000 52332 52332 52332
mult_seq_w8 56081 1000 8120 64960 64960
mult_seq_w16 122475 1000 15916 254656 254656
mult_seq_w32 258750 1000 31385 1004320 1004320
mult_seq_w64 544285 1000 58476 3742464 3742464
mult_seq_stream_w8 44320 1000 4518 36144 36144
mult_seq_stream_w16 78395 1000 8868 141888 141888
mult_seq_stream_w32 153863 1000 16361 523552 523552
mult_seq_stream_w64 304047 1000 30276 1937664 1937664
endmodule
`endif
/// Sequential multiplier with a start / ready handshake that handles m
/// partial products per clock cycle.
//
//  w:      operand width.
//  m:      number of partial products added per cycle.
//  start:  when 1 at a positive clock edge, clears ready, the accumulator,
//          and the iteration counter.
//  ready:  set to 1, and prod loaded from the accumulator, at a positive
//          edge at which start is 0 and all iterations have completed.
//
//  When m does not divide w the last iteration covers bit positions at or
//  beyond w.  Those positions are treated as zero multiplier bits; reading
//  plier at such a position would otherwise yield x and corrupt accum.
module mult_seq_m_sr #( int w = 16, int m = 2 )
   ( output logic [2*w-1:0] prod,
     output logic ready,
     input uwire [w-1:0] cand, plier,
     input uwire start, clk);

   // Number of cycles needed: ceiling of w / m.
   localparam int iterations = ( w + m - 1 ) / m;
   localparam int iter_lg = $clog2(iterations);

   // iter must be able to hold the value iterations, which marks completion.
   logic [iter_lg:0] iter;
   logic [2*w-1:0] accum, accum_next;

   // pp is a temporary (assigned with blocking assignments below).
   logic [w-1:0] pp;

   always_ff @( posedge clk )
     if ( start ) begin

        ready <= 0;
        accum <= 0;
        iter <= 0;

     end else if ( iter != iterations ) begin

        // Chain m additions starting from the current accumulator value.
        accum_next = accum;
        for ( int i=0; i<m; i++ )
          begin
             int pos;
             pos = iter * m + i;
             // Guard against positions beyond the multiplier's top bit.
             pp = ( pos < w && plier[pos] ) ? cand : 0;
             accum_next = ( pp << pos ) + accum_next;
          end
        accum <= accum_next;
        iter <= iter + 1;

     end else begin

        // All iterations done: publish the result.
        ready <= 1;
        prod <= accum;

     end

endmodule
/// Free-running sequential multiplier that consumes an m-bit digit of the
/// multiplier per clock cycle.
//
//  w:  operand width.
//  m:  number of multiplier bits handled per cycle.
module mult_seq_dm
#( int w = 16,
int m = 2 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] cand, plier,
input uwire clk);
// Number of m-bit digits needed to cover w bits: ceiling of w / m.
localparam int iterations = ( w + m - 1 ) / m;
localparam int iter_lg = $clog2(iterations);
// The multiplier viewed as an array of m-bit digits.
uwire [iterations-1:0][m-1:0] plier_2d = plier;
// iter is iter_lg bits wide (indices iter_lg down to 1).
bit [iter_lg:1] iter;
logic [2*w-1:0] accum;
always_ff @( posedge clk ) begin
// The comparison value is iterations cast to iter_lg bits, so when
// iterations is a power of 2 the test is against 0.
if ( iter == iter_lg'(iterations) ) begin
prod = accum;
accum = 0;
iter = 0;
end
// Add the multiplicand times the current digit, shifted to the digit's
// position.  (The multiplication is performed before the shift.)
accum += cand * plier_2d[iter] << ( iter * m );
iter++;
end
endmodule
/// Free-running sequential multiplier using a row of w one-bit adder cells
/// through which sums move by one cell per clock cycle; one product bit
/// is produced per cycle, taking 2w cycles for a complete product.
//
//  w:  operand width; 2*w must be a power of 2 because the position
//      counter relies on wrapping from 2w-1 to 0.
module mult_seq_wfront
  #( int w = 16 )
   ( output logic [2*w-1:0] prod,
     input uwire [w-1:0] plier, cand,
     input uwire clk );

   localparam int wlog = $clog2(2*w);

   // The elaboration-time check below is hidden from synthesis using the
   // translate_off / translate_on pragmas.
   //
   // cadence translate_off
   if ( 2**wlog != 2*w )
     $fatal(2,"Size, parameter w=%0d, must be a power of 2.\n",w);
   // cadence translate_on

   // Index of the product bit produced in the current cycle.
   bit [wlog-1:0] pos;
   always_ff @( posedge clk ) pos <= pos + 1;

   // Per-cell state: sum bit handed to the next cell and carry kept in place.
   logic [w-1:0] sum, carry;
   logic [1:0] sc;

   always_ff @( posedge clk ) begin

      for ( int i=0; i<w; i++ ) begin
         logic a, b, c;

         // Cell i handles multiplicand bit w-1-i.  Multiplier bits are
         // applied only during the first w cycles of a pass.
         a = pos < w && cand[w-1-i] ? plier[pos] : 0;

         // At position 0 leftover sums and carries from the previous pass
         // are ignored.  Cell 0 has no neighbor providing a sum.
         b = pos && i ? sum[i-1] : 0;
         c = pos ? carry[i] : 0;

         sc = a + b + c;
         sum[i] <= sc[0];
         carry[i] <= sc[1];
      end

      // After the loop sc holds the result of the last cell, i = w-1,
      // whose sum bit is product bit pos.
      prod[pos] = sc[0];

   end

endmodule
/// Sequential multiplier with a start / ready handshake using a row of w
/// one-bit adder cells; one product bit is written per clock cycle.
//
//  start:  when 1 at a positive clock edge, clears pos and ready.
//  ready:  set to 1 at a positive edge at which start is 0 and all 2w
//          product bits have been written.
module mult_seq_wfront_sr
#( int w = 16 )
( output logic [2*w-1:0] prod,
output logic ready,
input uwire [w-1:0] plier, cand,
input uwire start, clk );
// pos must be able to hold the value 2*w, which marks completion.
localparam int wlog = $clog2(2*w+1);
logic [wlog-1:0] pos;
// Per-cell state: sum bit handed to the next cell and carry kept in place.
logic [w-1:0] sum, carry;
logic [1:0] sc;
always_ff @( posedge clk )
if ( start ) begin
pos <= 0;
ready <= 0;
end else if ( pos != 2*w ) begin
for ( int i=0; i<w; i++ ) begin
logic a, b, c;
// Cell i handles multiplicand bit w-1-i; multiplier bits are applied
// only while pos is less than w.
a = pos < w && cand[w-1-i] ? plier[pos] : 0;
// At position 0 the stored sums and carries are ignored.  Cell 0 has no
// neighbor providing a sum.
b = pos && i ? sum[i-1] : 0;
c = pos ? carry[i] : 0;
sc = a + b + c;
sum[i] <= sc[0];
carry[i] <= sc[1];
end
// After the loop sc holds the result of the last cell, i = w-1.
prod[pos] = sc[0];
pos <= pos + 1;
end else begin
ready <= 1;
end
endmodule
/// Variation on mult_seq_wfront in which the values derived from pos that
/// are used by the adder cells come from registers or from a single bit of
/// pos rather than from comparisons performed in the same cycle.
//
//  w:  operand width; must be a power of 2.
module mult_seq_wfront_opt
  #( int w = 16 )
   ( output logic [2*w-1:0] prod,
     input uwire [w-1:0] plier, cand,
     input uwire clk );

   localparam int wlog = $clog2(w);

   // The elaboration-time check below is hidden from synthesis using the
   // translate_off / translate_on pragmas.
   //
   // cadence translate_off
   if ( 2**wlog != w )
     $fatal(2,"Size, parameter w=%0d, must be a power of 2.\n",w);
   // cadence translate_on

   // pos counts 0 to 2w-1.  pos_eq_0 receives the carry out of the
   // increment, which is 1 exactly when pos wraps around to 0.
   bit pos_eq_0;
   bit [wlog:0] pos;
   always_ff @( posedge clk ) { pos_eq_0, pos } <= pos + 1;

   // The most-significant bit of pos is 0 during the first w positions.
   uwire pos_lt_w = !pos[wlog];

   // Per-cell state: sum bit handed to the next cell and carry kept in place.
   logic [w-1:0] sum, carry;
   logic [1:0] sc;

   // plier_pos is loaded one cycle ahead of its use, so the multiplier is
   // rotated by one position: plier_rot[k] is plier[k+1] (modulo w).
   uwire [w-1:0] plier_rot = {plier[0],plier[w-1:1]};
   logic plier_pos;
   always_ff @( posedge clk ) plier_pos <= plier_rot[wlog'(pos)];

   always_ff @( posedge clk ) begin

      for ( int i=0; i<w; i++ ) begin
         logic a, b, c;

         // At position 0 leftover sums and carries are ignored.  Cell 0
         // has no neighbor providing a sum.
         a = !pos_eq_0 && carry[i];
         b = !pos_eq_0 && i && sum[i-1];

         // Cell i handles multiplicand bit w-1-i.
         c = pos_lt_w && plier_pos && cand[w-1-i];

         sc = a + b + c;
         sum[i] <= sc[0];
         carry[i] <= sc[1];
      end

      // After the loop sc holds the result of the last cell, i = w-1.
      prod[pos] = sc[0];

   end

endmodule
`ifdef xxx
Module Name Area Period Period Total Init.
Target Actual Latency Interv
mult_seq_wfront_w8 45390 1000 3132 50112 50112
mult_seq_wfront_w16 89668 1000 3260 104320 104320
mult_seq_wfront_w32 178367 1000 4202 268928 268928
mult_seq_wfront_w64 345415 1000 4716 603648 603648
mult_seq_wfront_opt_w8 47575 1000 2428 38848 38848
mult_seq_wfront_opt_w16 94652 1000 2275 72800 72800
mult_seq_wfront_opt_w32 177706 1000 2546 162944 162944
mult_seq_wfront_opt_w64 345301 1000 2724 348672 348672
mult_seq_stream_w8 44320 1000 4518 36144 36144
mult_seq_stream_w16 78395 1000 8868 141888 141888
mult_seq_stream_w32 153863 1000 16361 523552 523552
mult_seq_stream_w64 304047 1000 30276 1937664 1937664
`endif
/// Free-running sequential multiplier based on a row of w one-bit adder
/// cells in which m steps are performed, and so m product bits are
/// written, per clock cycle.
//
//  w:  operand width.
//  m:  number of steps (product bits) per clock cycle.
module mult_seq_wfront_m #( int w = 16, int m = 2 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] plier, cand,
input uwire clk );
// Number of cycles needed to cover 2w bit positions: ceiling of 2w / m.
localparam int iterations = ( 2*w + m - 1 ) / m;
localparam int iter_lg = $clog2(iterations);
localparam int wlog = $clog2(m * iterations);
// iter is incremented every cycle and wraps since it is iter_lg bits wide.
bit [iter_lg-1:0] iter;
always_ff @( posedge clk ) iter <= iter + 1;
// Cell state.  Index -1 is an extra position that is always zero and
// serves as the sum input to cell 0.
logic [w-1:-1] sum, carry;
always_ff @( posedge clk ) begin
// j_sum[j] and j_carry[j] are the cell values entering step j of this
// cycle; element m holds the values to be stored at the end of the cycle.
logic [w-1:-1] j_sum[m+1], j_carry[m+1];
logic [1:0] sc;
// During iteration 0 the stored values are ignored.
j_sum[0] = iter ? sum : 0;
j_carry[0] = iter ? carry : 0;
for ( int j=0; j<m; j++ ) begin
// pos is the product bit position handled by step j.
logic [wlog-1:0] pos;
pos = m*iter + j;
j_sum[j+1][-1] = 0;
for ( int i=0; i<w; i++ ) begin
logic a, b, c;
a = j_carry[j][i];
b = j_sum[j][i-1];
// Cell i handles multiplicand bit w-1-i; multiplier bits are applied
// only at positions less than w.
c = pos < w && cand[w-1-i] && plier[pos];
sc = a + b + c;
{ j_carry[j+1][i], j_sum[j+1][i] } = sc;
end
// After the inner loop sc holds the result of the last cell, i = w-1.
// Positions at or beyond 2w have no product bit to write.
if ( pos < 2*w ) prod[pos] = sc[0];
end
sum <= j_sum[m];
carry <= j_carry[m];
end
endmodule
`ifdef DONT_DEFINE_ME
Module Name Area Period Period Total Init.
Target Actual Latency Interv
mult_seq_wfront_w16 89668 1000 3260 104320 104320
mult_seq_wfront_opt_w16 94652 1000 2275 72800 72800
mult_seq_wfront_m_w16_m1 97631 1000 3500 112000 112000
mult_seq_wfront_m_w16_m2 104787 1000 3658 58528 58528
mult_seq_wfront_m_w16_m4 130378 1000 3942 31536 31536
mult_seq_wfront_m_w16_m8 171258 1000 6417 25668 25668
mult_seq_dm_w16_m1 121611 1000 16360 261760 261760
mult_seq_dm_w16_m2 131048 1000 16730 133840 133840
mult_seq_dm_w16_m4 145285 1000 16704 66816 66816
mult_seq_dm_w16_m8 196118 1000 15161 30322 30322
mult_seq_wfront_w32 178367 1000 4202 268928 268928
mult_seq_wfront_opt_w32 177706 1000 2546 162944 162944
mult_seq_wfront_m_w32_m1 191334 1000 3766 241024 241024
mult_seq_wfront_m_w32_m2 205303 1000 3857 123424 123424
mult_seq_wfront_m_w32_m4 260182 1000 5266 84256 84256
mult_seq_wfront_m_w32_m8 351910 1000 7031 56248 56248
mult_seq_dm_w32_m1 246818 1000 31113 995616 995616
mult_seq_dm_w32_m2 279486 1000 30994 495904 495904
mult_seq_dm_w32_m4 314724 1000 32127 257016 257016
mult_seq_dm_w32_m8 408659 1000 31251 125004 125004
`endif
`ifdef DONT_DEFINE_ME
Module Name Area Period Period Total Init.
Target Actual Latency Interv
mult_behav_1_w8 53168 1000 6062 6062 6062
mult_behav_1_w16 215672 1000 13551 13551 13551
mult_behav_1_w32 764479 1000 26485 26485 26485
mult_behav_1_w64 2891332 1000 52332 52332 52332
mult_seq_stream_w8 44320 1000 4518 36144 36144
mult_seq_stream_w16 78395 1000 8868 141888 141888
mult_seq_stream_w32 153863 1000 16361 523552 523552
mult_seq_stream_w64 304047 1000 30276 1937664 1937664
mult_seq_wfront_w8 45390 1000 3132 50112 50112
mult_seq_wfront_w16 89668 1000 3260 104320 104320
mult_seq_wfront_w32 178367 1000 4202 268928 268928
mult_seq_wfront_w64 345415 1000 4716 603648 603648
mult_seq_wfront_opt_w8 47575 1000 2428 38848 38848
mult_seq_wfront_opt_w16 94652 1000 2275 72800 72800
mult_seq_wfront_opt_w32 177706 1000 2546 162944 162944
mult_seq_wfront_opt_w64 345301 1000 2724 348672 348672
mult_pipe_wfront_w8 239827 1000 993 7944 7944
mult_pipe_wfront_w16 1012675 1000 1173 18768 18768
mult_pipe_wfront_w32 4158007 1000 1512 48384 48384
mult_pipe_wfront_w64 16865186 1000 2256 144384 144384
mult_seq_csa_w8 80488 1000 9266 74128 74128
mult_seq_csa_w16 162743 1000 16580 265280 265280
mult_seq_csa_w32 343497 1000 31074 994368 994368
mult_seq_csa_w64 715059 1000 60431 3867584 3867584
mult_pipe1_w8 137822 1000 3859 30872 3859
mult_pipe1_w16 571541 1000 7499 119984 7499
mult_pipe1_w32 2325284 1000 14746 471872 14746
mult_pipe1_w64 9397076 1000 28722 1838208 28722
mult_seq_dm_w16_m1 121611 1000 16360 261760 261760
mult_seq_dm_w16_m2 131048 1000 16730 133840 133840
mult_seq_dm_w16_m4 145285 1000 16704 66816 66816
mult_seq_dm_w16_m8 196118 1000 15161 30322 30322
mult_seq_dm_w32_m1 246818 1000 31113 995616 995616
mult_seq_dm_w32_m2 279486 1000 30994 495904 495904
mult_seq_dm_w32_m4 314724 1000 32127 257016 257016
mult_seq_dm_w32_m8 408659 1000 31251 125004 125004
mult_seq_wfront_m_w16_m1 97631 1000 3500 112000 112000
mult_seq_wfront_m_w16_m2 104787 1000 3658 58528 58528
mult_seq_wfront_m_w16_m4 130378 1000 3942 31536 31536
mult_seq_wfront_m_w16_m8 171258 1000 6417 25668 25668
mult_seq_wfront_m_w32_m1 191334 1000 3766 241024 241024
mult_seq_wfront_m_w32_m2 205303 1000 3857 123424 123424
mult_seq_wfront_m_w32_m4 260182 1000 5266 84256 84256
mult_seq_wfront_m_w32_m8 351910 1000 7031 56248 56248
`endif
// cadence translate_off
/// Pass the clock and the cycle count through a program block.
//
//  clk_reactive and cycle_reactive are copies of clk and cycle driven from
//  inside a program.
program reactivate
   ( output uwire clk_reactive,
     output int cycle_reactive,
     input uwire clk,
     input var int cycle );

   assign cycle_reactive = cycle;
   assign clk_reactive = clk;

endprogram
/// Testbench for the multiplier modules.
//
//  Each multiplier under test writes its product to its own element of
//  prod and is described by an entry of the associative array pi, keyed by
//  a display name.  For every test the operands are applied, each module's
//  output is collected once the module indicates it is ready (or after a
//  latency computed from its attributes), and the output is compared with
//  prod[0], the output of mult_behav_1.
module testbench;

   localparam int w = 16;
   localparam int num_tests = 1000;
   localparam int NUM_MULT = 20;
   // NOTE(review): err_limit is not used below; the number of error
   // messages shown is limited by a literal 5.
   localparam int err_limit = 7;

   bit use_others;
   logic start;
   logic [w-1:0] plier, cand;
   logic [w-1:0] plierp, candp;
   logic [2*w-1:0] prod[NUM_MULT];
   logic ready[NUM_MULT];

   // Information about one multiplier under test.
   typedef struct
     { int idx; int err_count = 0;
       bit seq = 0;
       bit pipe = 0; bit wf = 0; bit st_rdy = 0;
       int deg = 1;
       logic [2*w-1:0] sout = 'h111; int cyc_tot = 0;
       int latency = 0;
       } Info;
   Info pi[string];

   localparam int cycle_limit = num_tests * w * 8;
   int cycle;
   bit done;
   logic clock;

   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clock,cycle);

   // Generate the clock and count cycles.  The simulation ends when the
   // tests finish or the cycle limit is reached.
   initial begin
      clock = 0;
      cycle = 0;
      fork
         forever #10 cycle += clock++;
         wait( done );
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;
      $finish();
   end

   // Shortly after each positive edge: pulse start and copy the operands
   // when use_others is set, otherwise drive plierp and candp with other
   // values.
   initial begin
      while ( !done ) @( posedge clk_reactive ) #1
        if ( use_others ) begin
           start = 1;
           plierp = plier;
           candp = cand;
           use_others = 0;
        end else begin
           start = 0;
           plierp = cycle;
           candp = 256;
        end
   end

   // The tasks below register a multiplier under the name "<name> Deg <deg>".
   // They differ only in the attribute bits that are set.

   // Sequential.
   task pi_seq(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx; pi[m].seq = 1;
   endtask

   // Sequential, with start and ready signals.
   task pi_seqs(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx; pi[m].seq = 1; pi[m].st_rdy = 1;
   endtask

   // Sequential, wf attribute set.
   task pi_seqw(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx; pi[m].seq = 1; pi[m].wf = 1;
   endtask

   // Sequential, wf attribute set, with start and ready signals.
   task pi_seqws(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx; pi[m].seq = 1; pi[m].wf = 1; pi[m].st_rdy = 1;
   endtask

   // Pipelined.
   task pi_pipe(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx; pi[m].seq = 1; pi[m].pipe = 1;
   endtask

   // Multipliers under test.  Each instantiation is followed by code
   // registering it in pi.

   mult_behav_1 #(w) mb1(prod[0], plier, cand);
   initial pi["Behavioral"].idx = 0;

   mult_bfas #(w) mb17(prod[17], plier, cand);
   initial pi["Flat BFAs"].idx = 17;

   mult_linear #(w) ms1(prod[1], plier, cand);
   initial pi["Linear"].idx = 1;

   mult_tree #(w) ms2(prod[2], plier, cand);
   initial pi["Tree"].idx = 2;

   mult_tree_bfas #(w) ms18(prod[18], plier, cand);
   initial pi["Tree BFAs"].idx = 18;

   mult_seq_sr #(w) ms19(prod[19], ready[19], plier, cand, start, clock);
   initial pi_seqs(19,"Seq SR Simple",1);

   mult_seq #(w) ms3(prod[3], plier, cand, clock);
   initial begin
      automatic string m = "Sequential";
      pi[m].idx = 3; pi[m].seq = 1;
   end

   mult_seq_stream_sr #(w) mss1(prod[4], ready[4], plier, cand, start, clock);
   initial pi_seqs(4,"Seq SR Streamlined",1);

   mult_seq_wfront_sr #(w) ms7(prod[7], ready[7], plier, cand, start, clock);
   initial pi_seqws(7,"WFront SR", 1);

   mult_seq_wfront_opt #(w) ms8(prod[8], plier, cand, clock);
   initial pi_seqw(8,"WFront-Opt", 1);

   mult_seq_wfront_m #(w,2) ms12(prod[12], plier, cand, clock);
   initial pi_seqw(12,"WFront", ms12.m);

   mult_seq_wfront_m #(w,5) ms13(prod[13], plier, cand, clock);
   initial pi_seqw(13,"WFront", ms13.m);

   mult_seq_m_sr #(w,4) ms44(prod[5], ready[5], plier, cand, start, clock);
   initial pi_seqs(5,"Seq SR", ms44.m);

   // NOTE(review): mult_seq_m is not defined in the part of the file seen
   // during this review; presumably it is defined elsewhere.
   mult_seq_m #(w,3) ms43(prod[6], plier, cand, clock);
   initial pi_seq(6,"Seq", ms43.m);

   mult_seq_dm #(w,1) msd14(prod[14], plier, cand, clock);
   initial pi_seq(14,"Seq Rad", msd14.m);

   mult_seq_dm #(w,2) msd16(prod[16], plier, cand, clock);
   initial pi_seq(16,"Seq Rad", msd16.m);

   // The degree is read from the instance being registered, so that the
   // name stays correct if a parameter of another instance is changed.
   mult_seq_dm #(w,4) msd44(prod[9], plier, cand, clock);
   initial pi_seq(9,"Seq Rad", msd44.m);

   mult_seq_dm #(w,3) msd43(prod[10], plier, cand, clock);
   initial pi_seq(10,"Seq Rad", msd43.m);

   mult_linear_clk #(w) mlc1(prod[15], plier, cand, clock);
   initial begin
      automatic string m = $sformatf("Linear Clock");
      pi[m].idx = 15; pi[m].seq = 1;
   end

   // Operand values for the first tests, consumed from the front, one
   // value per operand.  Random values are used once the queue is empty.
   int tests[$] = {1,32, 1,1, 1,2, 1,32, 32, 1};

   initial begin

      done = 0;
      use_others = 0;
      start = 1;

      // The #0 delay lets the registration code in the other initial
      // blocks run first.  Then check that no element of prod is used by
      // more than one registered multiplier.
      #0 begin
         string index_used[NUM_MULT];
         automatic int n_unused = 0, n_reused = 0;
         foreach ( pi[ mut ] ) begin
            automatic int idx = pi[mut].idx;
            if ( index_used[idx].len() )
              begin
                 $write("*** Index %0d used by %s and %s.\n",
                        idx, index_used[idx], mut );
                 n_reused++;
              end
            index_used[idx] = mut;
         end
         $write("Unused positions: ");
         foreach ( index_used[idx] )
           if ( index_used[idx].len() == 0 )
             $write("%s%0d", n_unused++ ? ", " : "", idx);
         $write("%s.\n",n_unused ? "" : "none -- all used");
         if ( n_reused )
           $fatal(2, "\nFound %0d re-used indices. Aborting simulation.\n\n",
                  n_reused);
      end

      // Run one multiplication, 1 * 1, before starting the tests.
      @( posedge clk_reactive );
      plier = 1;
      cand = 1;
      use_others = 1;
      wait ( cycle_reactive >= 3*w );

      @( posedge clk_reactive );

      for ( int i=0; i<num_tests; i++ ) begin
         automatic int cyc_start = cycle;
         automatic int awaiting = pi.num();

         plier = tests.size() ? tests.pop_front() : $random();
         cand = tests.size() ? tests.pop_front() : $random();

         plierp = plier;
         candp = cand;
         use_others = 1;

         // Start one thread per multiplier to collect its output.
         foreach ( pi[muti] ) begin
            automatic string mut = muti; automatic Info p = pi[mut];
            fork begin
               automatic int arrival_late = 1;
               // Number of steps for a module handling deg bits per cycle.
               automatic int steps = ( w + p.deg - 1 ) / p.deg;
               // Number of cycles to wait before sampling the output of a
               // module that lacks a ready signal.
               automatic int latency
                 = !p.seq ? arrival_late + 1 :
                   p.wf ? 5 * steps :
                   !p.pipe ? 2 * steps : arrival_late + steps;
               automatic int eta = 1 + cyc_start + latency;
               automatic int idx = p.idx;
               pi[mut].latency = latency;
               if ( p.st_rdy ) begin
                  // Ready is expected to still be set from the prior run.
                  if ( !ready[idx] ) begin
                     $write("Expected %s to be ready.\n",mut);
                     $finish(2);
                  end
                  // Wait for ready to go low and then high again.
                  wait ( !ready[idx] );
                  wait ( ready[idx] );
               end else begin
                  wait ( cycle_reactive == eta );
               end
               awaiting--;
               pi[mut].sout = prod[pi[mut].idx];
               pi[mut].cyc_tot += cycle - cyc_start;
            end join_none;
         end

         wait ( awaiting == 0 );

         // Compare each collected output with the behavioral result.
         foreach ( pi[ mut ] )
           if ( prod[0] !== pi[mut].sout ) begin
              pi[mut].err_count++;
              if ( pi[mut].err_count < 5 )
                $write
                  ("%-25s wrong result: %0d * %0d: 0x%0h != 0x%0h (correct)\n",
                   mut, plier, cand, pi[mut].sout, prod[0]);
           end

         @( posedge clk_reactive );

      end

      foreach ( pi[ mut ] )
        $write("Ran %4d tests for %-25s, %4d errors found. Avg cyc %.1f\n",
               num_tests, mut, pi[mut].err_count,
               pi[mut].seq ? real'(pi[mut].cyc_tot) / num_tests : 1);

      done = 1;

      $finish(2);

   end

endmodule
// cadence translate_on