299
`default_nettype none
// Combinational floating-point multiplier wrapper: result = v0 * v1.
//
// wsig and wexp are passed to the ChipWare unit as its significand and
// exponent widths, ieee as its ieee_compliance setting, and wf is the
// total operand width (1 + wexp + wsig).
// Inputs start and clk are part of the common interface but are not
// used here; ready is tied high.
module try_mult
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk );

   // Value driven onto the multiplier's rnd (rounding mode) port.
   localparam logic [2:0] rnd_mode = 0;

   // Status output of the multiplier; collected but not examined.
   uwire [7:0] mul_status;

   CW_fp_mult
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m00
     ( .z(result), .a(v0), .b(v1), .rnd(rnd_mode), .status(mul_status) );

   // No clocked state, so the output is always flagged as available.
   assign ready = 1;

endmodule
// Floating-point square: result = v0 * v0, built from try_mult by
// feeding v0 to both multiplier operands.
//
// Parameters and the ready/start/clk ports are passed straight through
// to the try_mult instance.
module try_sq
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0,
     input uwire start, clk );

   // wf is left at try_mult's default, which is derived from wexp and wsig.
   try_mult
     #( .wsig(wsig), .wexp(wexp), .ieee(ieee) )
   tm
     ( .result(result),
       .ready(ready),
       .v0(v0),
       .v1(v0),
       .start(start),
       .clk(clk) );

endmodule
// Combinational floating-point adder wrapper: result = v0 + v1.
//
// wsig and wexp are passed to the ChipWare unit as its significand and
// exponent widths, ieee as its ieee_compliance setting, and wf is the
// total operand width (1 + wexp + wsig).
// Inputs start and clk are part of the common interface but are not
// used here; ready is tied high.
module try_add
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk );

   // Value driven onto the adder's rnd (rounding mode) port.
   localparam logic [2:0] rnd_mode = 0;

   // Status output of the adder; collected but not examined.
   uwire [7:0] add_status;

   CW_fp_add
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a00
     ( .z(result), .a(v0), .b(v1), .rnd(rnd_mode), .status(add_status) );

   // No clocked state, so the output is always flagged as available.
   assign ready = 1;

endmodule
// Reference model: mag = v0*v0 + v0*v1 + v1*v1, evaluated with real
// arithmetic. The testbench compares the hardware modules against it.
//
// The module uses real and string types, so it is wrapped in synthesis
// translate_off / translate_on pragmas. The pragmas had been reduced to
// bare `cadence` tokens (one fused onto the `module` keyword), which do
// not parse; they are restored as comments here.

// cadence translate_off
module ms_functional
  ( output real mag, input real v0, v1 );

   // Label for this model.
   localparam string name = "Func";

   always_comb mag = v0 * v0 + v0 * v1 + v1 * v1;

endmodule
// cadence translate_on
// Combinational computation of v0*v0 + v0*v1 + v1*v1 using three
// floating-point multipliers and two adders.
//
// wsig / wexp / ieee are forwarded to every ChipWare unit as sig_width,
// exp_width and ieee_compliance; wf is the operand width.
// start and clk are unused; ready is tied high.
module ms_comb
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk );

   // The string-typed label is for simulation only, so it is hidden
   // from synthesis. The pragmas were bare `cadence` tokens (a syntax
   // error) and are restored as comments.
   // cadence translate_off
   localparam string name = "Comb";
   // cadence translate_on

   localparam int nstages = 1;

   // Value driven onto every unit's rnd (rounding mode) port.
   localparam logic [2:0] rm = 0;

   // Status outputs of the arithmetic units; not examined.
   uwire [7:0] mul_s1, mul_s2, mul_s3, a_s1, a_s2;

   // v00 = v0*v0, v01 = v0*v1, v11 = v1*v1, s1 = v00 + v11.
   uwire [wf-1:0] v00, v01, v11, s1;

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m00( .a(v0), .b(v0), .rnd(rm), .z(v00), .status(mul_s1) );

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m01( .a(v0), .b(v1), .rnd(rm), .z(v01), .status(mul_s2) );

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m11( .a(v1), .b(v1), .rnd(rm), .z(v11), .status(mul_s3) );

   // The two squares are summed first, then the cross product is added.
   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .a(v00), .b(v11), .rnd(rm), .z(s1), .status(a_s1) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a2( .a(s1), .b(v01), .rnd(rm), .z(result), .status(a_s2) );

   assign ready = 1;

endmodule
// Sequential computation of v0*v0 + v0*v1 + v1*v1 that time-shares one
// floating-point multiplier and one adder.
//
// Protocol, as implemented below:
//   - A clock edge with start high sets step to 0 and clears ready.
//   - step then counts up by one per clock until it reaches last_step.
//   - ready is set on the clock edge at which step == last_step-1.
//   - result is the adder output (ac0 + ac1) at all times.
//
// Schedule (value of step during the cycle):
//   step 0: multiplier computes v0*v0; at the edge ac0 <= v0*v0, ac1 <= 0.
//   step 1: multiplier computes v0*v1; at the edge ac0 <= v0*v1, ac1 <= sum.
//   step 2: multiplier computes v1*v1; at the edge ac0 <= v1*v1, ac1 <= sum.
//   step 3 and later: ac1 is held; ac0 keeps capturing v1*v1.
module ms_seq
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output logic [wf-1:0] result, output logic ready,
     input uwire [wf-1:0] v0, v1, input uwire start, clk );

   // The string-typed label is for simulation only, so it is hidden
   // from synthesis. The pragmas were bare `cadence` tokens (a syntax
   // error) and are restored as comments.
   // cadence translate_off
   localparam string name = "Seq";
   // cadence translate_on

   // Status outputs of the arithmetic units; not examined.
   uwire [7:0] mul_s, add_s;

   // Operands and results of the shared multiplier and adder.
   uwire [wf-1:0] mul_a, mul_b, add_a, add_b, prod, sum;

   logic [2:0] step;

   // ac0 holds the most recent product, ac1 the running sum.
   logic [wf-1:0] ac0, ac1;

   localparam int last_step = 4;

   // Value driven onto both units' rnd (rounding mode) port.
   localparam logic [2:0] rm = 0;

   // Step counter: restarted by start, saturates at last_step.
   always_ff @( posedge clk )
     if ( start ) step <= 0;
     else if ( step < last_step ) step <= step + 1;

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m1( .z(prod), .a(mul_a), .b(mul_b), .rnd(rm), .status(mul_s) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .z(sum), .a(add_a), .b(add_b), .rnd(rm), .status(add_s) );

   // Operand selection: (v0,v0) at step 0, (v0,v1) at step 1, (v1,v1) after.
   assign mul_a = step < 2 ? v0 : v1;
   assign mul_b = step == 0 ? v0 : v1;

   assign add_a = ac0, add_b = ac1;

   always_ff @( posedge clk )
     begin
        ac0 <= prod;
        // The running sum starts from zero at step 0 and freezes from step 3.
        if ( step < 3 ) ac1 <= step ? sum : 0;
        if ( start ) ready <= 0;
        else if ( step == last_step-1 ) ready <= 1;
     end

   assign result = sum;

endmodule
// Pipelined computation of v0*v0 + v0*v1 + v1*v1.
//
// Data path, as wired below:
//   - Three multipliers operate directly on the inputs; their outputs
//     are captured in the pl_1_* registers.
//   - Adder a1 sums pl_1_v00 and pl_1_v01; that sum and a copy of
//     pl_1_v11 are captured in the pl_2_* registers.
//   - Adder a2 sums the pl_2_* values; the result is captured in
//     pl_3_sum, which drives result.
// The start input is shifted through pl_1_occ, pl_2_occ and pl_3_occ
// alongside the data; ready is pl_3_occ.
module ms_pipe
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk );

   // The string-typed label is for simulation only, so it is hidden
   // from synthesis. The pragmas were bare `cadence` tokens (a syntax
   // error) and are restored as comments.
   // cadence translate_off
   localparam string name = "Pipe";
   // cadence translate_on

   // Read by the testbench as this module's latency in cycles.
   localparam int nstages = 4;

   // Value driven onto every unit's rnd (rounding mode) port.
   localparam logic [2:0] rm = 0;

   // Status outputs of the arithmetic units; not examined.
   uwire [7:0] mul_s1, mul_s2, mul_s3, a_s1, a_s2;

   // Combinational outputs of the multipliers and adders.
   uwire [wf-1:0] v00, v01, v11, s1, s2;

   // Pipeline registers, named by the stage boundary they follow.
   logic [wf-1:0] pl_1_v00, pl_1_v01, pl_1_v11;
   logic [wf-1:0] pl_2_v0001, pl_2_v11;
   logic [wf-1:0] pl_3_sum;
   logic pl_1_occ, pl_2_occ, pl_3_occ;

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m00( .z(v00), .a(v0), .b(v0), .rnd(rm), .status(mul_s1) );

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m01( .z(v01), .a(v0), .b(v1), .rnd(rm), .status(mul_s2) );

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m11( .z(v11), .a(v1), .b(v1), .rnd(rm), .status(mul_s3) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .z(s1), .a(pl_1_v00), .b(pl_1_v01), .rnd(rm), .status(a_s1) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a2( .z(s2), .a(pl_2_v0001), .b(pl_2_v11), .rnd(rm), .status(a_s2) );

   assign ready = pl_3_occ;
   assign result = pl_3_sum;

   always_ff @( posedge clk ) begin

      // Registers after the multipliers.
      pl_1_v00 <= v00;
      pl_1_v01 <= v01;
      pl_1_v11 <= v11;
      pl_1_occ <= start;

      // Registers after the first adder; v11 is carried along unchanged.
      pl_2_v0001 <= s1;
      pl_2_v11 <= pl_1_v11;
      pl_2_occ <= pl_1_occ;

      // Registers after the second adder.
      pl_3_sum <= s2;
      pl_3_occ <= pl_2_occ;

   end

endmodule
// Reference model: mag = v0 + v0*v1 + v1*v1, evaluated with real
// arithmetic. The testbench compares the m1_* hardware modules
// against it.
//
// The module uses real and string types, so it is wrapped in synthesis
// translate_off / translate_on pragmas. The pragmas had been reduced to
// bare `cadence` tokens (one fused onto the `module` keyword), which do
// not parse; they are restored as comments here.

// cadence translate_off
module m1_functional
  ( output real mag,
    input real v0, v1 );

   // Label for this model.
   localparam string name = "One Func";

   always_comb mag = v0 + v0 * v1 + v1 * v1;

endmodule
// cadence translate_on
// Combinational computation of v0 + v0*v1 + v1*v1 using two
// floating-point multipliers and two adders.
//
// wsig / wexp / ieee are forwarded to every ChipWare unit as sig_width,
// exp_width and ieee_compliance; wf is the operand width.
// start and clk are unused; ready is tied high.
module m1_comb
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk );

   // The string-typed label is for simulation only, so it is hidden
   // from synthesis. The pragmas were bare `cadence` tokens (a syntax
   // error) and are restored as comments.
   // cadence translate_off
   localparam string name = "One Comb";
   // cadence translate_on

   localparam int nstages = 1;

   // Value driven onto every unit's rnd (rounding mode) port.
   localparam logic [2:0] rm = 0;

   // Status outputs of the arithmetic units; not examined.
   // (The unused v00 / mul_s1 nets of the three-multiplier variant
   // have been dropped: this module has no v0*v0 multiplier.)
   uwire [7:0] mul_s2, mul_s3, a_s1, a_s2;

   // v01 = v0*v1, v11 = v1*v1, s1 = v0 + v11.
   uwire [wf-1:0] v01, v11, s1;

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m01( .a(v0), .b(v1), .rnd(rm), .z(v01), .status(mul_s2) );

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m11( .a(v1), .b(v1), .rnd(rm), .z(v11), .status(mul_s3) );

   // v0 enters the sum directly, without being squared.
   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .a(v0), .b(v11), .rnd(rm), .z(s1), .status(a_s1) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a2( .a(s1), .b(v01), .rnd(rm), .z(result), .status(a_s2) );

   assign ready = 1;

endmodule
// Sequential computation of v0 + v0*v1 + v1*v1 that time-shares one
// floating-point multiplier and one adder. The bare v0 term is obtained
// by multiplying v0 by the constant one in the first step.
//
// Protocol, as implemented below:
//   - A clock edge with start high sets step to 0 and clears ready.
//   - step then counts up by one per clock until it reaches last_step.
//   - ready is set on the clock edge at which step == last_step-1.
//   - result is the adder output (ac0 + ac1) at all times.
//
// Schedule (value of step during the cycle):
//   step 0: multiplier computes v0*one; at the edge ac0 <= prod, ac1 <= 0.
//   step 1: multiplier computes v0*v1;  at the edge ac0 <= prod, ac1 <= sum.
//   step 2: multiplier computes v1*v1;  at the edge ac0 <= prod, ac1 <= sum.
//   step 3 and later: ac1 is held; ac0 keeps capturing v1*v1.
module m1_seq
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output logic [wf-1:0] result, output logic ready,
     input uwire [wf-1:0] v0, v1, input uwire start, clk );

   // The string-typed label is for simulation only, so it is hidden
   // from synthesis. The pragmas were bare `cadence` tokens (a syntax
   // error) and are restored as comments.
   // cadence translate_off
   localparam string name = "One Seq";
   // cadence translate_on

   // Status outputs of the arithmetic units; not examined.
   uwire [7:0] mul_s, add_s;

   // Operands and results of the shared multiplier and adder.
   uwire [wf-1:0] mul_a, mul_b, add_a, add_b, prod, sum;

   logic [2:0] step;

   // ac0 holds the most recent product, ac1 the running sum.
   logic [wf-1:0] ac0, ac1;

   localparam int last_step = 4;

   // Value driven onto both units' rnd (rounding mode) port.
   localparam logic [2:0] rm = 0;

   // Floating-point 1.0 in the wf-bit format: sign 0, exponent field
   // equal to the bias (2^(wexp-1) - 1), significand field zero.
   // Each field is explicitly sized so the concatenation is exactly
   // wf bits wide instead of relying on truncation of a 32-bit term.
   localparam logic [wf-1:0] one =
     { 1'b0, wexp'( ( 1 << wexp - 1 ) - 1 ), wsig'(0) };

   // Step counter: restarted by start, saturates at last_step.
   always_ff @( posedge clk )
     if ( start ) step <= 0;
     else if ( step < last_step ) step <= step + 1;

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m1( .z(prod), .a(mul_a), .b(mul_b), .rnd(rm), .status(mul_s) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .z(sum), .a(add_a), .b(add_b), .rnd(rm), .status(add_s) );

   // Operand selection: (v0,one) at step 0, (v0,v1) at step 1, (v1,v1) after.
   assign mul_a = step < 2 ? v0 : v1;
   assign mul_b = step == 0 ? one : v1;

   assign add_a = ac0, add_b = ac1;

   always_ff @( posedge clk )
     begin
        ac0 <= prod;
        // The running sum starts from zero at step 0 and freezes from step 3.
        if ( step < 3 ) ac1 <= step ? sum : 0;
        if ( start ) ready <= 0;
        else if ( step == last_step-1 ) ready <= 1;
     end

   assign result = sum;

endmodule
// Pipelined computation of v0 + v0*v1 + v1*v1.
//
// Data path, as wired below:
//   - Two multipliers operate directly on the inputs; their outputs,
//     together with v0 itself, are captured in the pl_1_* registers.
//   - Adder a1 sums pl_1_v00 and pl_1_v01; that sum and a copy of
//     pl_1_v11 are captured in the pl_2_* registers.
//   - Adder a2 sums the pl_2_* values; the result is captured in
//     pl_3_sum, which drives result.
// The start input is shifted through pl_1_occ, pl_2_occ and pl_3_occ
// alongside the data; ready is pl_3_occ.
module m1_pipe
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk );

   // The string-typed label is for simulation only, so it is hidden
   // from synthesis. The pragmas were bare `cadence` tokens (a syntax
   // error) and are restored as comments.
   // cadence translate_off
   localparam string name = "One Pipe";
   // cadence translate_on

   // Read by the testbench as this module's latency in cycles.
   localparam int nstages = 4;

   // Value driven onto every unit's rnd (rounding mode) port.
   localparam logic [2:0] rm = 0;

   // Status outputs of the arithmetic units; not examined.
   // (The unused v00 / mul_s1 nets have been dropped: this module has
   // no v0*v0 multiplier.)
   uwire [7:0] mul_s2, mul_s3, a_s1, a_s2;

   // Combinational outputs of the multipliers and adders.
   uwire [wf-1:0] v01, v11, s1, s2;

   // Pipeline registers, named by the stage boundary they follow.
   // Note that pl_1_v00 holds v0 itself in this module, not v0*v0.
   logic [wf-1:0] pl_1_v00, pl_1_v01, pl_1_v11;
   logic [wf-1:0] pl_2_v0001, pl_2_v11;
   logic [wf-1:0] pl_3_sum;
   logic pl_1_occ, pl_2_occ, pl_3_occ;

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m01( .z(v01), .a(v0), .b(v1), .rnd(rm), .status(mul_s2) );

   CW_fp_mult #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m11( .z(v11), .a(v1), .b(v1), .rnd(rm), .status(mul_s3) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .z(s1), .a(pl_1_v00), .b(pl_1_v01), .rnd(rm), .status(a_s1) );

   CW_fp_add #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a2( .z(s2), .a(pl_2_v0001), .b(pl_2_v11), .rnd(rm), .status(a_s2) );

   assign ready = pl_3_occ;
   assign result = pl_3_sum;

   always_ff @( posedge clk ) begin

      // Registers after the multipliers; v0 bypasses them.
      pl_1_v00 <= v0;
      pl_1_v01 <= v01;
      pl_1_v11 <= v11;
      pl_1_occ <= start;

      // Registers after the first adder; v11 is carried along unchanged.
      pl_2_v0001 <= s1;
      pl_2_v11 <= pl_1_v11;
      pl_2_occ <= pl_1_occ;

      // Registers after the second adder.
      pl_3_sum <= s2;
      pl_3_occ <= pl_2_occ;

   end

endmodule
// cadence translate_off
// Return a pseudo-random real drawn from [minv, maxv).
// {$random} yields an unsigned 32-bit value, which is scaled by 2^32
// and mapped onto the requested interval.
function automatic real rand_real(real minv, real maxv);
   real span = maxv - minv;
   real draw = real'({$random});
   return minv + span * draw / 2.0**32;
endfunction
// Absolute value of a real number.
function automatic real fabs(real val);
   if ( val < 0 ) return -val;
   return val;
endfunction
// Conversions between SystemVerilog real values and a packed
// floating-point word laid out as { sign, wexp-bit exponent,
// wsig-bit significand }.
//
// Both directions only re-bias the exponent and copy or pad the
// significand: values are truncated rather than rounded, and no range
// check is made on the re-biased exponent.
virtual class conv #(int wexp=6, wsig=10);

   // Field widths of the 64-bit pattern exchanged with $realtobits
   // and $bitstoreal.
   localparam int w_exp_r = 11;
   localparam int w_sig_r = 52;

   // Exponent biases of the 64-bit pattern and of the packed word.
   localparam int bias_r = ( 1 << w_exp_r - 1 ) - 1;
   localparam int bias_h = ( 1 << wexp - 1 ) - 1;

   // Total width of the packed word.
   localparam int w = 1 + wexp + wsig;

   // Convert real r to the packed format. A zero input gives all zeros.
   static function logic [w-1:0] rtof( real r );
      logic sign_r;
      logic [w_exp_r-1:0] exp_r;
      logic [wsig-1:0] sig_f;             // Significand bits that are kept.
      logic [w_sig_r-wsig-1:0] sig_x;     // Low significand bits, discarded.
      if ( !r ) return 0;
      { sign_r, exp_r, sig_f, sig_x } = $realtobits(r);
      return { sign_r, wexp'( exp_r + bias_h - bias_r ), sig_f };
   endfunction

   // Convert packed word f to a real. An all-zero word gives 0.0; any
   // other word is expanded field by field with the significand
   // zero-padded on the right.
   static function real ftor( logic [w-1:0] f );
      return !f
        ? 0.0
        : $bitstoreal
          ( { f[w-1],
              w_exp_r'( bias_r + f[w-2:wsig] - bias_h ),
              f[wsig-1:0],
              (w_sig_r-wsig)'(0) } );
   endfunction

endclass
// Program block that forwards clk and cycle to clk_reactive and
// cycle_reactive. The testbench waits on these forwarded copies
// instead of on the original signals.
program reactivate
  ( output uwire clk_reactive,
    output int cycle_reactive,
    input uwire clk,
    input var int cycle );

   assign clk_reactive = clk,
          cycle_reactive = cycle;

endprogram
// Top-level testbench: runs one testbench_n instance per parameter
// set, one after another, and reports the total error count when the
// simulation ends.
module testbench;

   // Each parameter set is { w_sig, use_one } for one testbench_n.
   localparam int npsets = 4;
   localparam int pset[npsets][2] =
     '{ '{ 7, 0 }, '{ 23, 0 }, '{ 7, 1 }, '{ 23, 1 } };

   // Error bookkeeping. t_errs is added to by each testbench_n when
   // it finishes; the other counters are only initialised here.
   int t_errs;
   int n_err_shown;
   int n_err_sh_nc, n_err_sh_nw, n_err_sh_avg, n_err_sh_state;

   initial begin
      t_errs = 0;
      n_err_shown = 0;
      n_err_sh_nc = 0;
      n_err_sh_nw = 0;
      n_err_sh_avg = 0;
      n_err_sh_state = 0;
   end

   final $write("Total number of errors: %0d\n",t_errs);

   // Chain of done flags: instance i starts when d[i-1] goes high and
   // drives d[i] when it is finished. d[-1] is tied high so the first
   // instance starts immediately.
   uwire d[npsets:-1];
   assign d[-1] = 1;

   for ( genvar i=0; i<npsets; i++ )
     testbench_n #( .w_sig(pset[i][0]), .use_one(pset[i][1]) )
       t2( .done(d[i]), .tstart(d[i-1]) );

endmodule
// Testbench for one parameter set.
//
// Instantiates a functional (real-valued) model plus sequential,
// combinational and pipelined hardware modules for significand width
// w_sig, applies num_tests input vectors, and compares each hardware
// output against the functional model. use_one selects the m1_* family
// of modules; otherwise the ms_* family is used.
//
// The clock does not start until tstart is high. done is driven high
// when the tests finish or the cycle limit is reached.
module testbench_n
#( int w_sig = 7, use_one = 0 )
( output logic done, input uwire tstart );
// How the testbench decides that a module's output is valid.
typedef enum { MT_comb, MT_seq, MT_pipe } Module_Type;
localparam int w_exp = 8;
// Total width of a floating-point operand: sign, exponent, significand.
localparam int wid = w_sig + w_exp + 1;
// max_latency is used only to size cycle_limit below.
localparam int max_latency = 10;
localparam int num_tests = 16;
// Size of the per-module arrays; the instances below use indices 1, 3 and 5.
localparam int nmuts = 10;
// NOTE(review): err is not referenced anywhere else in this module.
int err[nmuts];
// Result and ready outputs of the modules under test, indexed by Info.idx.
uwire [wid-1:0] mag[nmuts];
uwire ready[nmuts];
// Output of the functional model for the current inputs vr.
real magr;
// Current test inputs as reals.
real vr[2];
// v: test inputs in hardware format, driven to the seq and comb modules.
// vp: inputs driven to the pipelined module (see the vp driver below).
logic [wid-1:0] v[2], vp[2];
logic start;
// Per-module bookkeeping.
//   idx       index of the module's entries in mag and ready
//   err_count number of failed tests
//   ncyc      cycles to wait for a result (set for MT_pipe modules)
//   mt        how completion is detected
//   sout      module output sampled for the current test
//   cyc_tot   cycles accumulated over all tests
typedef struct
{
int idx;
int err_count = 0;
int ncyc = 0;
Module_Type mt = MT_comb;
logic [wid-1:0] sout = 'h111;
int cyc_tot = 0;
} Info;
// Modules under test, keyed by their name string.
Info pi[string];
// Simulation is abandoned if cycle reaches this value.
localparam int cycle_limit = num_tests * max_latency * 4;
int cycle, cyc_start;
logic clock;
// Set by the test loop to ask the vp driver to apply the test vector.
bit use_others;
// Copies of clock and cycle produced by the reactivate program block.
logic clk_reactive;
int cycle_reactive;
reactivate ra(clk_reactive,cycle_reactive,clock,cycle);
// Register a sequential module: completion is detected through ready.
task pi_seq(input int idx, input string name);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_seq;
endtask
// Register a combinational module.
task pi_comb(input int idx, input string name);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_comb;
endtask
// Register a pipelined module whose result is sampled ncyc cycles
// after the test starts.
task pi_pipe(input int idx, input string name, input int ncyc);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_pipe;
pi[m].ncyc = ncyc;
endtask
// Clock generation and termination.
initial begin
clock = 0;
cycle = 0;
done = 0;
wait( tstart );
fork
// Every 10 time units toggle the 1-bit clock. The post-increment
// supplies the value clock held before the toggle, so cycle goes up
// by one each time the clock falls from 1 to 0.
while ( !done ) #10 cycle += clock++;
// Finish as soon as the test loop signals done ...
wait( done );
// ... or when the cycle limit is reached.
wait( cycle >= cycle_limit )
$write("*** Cycle limit exceeded, ending.\n");
join_any;
// Also stops the clock loop above when the cycle limit ended the run.
done = 1;
end
// Modules under test. Both branches use the same instance names and the
// same mag / ready indices: 1 = seq, 5 = comb, 3 = pipe. The pipelined
// module is fed from vp rather than v.
if ( use_one ) begin
m1_functional mf( magr, vr[0], vr[1] );
m1_seq #( .wsig(w_sig), .wexp(w_exp), .ieee(0) )
m2( mag[1], ready[1], v[0],v[1], start, clock );
initial begin pi_seq(1,m2.name); end
m1_comb #( .wsig(w_sig), .wexp(w_exp), .ieee(0) )
m5r( mag[5], ready[5], v[0],v[1], start, clock );
initial begin pi_comb(5,m5r.name); end
m1_pipe #( .wsig(w_sig), .wexp(w_exp), .ieee(0) )
m3( mag[3], ready[3], vp[0],vp[1], start, clock );
initial begin pi_pipe(3,m3.name,m3.nstages); end
end else begin
ms_functional mf( magr, vr[0], vr[1] );
ms_seq #( .wsig(w_sig), .wexp(w_exp), .ieee(0) )
m2( mag[1], ready[1], v[0],v[1], start, clock );
initial begin pi_seq(1,m2.name); end
ms_comb #( .wsig(w_sig), .wexp(w_exp), .ieee(0) )
m5r( mag[5], ready[5], v[0],v[1], start, clock );
initial begin pi_comb(5,m5r.name); end
ms_pipe #( .wsig(w_sig), .wexp(w_exp), .ieee(0) )
m3( mag[3], ready[3], vp[0],vp[1], start, clock );
initial begin pi_pipe(3,m3.name,m3.nstages); end
end
// Driver for start and the pipelined module's inputs. Two time units
// after each rising edge of clk_reactive:
//  - if the test loop set use_others, apply the test vector to vp,
//    clear the request and assert start;
//  - otherwise deassert start and drive vp with values derived from the
//    number of cycles since the test began. vp[0] is converted with
//    rtof, vp[1] receives the raw integer difference.
initial begin
while ( !done ) @( posedge clk_reactive ) #2
if ( use_others ) begin
vp = v;
use_others = 0;
start = 1;
end else begin
vp[0] = conv#(w_exp,w_sig)::rtof( real'(cycle-cyc_start) );
vp[1] = cycle - cyc_start;
start = 0;
end
end
// Main test loop.
initial begin
automatic int n_err = 0;
use_others = 0;
start = 0;
@( posedge clk_reactive );
for ( int i=0; i<num_tests; i++ ) begin
// Number of registered modules that have not yet produced a result.
automatic int awaiting = pi.num();
cyc_start = cycle;
// The first four tests use every combination of 0.0 and 1.0 (bit j of
// i selects the value of vr[j]); the rest use random values.
if ( i < 4 ) begin
for ( int j=0; j<2; j++ ) vr[j] = i & 1 << j ? 1.0 : 0.0;
end else begin
for ( int j=0; j<2; j++ ) vr[j] = rand_real(-10,+10);
end
for ( int j=0; j<2; j++ ) v[j] = conv#(w_exp,w_sig)::rtof(vr[j]);
vp = v;
use_others = 1;
// Start one watcher process per module. Each waits for that module's
// result, samples it, and decrements awaiting.
foreach ( pi[muti] ) begin
automatic string mut = muti;
fork begin
if ( pi[mut].mt == MT_seq ) begin
// Sequential module: wait for ready to drop, then to rise again.
wait ( !ready[pi[mut].idx] );
wait ( ready[pi[mut].idx] );
end else begin
// Combinational and pipelined modules: wait a fixed number of cycles,
// 1 for MT_comb and ncyc for MT_pipe.
automatic int latency =
pi[mut].mt == MT_comb ? 1 : pi[mut].ncyc;
automatic int eta = cyc_start + latency;
wait ( cycle_reactive == eta );
end
awaiting--;
pi[mut].sout = mag[pi[mut].idx];
pi[mut].cyc_tot += cycle - cyc_start;
end join_none;
end
wait ( awaiting == 0 );
// Check every sampled output against the functional model.
foreach ( pi[ mut ] ) begin
automatic real mmagr = conv#(w_exp,w_sig)::ftor(pi[mut].sout);
// Relative error; the divisor is 1 when the expected value is zero.
automatic real err_mag =
fabs( mmagr - magr ) / fabs( magr ? magr : 1 );
// Tolerance is 4 / 2^w_sig.
localparam real tol = real'(4) / ( 1 << w_sig );
automatic bit okay = err_mag < tol;
if ( !okay ) begin
pi[mut].err_count++;
n_err++;
// Only the first four failures of each module are printed.
// The vector is printed as vr[1] followed by vr[0].
if ( pi[mut].err_count < 5 )
$write("%s (%0d) test #%0d vec (%.1f,%.1f) error: h'%8h %7.4f != %7.4f (correct)\n",
mut, w_sig, i, vr[1], vr[0],
pi[mut].sout, mmagr, magr);
end
end
// Wait a random number of cycles before the next test. == binds
// tighter than &, so this evaluates as {$random} & (1 == 1), which
// still tests the low bit of the random value.
while ( {$random} & 1 == 1 ) @( posedge clk_reactive );
end
// Per-module summary. The average cycle count is reported as 1 for
// combinational modules.
foreach ( pi[ mut ] )
$write("Ran %4d tests for (%0d) %-0s, %4d errors found. Avg cyc %.1f\n",
num_tests, w_sig, mut,
pi[mut].err_count,
pi[mut].mt == MT_comb ? 1 : real'(pi[mut].cyc_tot) / num_tests);
done = 1;
// Add this instance's errors to the total kept by the top-level testbench.
testbench.t_errs += n_err;
end
endmodule
`define SIMULATION_ON
// cadence translate_on
`default_nettype wire
`ifdef SIMULATION_ON
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/sim/verilog/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/sim/verilog/CW/CW_fp_add.v"
`else
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/syn/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/syn/CW/CW_fp_add.v"
`endif