`default_nettype none
// cadence translate_off
// NOTE(review): this line originally read `cadencemodule`, i.e. a stray
// `cadence` token fused onto the `module` keyword, which does not parse.
// It is restored here as the synthesis-pragma comment it appears to be a
// remnant of; confirm the pragma keyword against the project's conventions.

// Behavioral reference model.
//
// Outputs:
//   mag     v0*v0 + v0*v1 + v1*v1, evaluated with shortreal arithmetic.
// Inputs:
//   v0, v1  Operands, as shortreal values.
//
// The testbench instantiates this module and compares the other modules'
// outputs against mag.
module ms_functional
  ( output shortreal mag,
    input shortreal v0, v1 );

   always_comb mag = v0 * v0 + v0 * v1 + v1 * v1;

endmodule
// cadence translate_on
// Purely combinational evaluation of v0*v0 + v0*v1 + v1*v1.
//
// Parameters:
//   wsig, wexp, ieee  Forwarded to the sig_width, exp_width and
//                     ieee_compliance parameters of every CW_fp_* instance.
//   wf                Operand and result width; defaults to 1 + wexp + wsig.
// Outputs:
//   result  Output of the second adder: ( v0*v0 + v1*v1 ) + v0*v1.
//   ready   Tied to 1.
// Inputs:
//   v0, v1      Operands.
//   start, clk  Present in the port list but not used inside the module.
module ms_comb
  #( int wsig = 23, wexp = 8, ieee = 1, wf = 1 + wexp + wsig )
   ( output uwire [wf-1:0] result,
     output uwire ready,
     input uwire [wf-1:0] v0, v1,
     input uwire start, clk);

   localparam int nstages = 1;
   localparam logic [2:0] rm = 0;   // Value driven onto every rnd port.

   // The three products and the first partial sum.
   uwire [wf-1:0] v00, v01, v11, s1;

   // Status outputs of the five units; connected but never examined here.
   uwire [7:0] mul_s1, mul_s2, mul_s3, a_s1, a_s2;

   // The result is a function of the inputs only, so it is always available.
   assign ready = 1'b1;

   // Products: v0*v0, v0*v1, v1*v1.
   CW_fp_mult
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m00( .a(v0), .b(v0), .rnd(rm), .z(v00), .status(mul_s1) );

   CW_fp_mult
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m01( .a(v0), .b(v1), .rnd(rm), .z(v01), .status(mul_s2) );

   CW_fp_mult
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   m11( .a(v1), .b(v1), .rnd(rm), .z(v11), .status(mul_s3) );

   // Sums: the two squares are added first, then the cross product.
   CW_fp_add
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a1( .a(v00), .b(v11), .rnd(rm), .z(s1), .status(a_s1) );

   CW_fp_add
     #( .sig_width(wsig), .exp_width(wexp), .ieee_compliance(ieee) )
   a2( .a(s1), .b(v01), .rnd(rm), .z(result), .status(a_s2) );

endmodule
// Sequential skeleton built around one CW_fp_mult and one CW_fp_add.
//
// Outputs:
//   result  Declared, but not driven anywhere in this module as written.
//   ready   1 while the step counter equals last_step.
// Inputs:
//   v0, v1  Operands (not yet read by any logic in this module).
//   start   Synchronously restarts the step counter at 0.
//   clk     Clock for the step counter.
//
// NOTE(review): mul_a, mul_b, add_a and add_b have no driver, result is
// never assigned, and ac0 / ac1 are declared but never used, so the datapath
// looks unfinished. The intended operand schedule cannot be determined from
// this file; confirm before relying on this module's output.
module ms_seq_live
  ( output uwire [31:0] result,
    output uwire ready,
    input uwire [31:0] v0, v1,
    input uwire start, clk);

   localparam int last_step = 1;
   localparam logic [2:0] rm = 0;   // Value driven onto both rnd ports.

   // Status outputs of the two units; connected but never examined here.
   uwire [7:0] mul_s, add_s;

   // Operands and results of the shared multiplier and adder.
   uwire [31:0] mul_a, mul_b;
   uwire [31:0] add_a, add_b;
   uwire [31:0] prod, sum;

   logic [31:0] ac0, ac1;
   logic [2:0] step;

   // Step counter: restarts at 0 when start is 1, otherwise counts up and
   // holds once it reaches last_step.
   always_ff @( posedge clk ) begin
      if ( start )
        step <= 0;
      else if ( step < last_step )
        step <= step + 1;
   end

   assign ready = ( step == last_step );

   CW_fp_mult m1
     ( .a(mul_a), .b(mul_b), .rnd(rm), .z(prod), .status(mul_s) );

   CW_fp_add a1
     ( .a(add_a), .b(add_b), .rnd(rm), .z(sum), .status(add_s) );

endmodule
// Pipelined skeleton built around one CW_fp_mult and one CW_fp_add.
//
// Ports mirror those of the other modules in this file: result, ready,
// operands v0 / v1, start and clk.
//
// NOTE(review): as written the module contains only declarations and the
// two arithmetic instances. Nothing drives result, ready, mul_a, mul_b,
// add_a or add_b, and clk / start are unused, so the datapath looks
// unfinished. The intended staging cannot be determined from this file.
//
// nstages is read hierarchically by the testbench (m4.nstages), so its name
// must be kept.
module ms_pipe_live
  ( output logic [31:0] result, output uwire ready,
    input uwire [31:0] v0, v1, input uwire start, clk);

   localparam int nstages = 3;
   localparam logic [2:0] rm = 0;   // Value driven onto both rnd ports.

   // Status outputs of the two units; connected but never examined here.
   uwire [7:0] mul_s, add_s;

   // Operands and results of the multiplier and adder.
   uwire [31:0] mul_a, mul_b;
   uwire [31:0] add_a, add_b;
   uwire [31:0] prod, sum;

   CW_fp_mult m1
     ( .a(mul_a), .b(mul_b), .rnd(rm), .z(prod), .status(mul_s) );

   CW_fp_add a1
     ( .a(add_a), .b(add_b), .rnd(rm), .z(sum), .status(add_s) );

endmodule
// Sequential evaluation of v0*v0 + v0*v1 + v1*v1 using one multiplier and
// one adder.
//
// Outputs:
//   result  Continuously equal to the adder output, ac0 + ac1.
//   ready   Register: cleared on a clock edge where start is 1, set on the
//           clock edge where step equals last_step-1.
// Inputs:
//   v0, v1  Operands; read by the multiplier during steps 0 and later.
//   start   Synchronously restarts the step counter at 0.
//   clk     Clock.
//
// Schedule, as implied by the operand selects and register updates below:
//   step 0:  multiplier forms v0*v0
//   step 1:  multiplier forms v0*v1
//   step 2+: multiplier forms v1*v1
// ac0 captures the product on every clock edge. ac1 is cleared on the edge
// ending step 0 and loaded with the running sum on the edges ending steps 1
// and 2; on any other edge it keeps its value.
module ms_seq
  ( output logic [31:0] result,
    output logic ready,
    input uwire [31:0] v0, v1,
    input uwire start,
    input uwire clk );

   localparam logic [2:0] rnd = 0;   // Value driven onto both rnd ports.
   localparam int last_step = 4;

   // Status outputs of the two units; connected but never examined here.
   uwire [7:0] mul_s, add_s;

   uwire [31:0] prod, sum;
   logic [2:0] step;
   logic [31:0] ac0, ac1;

   // Step counter: restarts at 0 on start, then counts up to last_step and
   // holds there.
   always_ff @( posedge clk ) begin
      if ( start )
        step <= 0;
      else if ( step < last_step )
        step <= step + 1;
   end

   // Multiplier operand selection. Kept as conditional operators so that
   // an unknown step value propagates the same way as a known one would not.
   uwire [31:0] mul_a, mul_b;
   assign mul_a = step >= 2 ? v1 : v0;
   assign mul_b = step != 0 ? v1 : v0;

   // The adder always combines the two accumulator registers.
   uwire [31:0] add_a, add_b;
   assign add_a = ac0;
   assign add_b = ac1;

   CW_fp_mult m1
     ( .a(mul_a), .b(mul_b), .rnd(rnd), .z(prod), .status(mul_s) );

   CW_fp_add a1
     ( .a(add_a), .b(add_b), .rnd(rnd), .z(sum), .status(add_s) );

   // ac0: latest product.
   always_ff @( posedge clk ) ac0 <= prod;

   // ac1: running sum. Steps other than 0, 1 and 2 leave it unchanged.
   always_ff @( posedge clk )
     case ( step )
       0:    ac1 <= 0;
       1, 2: ac1 <= sum;
     endcase

   // ready: cleared by start, set one edge before step reaches last_step.
   always_ff @( posedge clk ) begin
      if ( start )
        ready <= 0;
      else if ( step == last_step-1 )
        ready <= 1;
   end

   assign result = sum;

endmodule
// Three-stage pipelined evaluation of v0*v0 + v0*v1 + v1*v1.
//
// Outputs:
//   result  Stage-3 register, pl_3_sum.
//   ready   Stage-3 occupancy bit: the start input delayed by three clocks.
// Inputs:
//   v0, v1  Operands, read by the multipliers every cycle.
//   start   Marks the operands currently on v0 / v1 as a valid request.
//   clk     Clock.
//
// Stage contents:
//   pl_1_*  the three products v0*v0, v0*v1, v1*v1
//   pl_2_*  ( v0*v0 + v0*v1 ) and the forwarded v1*v1
//   pl_3_*  the final sum
//
// nstages is read hierarchically by the testbench (m3.nstages), so its name
// must be kept.
module ms_pipe
  ( output uwire [31:0] result,
    output uwire ready,
    input uwire [31:0] v0, v1,
    input uwire start, clk);

   localparam int nstages = 3;
   localparam logic [2:0] rm = 0;   // Value driven onto every rnd port.

   // Status outputs of the five units; connected but never examined here.
   uwire [7:0] mul_s1, mul_s2, mul_s3, a_s1, a_s2;

   // Combinational outputs of the multipliers and adders.
   uwire [31:0] v00, v01, v11, s1, s2;

   // Pipeline latches, named by the stage they feed.
   logic [31:0] pl_1_v00, pl_1_v01, pl_1_v11;
   logic [31:0] pl_2_v0001, pl_2_v11;
   logic [31:0] pl_3_sum;
   logic pl_1_occ, pl_2_occ, pl_3_occ;

   // Before stage 1: form all three products from the module inputs.
   CW_fp_mult m00
     ( .a(v0), .b(v0), .rnd(rm), .z(v00), .status(mul_s1) );
   CW_fp_mult m01
     ( .a(v0), .b(v1), .rnd(rm), .z(v01), .status(mul_s2) );
   CW_fp_mult m11
     ( .a(v1), .b(v1), .rnd(rm), .z(v11), .status(mul_s3) );

   always_ff @( posedge clk ) begin
      pl_1_occ <= start;
      pl_1_v00 <= v00;
      pl_1_v01 <= v01;
      pl_1_v11 <= v11;
   end

   // Stage 1 to stage 2: add the first two products, forward the third.
   CW_fp_add a1
     ( .a(pl_1_v00), .b(pl_1_v01), .rnd(rm), .z(s1), .status(a_s1) );

   always_ff @( posedge clk ) begin
      pl_2_occ <= pl_1_occ;
      pl_2_v0001 <= s1;
      pl_2_v11 <= pl_1_v11;
   end

   // Stage 2 to stage 3: add in the remaining product.
   CW_fp_add a2
     ( .a(pl_2_v0001), .b(pl_2_v11), .rnd(rm), .z(s2), .status(a_s2) );

   always_ff @( posedge clk ) begin
      pl_3_occ <= pl_2_occ;
      pl_3_sum <= s2;
   end

   assign result = pl_3_sum;
   assign ready = pl_3_occ;

endmodule
// cadence translate_off
// Return a pseudo-random real between minv and maxv.
//
// One $random value is drawn per call. Wrapping it in a concatenation makes
// the 32 bits unsigned, so dividing by 2**32 yields a fraction in [0,1)
// that scales the span maxv - minv.
function automatic real rand_real(real minv, real maxv);
   real draw;
   draw = real'({$random});
   return minv + ( maxv - minv ) * draw / 2.0**32;
endfunction
// Return the absolute value of val: values that compare less than zero are
// negated, everything else is returned unchanged.
function automatic shortreal fabs(shortreal val);
   if ( val < 0 ) return -val;
   return val;
endfunction
// Re-export a clock and a cycle count from inside a program block.
//
// Outputs:
//   clk_reactive    Copy of clk.
//   cycle_reactive  Copy of cycle.
// Inputs:
//   clk, cycle      Signals to be copied.
//
// The body is nothing but two continuous assignments. Per the IEEE 1800
// scheduling rules, code in a program block is processed in the Reactive
// region set, so the copies update after the design's Active-region events
// for the same time step.
// NOTE(review): presumably this is why the testbench waits on the
// *_reactive copies rather than on clock / cycle directly -- confirm.
program reactivate
(output uwire clk_reactive, output int cycle_reactive,
input uwire clk, input var int cycle);
assign clk_reactive = clk;
assign cycle_reactive = cycle;
endprogram
// Testbench for the ms_* modules.
//
// Generates a clock, applies num_tests operand pairs to every registered
// module under test, compares each module's output against ms_functional,
// and prints a per-module summary before finishing.
module testbench();

   // Kind of module under test; selects how long to wait for its result.
   typedef enum { MT_comb, MT_seq, MT_pipe } Module_Type;

   localparam int wid = 32;
   localparam int max_latency = 10;
   localparam int num_tests = 16;
   localparam int nmuts = 10;

   int err[nmuts];
   uwire [31:0] mag[nmuts];   // Result of module idx, raw bits.
   uwire ready[nmuts];        // Ready output of module idx.

   shortreal magr;            // Reference result from ms_functional.
   shortreal vr[2];           // Current operands as shortreal values.
   logic [31:0] v[2], vp[2];  // Operand bits: v for ms_seq*, vp for ms_pipe*.
   logic start;

   // Per-module bookkeeping, keyed in pi by the module's display name.
   typedef struct
     {
      int idx;                      // Index into mag[] and ready[].
      int err_count = 0;            // Mismatches seen so far.
      int ncyc = 0;                 // Stage count (set by pi_pipe only).
      Module_Type mt = MT_comb;
      logic [wid-1:0] sout = 'h111; // Output sampled for the current test.
      int cyc_tot = 0;              // Sum of cycles-to-result over all tests.
      int latency = 0;              // Latency assumed for the current test.
      } Info;
   Info pi[string];

   // Simulation is abandoned once cycle reaches this value.
   localparam int cycle_limit = num_tests * max_latency * 4;

   int cycle, cyc_start;
   bit done;
   logic clock;
   bit use_others;   // Set by the test loop to request a start pulse.

   // Copies of clock / cycle produced by the reactivate program block.
   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clock,cycle);

   // Register a sequential module under test.
   //   idx   Index of its result / ready signals in mag[] and ready[].
   //   name  Display name; also the key into pi.
   task pi_seq(input int idx, input string name);
      automatic string m = $sformatf("%s", name);
      pi[m].idx = idx; pi[m].mt = MT_seq;
   endtask

   // Register a pipelined module under test.
   //   idx   Index of its result / ready signals in mag[] and ready[].
   //   name  Display name; also the key into pi.
   //   ncyc  Number of pipeline stages; used as the result latency.
   task pi_pipe(input int idx, input string name, input int ncyc);
      automatic string m = $sformatf("%s", name);
      pi[m].idx = idx; pi[m].mt = MT_pipe;
      pi[m].ncyc = ncyc;
   endtask

   // Clock generator and watchdog. Whichever of the three forked branches
   // completes first (in practice done or the cycle limit) ends simulation.
   initial begin
      clock = 0;
      cycle = 0;
      fork
         // Toggle clock every 10 time units. The post-increment yields the
         // old clock value, so cycle advances when clock goes from 1 to 0.
         forever #10 begin
            cycle += clock++;
         end
         wait( done );
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;
      $finish();
   end

   // Reference model and modules under test, each with its registration.
   ms_functional mf( magr, vr[0], vr[1] );

   ms_seq m2( mag[1], ready[1], v[0],v[1], start, clock );
   initial begin pi_seq(1,"Seq."); end

   ms_seq_live m2r( mag[2], ready[2], v[0],v[1], start, clock );
   initial begin pi_seq(2,"Seq. Live"); end

   ms_pipe m3( mag[3], ready[3], vp[0],vp[1], start, clock );
   initial begin pi_pipe(3,"Pipe",m3.nstages); end

   ms_pipe_live m4( mag[4], ready[4], vp[0],vp[1], start, clock );
   initial begin pi_pipe(4,"Pipe Live",m4.nstages); end

   // Stimulus driver, run 2 time units after each reactive clock edge.
   // When the test loop has set use_others, present the real operands on vp
   // and raise start for that cycle. Otherwise drop start and overwrite vp
   // with values derived from the cycle count, so a pipelined module only
   // sees the real operands in the start cycle.
   initial begin
      while ( !done ) @( posedge clk_reactive ) #2
        if ( use_others ) begin
           vp = v;
           use_others = 0;
           start = 1;
        end else begin
           vp[0] = $shortrealtobits(shortreal'(cycle-cyc_start));
           vp[1] = cycle - cyc_start;
           start = 0;
        end
   end

   // Main test loop.
   initial begin
      done = 0;
      use_others = 0;
      start = 0;
      @( posedge clk_reactive );

      for ( int i=0; i<num_tests; i++ ) begin
         // Number of modules that have not yet delivered a result.
         automatic int awaiting = pi.num();
         cyc_start = cycle;

         // Tests 0-3 use the 0.0 / 1.0 combinations selected by the bits
         // of i; later tests use random operands in the range -10 to +10.
         if ( i < 4 ) begin
            for ( int j=0; j<2; j++ ) vr[j] = i & 1 << j ? 1.0 : 0.0;
         end else begin
            for ( int j=0; j<2; j++ ) vr[j] = rand_real(-10,+10);
         end
         for ( int j=0; j<2; j++ ) v[j] = $shortrealtobits(vr[j]);
         vp = v;
         use_others = 1;

         // One collector thread per module: wait out the module's expected
         // latency (and, for sequential modules, its ready signal), then
         // sample its output.
         foreach ( pi[muti] ) begin
            automatic string mut = muti;
            fork begin
               automatic int steps = pi[mut].ncyc;
               automatic int latency =
                 pi[mut].mt == MT_comb ? 1 :
                 pi[mut].mt == MT_seq ? 2 : steps;
               automatic int eta = 1 + cyc_start + latency;
               pi[mut].latency = latency;
               wait ( cycle_reactive == eta );
               if ( pi[mut].mt == MT_seq ) wait( ready[pi[mut].idx] );
               awaiting--;
               pi[mut].sout = mag[pi[mut].idx];
               pi[mut].cyc_tot += cycle - cyc_start;
            end join_none;
         end
         wait ( awaiting == 0 );

         // Compare every sampled output with the reference value; only the
         // first four mismatches per module are printed.
         foreach ( pi[ mut ] ) begin
            automatic shortreal mmagr = $bitstoshortreal(pi[mut].sout);
            automatic shortreal err_mag = fabs( mmagr - magr );
            automatic bit okay = err_mag < 1e-4;
            if ( !okay ) begin
               pi[mut].err_count++;
               if ( pi[mut].err_count < 5 )
                 $write("%s test #%0d vec (%.1f,%.1f) error: h'%8h %7.4f != %7.4f (correct)\n",
                        mut, i, vr[1], vr[0],
                        pi[mut].sout, mmagr, magr);
            end
         end

         // Idle for a random number of cycles before the next test. The
         // parentheses are required: == binds tighter than &.
         while ( ( {$random} & 1 ) == 1 ) @( posedge clk_reactive );
      end

      foreach ( pi[ mut ] )
        $write("Ran %4d tests for %-25s, %4d errors found. Avg cyc %.1f\n",
               num_tests, mut, pi[mut].err_count,
               pi[mut].mt == MT_comb ? 1 : real'(pi[mut].cyc_tot) / num_tests);

      done = 1;
      $finish(2);
   end

endmodule
// cadence translate_on
`default_nettype uwire
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/sim/verilog/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/sim/verilog/CW/CW_fp_add.v"