`default_nettype none
// cadence translate_off
// Behavioral reference model used by the testbench.
//
// Despite the name, the output is the SUM OF SQUARES of the three vector
// components (v[0]^2 + v[1]^2 + v[2]^2); no square root is taken.
//
//   mag : sum of squares, as a shortreal.
//   v   : three-element input vector of shortreal values.
module mag_functional
  ( output shortreal mag,
    input shortreal v [3] );

   always_comb begin
      // Running total, rounded to shortreal after every addition so the
      // result is accumulated in index order 0, 1, 2.
      shortreal acc;
      acc = 0;
      foreach ( v[k] ) acc = acc + v[k] * v[k];
      mag = acc;
   end

endmodule
// cadence translate_on
// Combinational sum of squares of a three-element vector.
//
// Computes v[0]^2 + v[1]^2 + v[2]^2 (no square root) using three
// floating-point multipliers and two floating-point adders.
//
//   mag : 32-bit result, (v[0]^2 + v[1]^2) + v[2]^2.
//   v   : three 32-bit operands, passed unmodified to the FP units.
module mag_comb
  ( output uwire [31:0] mag,
    input uwire [31:0] v [3] );

   uwire [31:0] vsq[3];      // vsq[i] = v[i] * v[i]
   uwire [7:0] status[5];    // Status outputs: [0..2] multipliers, [3..4] adders. Not examined.
   uwire [31:0] sum01;       // vsq[0] + vsq[1]

   // Rounding-mode input shared by every FP unit.
   // NOTE(review): 0 is presumably round-to-nearest-even; confirm against the
   // ChipWare component documentation.
   localparam logic [2:0] rnd = 0;

   // Named connections are used throughout.  The previous positional list on
   // the multiplier passed status[i] before vsq[i], which attached the 8-bit
   // status wire to the 32-bit product port and left vsq[] without the
   // squares.  The port names match those used by the other modules in this
   // file (.a, .b, .rnd, .z, .status).
   for ( genvar i=0; i<3; i++ )
     CW_fp_mult m1( .a(v[i]), .b(v[i]), .rnd(rnd),
                    .z(vsq[i]), .status(status[i]) );

   CW_fp_add a1( .a(vsq[0]), .b(vsq[1]), .rnd(rnd),
                 .z(sum01), .status(status[3]) );
   CW_fp_add a2( .a(sum01), .b(vsq[2]), .rnd(rnd),
                 .z(mag), .status(status[4]) );

endmodule
// cadence translate_off
// Simulation-only record that the testbench fills in and shares (by handle)
// with the modules under test, so the current test context can be inspected
// from inside a module while debugging.
//
// The leading pragma was previously fused with the `class` keyword
// ("cadenceclass"), which is not a legal token; it is restored here as a
// comment on its own line.
class Debug;
   int cycle;                 // Testbench cycle counter.
   int test_cyc;              // Cycles elapsed since the current test started.
   int test_num;              // Index of the current test.
   shortreal vr[3];           // Current test vector as shortreal values.
   logic [31:0] v[3];         // Current test vector as 32-bit patterns.
   shortreal magr;            // Reference result as a shortreal.
   logic [31:0] mag;          // Reference result as a 32-bit pattern.
endclass
// cadence translate_on
// Multi-cycle sum of squares of a three-element vector using ONE
// floating-point multiplier and ONE floating-point adder.
//
// Computes v[0]^2 + v[1]^2 + v[2]^2 (no square root).
//
//   mag   : 32-bit result; meaningful once ready is 1.
//   ready : 1 when step has reached last_step.
//   v     : three 32-bit operands; read on the first three steps after start,
//           so they must be held while the computation runs.
//   start : sampled at the positive clock edge; resets the step counter to 0.
//   clk   : clock.
//
// Schedule (value of step during the cycle -> register updates at its end):
//   0 : accum[0] <= v[0]^2
//   1 : accum[1] <= v[1]^2
//   2 : accum[0] <= v[2]^2 ; accum[1] <= v[0]^2 + v[1]^2
//   3 : accum[1] <= v[2]^2 + (v[0]^2 + v[1]^2)
//   4 : ready, result held in accum[1]
module mag_seq
  ( output uwire [31:0] mag,
    output uwire ready,
    input uwire [31:0] v [3],
    input uwire start,
    input uwire clk );

   // Simulation-only debug handle, assigned hierarchically by the testbench.
   // The surrounding pragmas were bare "cadence" tokens (a syntax error);
   // they are restored as comments.
   // cadence translate_off
   Debug db;
   // cadence translate_on

   // Rounding-mode input shared by both FP units.
   localparam logic [2:0] rnd = 0;

   // Step value at which the result is complete.
   localparam int last_step = 4;

   uwire [7:0] sm, sa;        // Multiplier / adder status outputs (not examined).
   logic [31:0] accum[2];     // Working registers; accum[1] ends up holding the result.
   uwire [31:0] prod, sum;    // Multiplier and adder outputs.
   logic [2:0] step;          // Current position in the schedule above.

   // Operand squared this step.  For step >= 3 the index is outside v[];
   // prod is not captured on those steps.
   uwire [31:0] ma = v[ step ];

   CW_fp_mult m1( .a(ma), .b(ma), .rnd(rnd), .z(prod), .status(sm));
   CW_fp_add a1( .a(accum[0]), .b(accum[1]), .rnd(rnd), .z(sum), .status(sa));

   assign ready = step == last_step;
   assign mag = accum[1];

   // Step counter: restart on start, otherwise count up and stop at last_step.
   always_ff @( posedge clk )
     if ( start ) step <= 0;
     else if ( step < last_step ) step <= step + 1;

   // Datapath registers, updated according to the schedule.
   always_ff @( posedge clk )
     begin
        case ( step )
          0: accum[0] <= prod;
          1: accum[1] <= prod;
          2: begin
             accum[0] <= prod; accum[1] <= sum; end
          3: accum[1] <= sum;
        endcase
     end

endmodule
// Pipelined sum of squares of a three-element vector.
//
// Computes v[0]^2 + v[1]^2 + v[2]^2 (no square root).  A new vector can be
// presented every clock cycle; the result for a vector appears on mag after
// nstages positive clock edges.
//
//   mag : 32-bit result for the vector presented nstages edges earlier.
//   v   : three 32-bit operands.
//   clk : clock.
//
// Pipeline registers, in order:
//   pl_v                     : captured inputs
//   pl_vsq[1][0..2]          : the three squares
//   pl_sos[2], pl_vsq[2][2]  : v[0]^2 + v[1]^2, and v[2]^2 carried along
//   pl_sos[3]                : final sum, drives mag
module mag_pipe
  ( output uwire [31:0] mag,
    input uwire [31:0] v [3],
    input uwire clk );

   // Simulation-only debug handle, assigned hierarchically by the testbench.
   // The surrounding pragmas were bare "cadence" tokens (a syntax error);
   // they are restored as comments.
   // cadence translate_off
   Debug db;
   // cadence translate_on

   // Number of register stages between v and mag; read by the testbench.
   localparam int nstages = 4;

   // Rounding-mode input shared by every FP unit.
   localparam logic [2:0] rnd = 0;

   logic [31:0] pl_v[3];           // Registered inputs.
   logic [31:0] pl_vsq[1:2][3];    // Registered squares; of row 2 only element [2] is written and read.
   logic [31:0] pl_sos[2:3];       // Registered partial sum [2] and final sum [3].
   uwire [31:0] vsq[3], sum01, sum012;
   uwire [7:0] s[5];               // Status outputs of the five FP units (not examined).

   // Three multipliers square the registered inputs.
   for ( genvar i=0; i<3; i++ )
     CW_fp_mult m1(.a(pl_v[i]), .b(pl_v[i]),
                   .rnd(rnd), .z(vsq[i]), .status(s[i]));

   // First adder: v[0]^2 + v[1]^2.  Second adder: adds the delayed v[2]^2.
   CW_fp_add a1( pl_vsq[1][0], pl_vsq[1][1], rnd, sum01, s[3] );
   CW_fp_add a2( pl_sos[2], pl_vsq[2][2], rnd, sum012, s[4] );

   always_ff @( posedge clk ) begin
      pl_v <= v;
      pl_vsq[1] <= vsq;
      // v[2]^2 waits one extra stage so it lines up with pl_sos[2].
      pl_vsq[2][2] <= pl_vsq[1][2];
      pl_sos[2] <= sum01;
      pl_sos[3] <= sum012;
   end

   assign mag = pl_sos[3];

endmodule
// cadence translate_off
// Return a pseudo-random real drawn from $random and scaled into the
// interval starting at minv with width (maxv - minv).
//
//   minv : lower end of the interval.
//   maxv : upper end of the interval.
function automatic real rand_real(real minv, real maxv);
   real span;
   real draw;
   span = maxv - minv;
   // {$random} reinterprets the signed 32-bit $random result as unsigned.
   draw = real'({$random});
   return minv + span * draw / 2.0**32;
endfunction
// Absolute value of a shortreal: negates val when it compares less than
// zero, otherwise returns it unchanged.
function automatic shortreal fabs(shortreal val);
   if ( val < 0 ) return -val;
   return val;
endfunction
// Program block that re-exports the testbench clock and cycle counter
// unchanged on clk_reactive and cycle_reactive.
// NOTE(review): presumably this exists so that testbench code waiting on the
// *_reactive copies runs after the design's updates for the time step have
// settled (program-block scheduling) -- confirm against the LRM / simulator.
program reactivate
(output uwire clk_reactive, output int cycle_reactive,
input uwire clk, input var int cycle);
// Straight pass-through of both inputs.
assign clk_reactive = clk;
assign cycle_reactive = cycle;
endprogram
// Self-checking testbench.  Applies num_tests vectors to mag_comb, mag_seq
// and mag_pipe, samples each module's output after a per-module latency and
// compares it with the output of mag_functional, then prints a summary.
module testbench();
// Kind of module under test; selects how the sampling latency is chosen.
typedef enum { MT_comb, MT_seq, MT_pipe } Module_Type;
localparam int wid = 32;
localparam int max_latency = 10;
localparam int num_tests = 16;
// Size of the per-module arrays below; only indices 0, 1 and 3 are connected
// in this file.
localparam int nmuts = 10;
int err[nmuts];
uwire [31:0] mag[nmuts];
uwire ready[nmuts];
// Reference result produced by mag_functional from vr.
shortreal magr;
// Current test vector as shortreal values, and the same vector as bits.
shortreal vr[3];
logic [31:0] v[3];
// Input of the pipelined module: the test vector for one cycle, then filler.
logic [31:0] vp[3];
logic start;
// Per-module bookkeeping record.
typedef struct
{
// Index of the module's entries in mag[] and ready[].
int idx;
int err_count = 0;
// For pipelined modules: stage count supplied through pi_pipe.
int ncyc = 0;
Module_Type mt = MT_comb;
// Module output sampled for the current test.
logic [wid-1:0] sout = 'h111;
// Cycles accumulated over all tests; used for the average in the summary.
int cyc_tot = 0;
int latency = 0;
} Info;
// One record per module under test, keyed by the name printed in reports.
Info pi[string];
localparam int cycle_limit = num_tests * max_latency * 4;
int cycle, cyc_start;
bit done;
logic clock;
// Set by the test loop when a new vector is ready; cleared by the driver.
bit use_others;
logic clk_reactive;
int cycle_reactive;
reactivate ra(clk_reactive,cycle_reactive,clock,cycle);
// Register a sequential module under the given name.
task pi_seq(input int idx, input string name);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_seq;
endtask
// Register a pipelined module under the given name with its stage count.
task pi_pipe(input int idx, input string name, input int ncyc);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_pipe;
pi[m].ncyc = ncyc;
endtask
Debug db;
initial db = new;
// Clock generator and watchdog.
initial begin
clock = 0;
cycle = 0;
fork
// Every 10 time units the clock toggles; cycle is increased by the value
// clock had before the toggle, so it advances once per full clock period
// (when clock goes from 1 to 0).
forever #10 begin
cycle += clock++;
db.cycle = cycle;
db.test_cyc = cycle - cyc_start;
end
wait( done );
wait( cycle >= cycle_limit )
$write("*** Cycle limit exceeded, ending.\n");
join_any;
// Reached when either done is set or the cycle limit is hit.
$finish();
end
mag_functional mf( magr, vr );
mag_comb m1( mag[0], v );
// The combinational module relies on the Info defaults (mt = MT_comb).
initial pi["Comb."].idx = 0;
mag_seq m2( mag[1], ready[1], v, start, clock );
initial begin pi_seq(1,"Seq."); m2.db = db; end
mag_pipe m4( mag[3], vp, clock );
initial begin pi_pipe(3,"Pipe",m4.nstages); m4.db = db; end
// Stimulus driver for vp and start, run 2 time units after each positive
// edge of clk_reactive.  When the test loop has flagged a new vector
// (use_others), present it on vp and assert start; on every other cycle
// drive filler values onto vp and deassert start.
initial begin
while ( !done ) @( posedge clk_reactive ) #2
if ( use_others ) begin
vp = v;
use_others = 0;
start = 1;
end else begin
vp[0] = $shortrealtobits(shortreal'(cycle-cyc_start));
vp[1] = cycle - cyc_start;
vp[2] = 0;
start = 0;
end
end
// Main test loop.
initial begin
automatic int tot_errors = 0;
done = 0;
use_others = 0;
start = 0;
@( posedge clk_reactive );
for ( int i=0; i<num_tests; i++ ) begin
// Number of modules whose output has not yet been sampled for this test.
automatic int awaiting = pi.num();
db.test_num = i;
cyc_start = cycle;
db.test_cyc = 0;
// Tests 0-7 use bit j of the test number to choose 1.0 or 0.0 for
// component j; later tests use random components from rand_real(-10,+10).
if ( i < 8 ) begin
for ( int j=0; j<3; j++ ) vr[j] = i & 1 << j ? 1.0 : 0.0;
end else begin
for ( int j=0; j<3; j++ ) vr[j] = rand_real(-10,+10);
end
for ( int j=0; j<3; j++ ) v[j] = $shortrealtobits(vr[j]);
db.vr = vr;
db.v = v;
// Copy the reference result into the debug record after a #0 delay.
// NOTE(review): the delay presumably lets mag_functional react to the new
// vr first -- confirm.
fork
#0 begin
db.magr = magr;
db.mag = $shortrealtobits(magr);
end
join_none
vp = v;
use_others = 1;
// Start one watcher thread per registered module.
foreach ( pi[muti] ) begin
automatic string mut = muti; automatic Info p = pi[mut];
fork begin
automatic int steps = pi[mut].ncyc;
// Cycles to wait before sampling: 1 for combinational, 2 for sequential
// (which then also waits for its ready output), the registered stage count
// for pipelined modules.
automatic int latency =
pi[mut].mt == MT_comb ? 1 :
pi[mut].mt == MT_seq ? 2 : steps;
automatic int eta = 1 + cyc_start + latency;
pi[mut].latency = latency;
wait ( cycle_reactive == eta );
if ( pi[mut].mt == MT_seq ) wait( ready[pi[mut].idx] );
awaiting--;
pi[mut].sout = mag[pi[mut].idx];
pi[mut].cyc_tot += cycle - cyc_start;
end join_none;
end
// Block until every watcher has sampled its module.
wait ( awaiting == 0 );
// Compare each sampled output with the reference; an absolute difference of
// 1e-4 or more counts as an error.  Only the first four errors per module
// are printed.
foreach ( pi[ mut ] ) begin
automatic shortreal mmagr = $bitstoshortreal(pi[mut].sout);
automatic shortreal err_mag = fabs( mmagr - magr );
automatic bit okay = err_mag < 1e-4;
if ( !okay ) begin
pi[mut].err_count++;
if ( pi[mut].err_count < 5 )
$write("%s test #%0d vec (%.1f,%.1f,%.1f) error: h'%8h %7.4f != %7.4f (correct)\n",
mut, i, vr[2], vr[1], vr[0],
pi[mut].sout, mmagr, magr);
end
end
// Insert a random number of idle cycles between tests.
// NOTE(review): == binds tighter than &, so the condition parses as
// {$random} & (1 == 1); the value is still the low bit of {$random}.
while ( {$random} & 1 == 1 ) @( posedge clk_reactive );
end
// Summary: one line per module.  The average cycle count is reported as 1
// for combinational modules and as cyc_tot / num_tests otherwise.
foreach ( pi[ mut ] )
$write("Ran %4d tests for %-25s, %4d errors found. Avg cyc %.1f\n",
num_tests, mut, pi[mut].err_count,
pi[mut].mt == MT_comb ? 1 : real'(pi[mut].cyc_tot) / num_tests);
done = 1;
$finish(2);
end
endmodule
// cadence translate_on
`default_nettype uwire
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_add.v"