`default_nettype none
cadence
module prob1_functional
  ( output shortreal mag,
    input  shortreal v0,
    input  shortreal v1 );

   // Behavioral reference model: mag = v0^2 + v0*v1 + v1^2.
   // The three products are summed left to right, exactly as written.
   always_comb begin
      mag = ( v0 * v0 ) + ( v0 * v1 ) + ( v1 * v1 );
   end

endmodule
cadence
// Sequential computation of  result = v0*v0 + v0*v1 + v1*v1  using one
// floating-point multiplier (m1) and one floating-point adder (a1).
//
// Ports:
//   result  Output of the adder, ac0 + ac1.  Holds the final value while
//           ready is 1.
//   ready   Cleared when start is 1; set at the clock edge that ends step 3.
//   v0, v1  Operands, passed unmodified to the multiplier.
//   start   Synchronous restart: sets step to 0 at the next posedge clk.
//   clk     Clock.
//
// Schedule (value of step during the cycle):
//   0: prod = v0*v0                 edge: ac0 <= v0*v0, ac1 <= 0
//   1: prod = v0*v1, result = v0^2  edge: ac0 <= v0*v1, ac1 <= result
//   2: prod = v1*v1                 edge: ac0 <= v1*v1, ac1 <= result
//   3: prod = v1*v1                 edge: ac0 <= v1*v1, ac1 held, ready <= 1
//   4: result = ac0 + ac1 = v1^2 + (v0^2 + v0*v1), held until next start.
module prob1_seq
  ( output uwire [31:0] result,
    output logic ready,
    input uwire [31:0] v0, v1,
    input uwire start, clk );

   // Status outputs of the ChipWare units; connected but otherwise unused.
   uwire [7:0] mul_s, add_s;
   uwire [31:0] prod;
   logic [31:0] ac0, ac1;
   logic [2:0] step;
   localparam int last_step = 4;

   // Step counter: restarts on start, then counts up and saturates at
   // last_step.
   always_ff @( posedge clk )
     if ( start ) step <= 0;
     else if ( step < last_step ) step <= step + 1;

   // Multiplier operand selection.
   //
   // ac0 captures prod on every clock edge, so the operands chosen in steps
   // 3 and 4 determine the value of ac0 while ready is asserted.  They must
   // therefore keep producing v1*v1; selecting v0*v1 here (as an earlier
   // version did) made result = v0^2 + 2*v0*v1 once ready went high.
   logic [31:0] mul_a, mul_b;
   always_comb begin
      case ( step )
        0: begin mul_a = v0; mul_b = v0; end
        1: begin mul_a = v0; mul_b = v1; end
        default: begin mul_a = v1; mul_b = v1; end
      endcase
   end

   always_ff @( posedge clk ) begin
      // Most recent product.
      ac0 <= prod;
      // Running sum: cleared in step 0, accumulated in steps 1 and 2, then
      // held so that ac0 + ac1 stays at the final value.
      ac1 <= step == 0 ? 32'h0 : step < 3 ? result : ac1;
      ready <= start ? 0 : step == 3 ? 1 : ready;
   end

   CW_fp_mult m1( .a(mul_a), .b(mul_b), .rnd(3'd0), .z(prod), .status(mul_s));
   CW_fp_add a1( .a(ac0), .b(ac1), .rnd(3'd0), .z(result), .status(add_s));

endmodule
// Sequential computation of  result = v0*v0 + v0*v1 + v1*v1  with a single
// floating-point multiplier and a single floating-point adder.
//
// start restarts the step counter; ready is cleared by start and set at the
// clock edge that ends step last_step-1.  result is the adder output.
module prob1_seq_sol
  ( output logic [31:0] result,
    output logic ready,
    input uwire [31:0] v0, v1,
    input uwire start,
    input uwire clk );

   localparam logic [2:0] rnd = 0;
   localparam int last_step = 4;

   // Datapath nets.
   uwire [7:0]  mul_s, add_s;     // Status outputs; not otherwise used.
   uwire [31:0] mul_a, mul_b;
   uwire [31:0] add_a, add_b;
   uwire [31:0] prod, sum;

   // State.
   logic [2:0]  step;
   logic [31:0] ac0, ac1;

   // Step counter: zeroed by start, then counts up and stops at last_step.
   always_ff @( posedge clk ) begin
      if ( start ) step <= 0;
      else if ( step < last_step ) step <= step + 1;
   end

   // Multiplier operands: v0*v0 in step 0, v0*v1 in step 1, v1*v1 afterward.
   assign mul_a = ( step >= 2 ) ? v1 : v0;
   assign mul_b = ( step != 0 ) ? v1 : v0;

   // Adder operands are the two accumulator registers.
   assign add_a = ac0;
   assign add_b = ac1;

   CW_fp_mult m1( .a(mul_a), .b(mul_b), .rnd(rnd), .z(prod), .status(mul_s));
   CW_fp_add a1( .a(add_a), .b(add_b), .rnd(rnd), .z(sum), .status(add_s));

   // ac0 always holds the product computed in the previous cycle.
   always_ff @( posedge clk ) ac0 <= prod;

   // ac1 is the running sum: cleared in step 0, updated in steps 1 and 2,
   // and left unchanged in every other step.
   always_ff @( posedge clk )
     if ( step == 0 ) ac1 <= 0;
     else if ( step == 1 || step == 2 ) ac1 <= sum;

   // ready: start takes priority; otherwise set at the end of the
   // next-to-last step and held.
   always_ff @( posedge clk )
     if ( start ) ready <= 0;
     else if ( step == last_step-1 ) ready <= 1;

   assign result = sum;

endmodule
cadence
// Return a value in the range from minv to maxv, derived from one call to
// $random.  The concatenation {$random} makes the 32-bit value unsigned
// before it is converted to real and scaled by 2^32.
function automatic real rand_real(real minv, real maxv);
   automatic real span = maxv - minv;
   automatic real raw  = real'({$random});
   return minv + span * raw / 2.0**32;
endfunction
// Absolute value of a shortreal: negative inputs are negated, everything
// else is returned unchanged.
function automatic shortreal fabs(shortreal val);
   if ( val < 0 ) return -val;
   return val;
endfunction
// Program block that forwards the clock and the cycle count unchanged:
// clk_reactive follows clk and cycle_reactive follows cycle.
// NOTE(review): presumably this exists so that code waiting on the
// *_reactive copies resumes under program-block (reactive region)
// scheduling, after the design has settled -- confirm against the
// simulator in use.
program reactivate
(output uwire clk_reactive, output int cycle_reactive,
input uwire clk, input var int cycle);
assign clk_reactive = clk;
assign cycle_reactive = cycle;
endprogram
// Testbench for the prob1 modules.  Applies num_tests input pairs to every
// registered module under test, samples each module's output, and compares
// it against the output of prob1_functional (magr) with a tolerance of 1e-4.
module testbench();
// Kind of module under test; selects how long the testbench waits before
// sampling the output.
typedef enum { MT_comb, MT_seq, MT_pipe } Module_Type;
localparam wid = 32;
localparam max_latency = 10;
localparam int num_tests = 16;
// Size of the per-module arrays below; modules are addressed by index.
localparam int nmuts = 10;
int err[nmuts];
uwire [31:0] mag[nmuts];
uwire ready[nmuts];
// Reference result and real-valued inputs (driven into prob1_functional).
shortreal magr;
shortreal vr[2];
// Bit patterns of vr[], driven into the modules under test.
logic [31:0] v[2];
// NOTE(review): vp is written below but is not connected to any module
// instance visible in this file.
logic [31:0] vp[2];
logic start;
// Per-module bookkeeping.
typedef struct
{
// Index into mag[] and ready[].
int idx;
// Number of tests whose output differed from the reference.
int err_count = 0;
// Cycle count supplied to pi_pipe; used as the latency for MT_pipe.
int ncyc = 0;
Module_Type mt = MT_comb;
// Most recently sampled module output.
logic [wid-1:0] sout = 'h111;
// Sum over all tests of (cycle at sample time - cycle at test start).
int cyc_tot = 0;
int latency = 0;
} Info;
// Registered modules under test, keyed by display name.
Info pi[string];
// Simulation is ended if cycle reaches this value.
localparam int cycle_limit = num_tests * max_latency * 4;
int cycle, cyc_start;
bit done;
logic clock;
// Set by the test loop to request a one-cycle start pulse from the driver.
bit use_others;
logic clk_reactive;
int cycle_reactive;
// Copies of clock and cycle produced by the reactivate program block.
reactivate ra(clk_reactive,cycle_reactive,clock,cycle);
// Register a sequential module under the given name and index.
task pi_seq(input int idx, input string name);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_seq;
endtask
// Register a pipelined module under the given name and index, with ncyc
// used as its latency.  No caller is visible in this file.
task pi_pipe(input int idx, input string name, input int ncyc);
automatic string m = $sformatf("%s", name);
pi[m].idx = idx; pi[m].mt = MT_pipe;
pi[m].ncyc = ncyc;
endtask
// Clock generator and simulation watchdog.
initial begin
clock = 0;
cycle = 0;
fork
// Every 10 time units: add the current clock value to cycle, then
// increment clock.  cycle therefore advances on the steps in which clock
// was 1 before the increment.
forever #10 begin
cycle += clock++;
end
wait( done );
wait( cycle >= cycle_limit )
$write("*** Cycle limit exceeded, ending.\n");
// join_any: proceeds to $finish as soon as either wait branch completes.
join_any;
$finish();
end
// Reference model, driven by the real-valued inputs.
prob1_functional mf( magr, vr[0], vr[1] );
// Modules under test.  Each uses one index of mag[]/ready[] and is
// registered under a display name.
prob1_seq_sol m2( mag[1], ready[1], v[0],v[1], start, clock );
initial begin pi_seq(1,"Seq. Sol"); end
prob1_seq m2r( mag[2], ready[2], v[0],v[1], start, clock );
initial begin pi_seq(2,"Seq."); end
// Start-pulse driver.  Two time units after each positive edge of
// clk_reactive: if a test was just set up (use_others), assert start and
// clear the request; otherwise deassert start and overwrite vp with values
// derived from the number of cycles since the test began.
initial begin
while ( !done ) @( posedge clk_reactive ) #2
if ( use_others ) begin
vp = v;
use_others = 0;
start = 1;
end else begin
vp[0] = $shortrealtobits(shortreal'(cycle-cyc_start));
vp[1] = cycle - cyc_start;
start = 0;
end
end
// Main test sequence.
initial begin
automatic int tot_errors = 0;
done = 0;
use_others = 0;
start = 0;
@( posedge clk_reactive );
for ( int i=0; i<num_tests; i++ ) begin
// Number of modules that have not yet delivered a result for this test.
automatic int awaiting = pi.num();
cyc_start = cycle;
// The first four tests use the 0.0/1.0 combinations given by bits 0 and 1
// of i (the shift binds tighter than &); later tests use random values
// from rand_real(-10,+10).
if ( i < 4 ) begin
for ( int j=0; j<2; j++ ) vr[j] = i & 1 << j ? 1.0 : 0.0;
end else begin
for ( int j=0; j<2; j++ ) vr[j] = rand_real(-10,+10);
end
for ( int j=0; j<2; j++ ) v[j] = $shortrealtobits(vr[j]);
vp = v;
// Ask the driver for a start pulse.
use_others = 1;
// Launch one collector thread per registered module.
foreach ( pi[muti] ) begin
// NOTE(review): p is not referenced after this declaration.
automatic string mut = muti; automatic Info p = pi[mut];
fork begin
automatic int steps = pi[mut].ncyc;
// Minimum number of cycles to wait: 1 for MT_comb, 2 for MT_seq, ncyc
// for MT_pipe.
automatic int latency =
pi[mut].mt == MT_comb ? 1 :
pi[mut].mt == MT_seq ? 2 : steps;
automatic int eta = 1 + cyc_start + latency;
pi[mut].latency = latency;
wait ( cycle_reactive == eta );
// Sequential modules are additionally sampled only once ready is high.
if ( pi[mut].mt == MT_seq ) wait( ready[pi[mut].idx] );
awaiting--;
pi[mut].sout = mag[pi[mut].idx];
pi[mut].cyc_tot += cycle - cyc_start;
end join_none;
end
// Block until every collector thread has sampled its module.
wait ( awaiting == 0 );
// Compare each sampled output with the reference value.
foreach ( pi[ mut ] ) begin
automatic shortreal mmagr = $bitstoshortreal(pi[mut].sout);
automatic shortreal err_mag = fabs( mmagr - magr );
automatic bit okay = err_mag < 1e-4;
if ( !okay ) begin
pi[mut].err_count++;
// Only the first four mismatches per module are printed.  The vector is
// printed with vr[1] first, then vr[0].
if ( pi[mut].err_count < 5 )
$write("%s test #%0d vec (%.1f,%.1f) error: h'%8h %7.4f != %7.4f (correct)\n",
mut, i, vr[1], vr[0],
pi[mut].sout, mmagr, magr);
end
end
// Wait a random number of clock edges before the next test.
// NOTE(review): == binds tighter than &, so the condition is evaluated as
// {$random} & (1 == 1); this still tests the low bit of the random value.
while ( {$random} & 1 == 1 ) @( posedge clk_reactive );
end
// Per-module summary.  The average cycle count is reported as 1 for
// MT_comb modules and as cyc_tot / num_tests otherwise.
foreach ( pi[ mut ] )
$write("Ran %4d tests for %-25s, %4d errors found. Avg cyc %.1f\n",
num_tests, mut, pi[mut].err_count,
pi[mut].mt == MT_comb ? 1 : real'(pi[mut].cyc_tot) / num_tests);
done = 1;
$finish(2);
end
endmodule
cadence
`default_nettype wire
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/sim/verilog/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/GENUS211/share/synth/lib/chipware/sim/verilog/CW/CW_fp_add.v"