`default_nettype none
// add_accum: running-sum accumulator built around one pipelined adder.
//
// Parameters
//   w         Width in bits of the inputs and of the sum.
//   n_stages  Number of stages in the add_pipe instance.
// Ports
//   sum        Accumulated value; meaningful when sum_valid is 1.
//   sum_valid  1 when sum reflects every value accepted since reset.
//   ai         Value to add; sampled when ai_valid is 1.
//   ai_valid   Qualifies ai.
//   reset      Synchronous reset; clears the sum and all occupancy state.
//   clk        Clock; all state updates on the positive edge.
//
// Up to three operands can be available in a cycle: the new input, the
// adder output and the value parked in sum.  When two or more are
// available, a pair is launched into the adder; a single leftover
// operand is held in sum.
module add_accum
  #( int w = 20, n_stages = 3 )
   ( output logic [w-1:0] sum,
     output logic sum_valid,
     input uwire [w-1:0] ai,
     input uwire ai_valid, reset, clk );

`ifdef xxxxx
   // Single-cycle reference behaviour; compiled only if xxxxx is defined.
   always_ff @ ( posedge clk )
     if ( reset ) sum = 0; else if ( ai_valid ) sum += ai;
   always_comb sum_valid = 1;
`endif

   // st_occ[k] is 1 when an operand pair launched k+1 clock edges ago is
   // still being tracked.
   logic [n_stages:0] st_occ;

   // 1 when sum holds an operand that has not yet been sent to the adder.
   logic sum_occ;

   // The adder result is available when the tracked pair reaches the
   // last stage.
   uwire aout_valid = st_occ[n_stages-1];
   uwire [w-1:0] aout;

   // Operand selection: a new input and an adder result take priority
   // over the parked value in sum.
   uwire [w-1:0] a0 = ai_valid ? ai : sum;
   uwire [w-1:0] a1 = aout_valid ? aout : sum;

   add_pipe #(w,n_stages) add_p0( aout, a0, a1, clk);

   // Number of operands available this cycle (0 to 3).
   uwire [1:0] n_items = ai_valid + aout_valid + sum_occ;

   // 1 from reset until the first input is accepted.
   logic sum_zero;

   // 1 when none of the tracked adder stages holds a pair.
   uwire pipe_empty = !st_occ[n_stages-1:0];

   always_ff @( posedge clk ) begin

      if ( reset ) begin

         sum <= 0;
         sum_valid <= 1;
         sum_zero <= 1;
         sum_occ <= 0;
         st_occ <= 0;

      end else begin

         // Shift the occupancy bits one position; a new pair enters
         // whenever at least two operands are available.
         st_occ <= { st_occ[n_stages-1:0], ( n_items > 1 ) };

         if ( ai_valid ) sum_zero <= 0;

         // With one or three operands exactly one is left for sum.
         sum_occ <= n_items[0];

         // Valid while nothing has been accepted since reset, or when the
         // only operand anywhere is the one going into (or staying in) sum.
         sum_valid <= sum_zero || ( pipe_empty && n_items == 1 );

         // A lone operand is parked in sum.
         if ( n_items == 1 )
           sum <= aout_valid ? aout : ai_valid ? ai : sum;

      end

   end

endmodule
`ifdef YYYYY
// add_accum_25_Nov_2020: dated snapshot of the accumulator, compiled only
// when YYYYY is defined.
//
// Parameters
//   w         Width in bits of the inputs and of the sum.
//   n_stages  Number of stages in the add_pipe instance.
// Ports
//   sum        Accumulated value.
//   sum_valid  Registered copy of "exactly one operand was available".
//   ai         Value to add; sampled when ai_valid is 1.
//   ai_valid   Qualifies ai.
//   reset      Synchronous reset.
//   clk        Clock; all state updates on the positive edge.
module add_accum_25_Nov_2020
  #( int w = 20, n_stages = 3 )
   ( output logic [w-1:0] sum,
     output logic sum_valid,
     input uwire [w-1:0] ai,
     input uwire ai_valid, reset, clk );

`ifdef xxxxx
   // Single-cycle reference behaviour; compiled only if xxxxx is defined.
   always_ff @ ( posedge clk )
     if ( reset ) sum = 0; else if ( ai_valid ) sum += ai;
   always_comb sum_valid = 1;
`endif

   // st_occ[k] is 1 when an operand pair launched k+1 clock edges ago is
   // still being tracked.
   logic [n_stages:0] st_occ;

   // 1 when sum holds an operand that has not yet been sent to the adder.
   logic sum_occ;

   uwire aout_valid = st_occ[n_stages-1];
   uwire [w-1:0] aout;

   // A new input and an adder result take priority over the value in sum.
   uwire [w-1:0] a0 = ai_valid ? ai : sum;
   uwire [w-1:0] a1 = aout_valid ? aout : sum;

   add_pipe #(w,n_stages) add_p0( aout, a0, a1, clk);

   // Number of operands available this cycle (0 to 3).
   uwire [1:0] n_items = ai_valid + aout_valid + sum_occ;

   always_ff @( posedge clk ) begin

      if ( reset ) begin

         sum <= 0;
         sum_valid <= 1;
         sum_occ <= 0;
         st_occ <= 0;

      end else begin

         // Shift the occupancy bits; a pair enters when two or more
         // operands are available.
         st_occ <= { st_occ[n_stages-1:0], ( n_items > 1 ) };

         // With one or three operands exactly one is left for sum.
         sum_occ <= n_items[0];

         sum_valid <= n_items == 1;

         // A lone operand is parked in sum.
         if ( n_items == 1 )
           sum <= aout_valid ? aout : ai_valid ? ai : sum;

      end

   end

endmodule
`endif
// Dated draft of the accumulator; compiled only when xxxx is defined.
// NOTE(review): this looks like an abandoned early attempt -- see the notes
// below on conflicting drivers before enabling it.
`ifdef xxxx
module add_accum_20_November_2020
#( int w = 20, n_stages = 3 )
( output logic [w-1:0] sum,
output logic sum_valid,
input uwire [w-1:0] ai,
input uwire ai_valid, reset, clk );
// Single-cycle reference behaviour; compiled only if xxxxx is defined.
`ifdef xxxxx
always_ff @ ( posedge clk )
if ( reset ) sum = 0; else if ( ai_valid ) sum += ai;
always_comb sum_valid = 1;
`endif
// The adder always receives the raw input and the current sum.
uwire [w-1:0] aout;
uwire [w-1:0] a0 = ai; uwire [w-1:0] a1 = sum;
add_pipe #(w,n_stages) add_p0( aout, a0, a1, clk);
// Occupancy shift register tracking operand pairs sent to the adder.
logic [n_stages:0] st_occ;
uwire aout_valid = st_occ[n_stages-1];
// sum_valid is 1 when none of the tracked stages is occupied.
// NOTE(review): sum_valid is also assigned in the always_ff reset branch
// below; a variable driven by both a continuous assignment and a
// procedure is a conflict -- confirm before use.
assign sum_valid = !st_occ[n_stages-1:0];
// Operand count; here sum_valid stands in for "sum holds an operand".
uwire [1:0] n_items = ai_valid + aout_valid + sum_valid;
always_ff @( posedge clk ) if ( reset ) begin
st_occ <= 0;
sum <= 0;
sum_valid <= 0;
end else begin
// A pair enters tracking when at least two operands are available.
st_occ[0] <= n_items > 1;
for ( int i=1; i<=n_stages; i++ ) st_occ[i] <= st_occ[i-1];
// The unconditional assignment is overridden by the conditional one that
// follows only when ai is selected (aout_valid = 0 and sum_valid = 0).
sum <= aout;
if ( aout_valid ) sum <= aout; else if ( !sum_valid ) sum <= ai;
end
endmodule
`endif
// add_pipe: n_stages-deep pipelined integer adder.
//
// Parameters
//   w         Width in bits of the operands and the result.
//   n_stages  Number of pipeline stages; each adds one slice of the operands.
// Ports
//   sum   a + b (low w bits) for the operands presented n_stages positive
//         clock edges earlier, counting the edge that sampled them.
//   a, b  Operands.
//   clk   Clock.
module add_pipe
  #( int w = 21, n_stages = 3 )
   ( output uwire [w-1:0] sum,
     input uwire [w-1:0] a, b,
     input uwire clk );

   // Slice width, rounded up so that n_stages slices cover all w bits.
   localparam int bits_per_stage = ( w + n_stages - 1 ) / n_stages;

   // Operand width padded to a whole number of slices.
   localparam int wr = n_stages * bits_per_stage;

   // Element s of each array is the input of stage s; element n_stages
   // is the output of the last stage.
   logic [wr-1:0] pl_a[n_stages+1], pl_b[n_stages+1], pl_sum[n_stages+1];
   logic pl_carry[n_stages+1];

   always_ff @( posedge clk ) begin

      // Blocking assignments: stage 0 works on the module inputs in the
      // same clock edge, with no carry in.
      pl_a[0] = a;
      pl_b[0] = b;
      pl_carry[0] = 0;

      for ( int stg = 0; stg < n_stages; stg++ ) begin

         // Add the low slice of each remaining operand plus the carry
         // from the previous slice; the extra bit captures the carry out.
         automatic logic [bits_per_stage:0] slice_sum =
           pl_a[stg][bits_per_stage-1:0] +
           pl_b[stg][bits_per_stage-1:0] + pl_carry[stg];

         pl_carry[stg+1] <= slice_sum[bits_per_stage];

         // Insert the new slice at the top of the partial result and
         // move previously computed slices down by one slice.
         pl_sum[stg+1] <=
           { slice_sum[bits_per_stage-1:0], pl_sum[stg] } >> bits_per_stage;

         // Discard the operand slices just consumed.
         pl_a[stg+1] <= pl_a[stg] >> bits_per_stage;
         pl_b[stg+1] <= pl_b[stg] >> bits_per_stage;

      end

   end

   assign sum = pl_sum[ n_stages ][w-1:0];

endmodule
// cadence translate_off
// reactivate: re-drives a clock and a cycle counter from inside a program
// block.
//
// Ports
//   clk_reactive    Copy of clk.
//   cycle_reactive  Copy of cycle.
//   clk, cycle      Signals to copy.
//
// Presumably used so that testbench code clocked by clk_reactive runs
// after the design has updated its registers for the same clock edge.
program reactivate
  ( output uwire clk_reactive,
    output int cycle_reactive,
    input uwire clk,
    input var int cycle );

   assign cycle_reactive = cycle;
   assign clk_reactive = clk;

endprogram
// testbench: top-level test driver.  Instantiates one testbench_n per
// pipeline depth and chains them so that each starts when the previous
// one reports done.
module testbench;

   // Pipeline depths to test.
   localparam int n_stages[] = { 2, 3, 5, 6 };

   // Number of entries in n_stages; checked against the array at time 0.
   localparam int nw = 4;
   initial begin
      if ( nw != n_stages.size() )
        $fatal(1,"Constant nw should be %0d.\n",n_stages.size() );
   end

   // Error total; each testbench_n instance adds its own error counts.
   int t_errs;
   initial t_errs = 0;
   final $write("Total number of errors: %0d\n",t_errs);

   // d[k-1] is the start signal of instance k and d[k] its done signal.
   // d[-1] is tied high so that the first instance starts immediately.
   uwire d[nw:-1];
   assign d[-1] = 1;

   for ( genvar idx = 0; idx < nw; idx++ )
     testbench_n #(n_stages[idx]) t2( .done(d[idx]), .tstart(d[idx-1]) );

endmodule
// testbench_n: self-checking testbench for add_accum with an n_stages-deep
// adder pipeline.
//
// Parameters
//   n_stages  Adder pipeline depth passed to the add_accum under test.
// Ports
//   done    Set to 1 when all series have run or the cycle limit is hit.
//   tstart  Testing (including the clock) begins when this becomes 1.
//
// The stimulus process applies n_tests series of random values, each
// series preceded by a reset.  A checker process compares sum against a
// shadow sum whenever sum_valid is 1 and checks that sum_valid arrives
// neither too early nor too late.  Totals are printed when done is set
// and are added to testbench.t_errs.
module testbench_n
  #( int n_stages = 3 )
   ( output logic done, input uwire tstart );

   localparam int n_tests = 10000;   // Number of series to run.
   localparam int w = 30;            // Width of the accumulator under test.
   localparam int a_in_max = 42;     // Largest random input value.

   // Large cycle number, used to mean "no sum is currently due".
   localparam int cyc_max = 1 << 30;

   // Latency bounds in cycles, measured from the cycle of the last input.
   //   lat_limit_empty  Deadline when sum_valid was 1 as the input was applied.
   //   lat_min_empty    Earliest allowed sum_valid for inputs after the first.
   //   lat_limit_full   Deadline when sum_valid was 0 as the input was applied.
   localparam int lat_limit_empty = n_stages + 2;
   localparam int lat_min_empty = n_stages;
   localparam int lat_limit_full = 2 + (1+$clog2(n_stages)) * ( n_stages + 1 );

   bit clk;
   int cycle, cycle_limit;

   // Copies of clk and cycle driven from a program block.
   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clk,cycle);

   // Text record of the current series: R(cycle) for the reset, then
   // +value(cycle) for each input.  Printed with error messages.
   string event_trace;

   // Clock generator and watchdog.
   initial begin
      clk = 0;
      cycle = 0;
      done = 0;
      cycle_limit = cyc_max;
      wait( tstart );
      fork
         // Toggle clk every time unit; cycle is advanced by the value clk
         // held before the toggle, i.e. once per clock period.
         while ( !done ) #1 cycle += clk++;
         // Watchdog: the stimulus process keeps moving cycle_limit ahead.
         wait( cycle >= cycle_limit )
           $write("Exit from clock loop at cycle %0d, limit %0d. %s\n %s\n",
                  cycle, cycle_limit, "** CYCLE LIMIT EXCEEDED **",
                  event_trace);
      join_any;
      done = 1;
   end

   // Design under test.
   uwire [w-1:0] sum;
   uwire sum_valid;
   logic [w-1:0] a;
   logic a_valid, reset;
   add_accum #(w,n_stages) fpa(sum, sum_valid, a, a_valid, reset, clk);

   int rsum;                       // Shadow (expected) sum.
   bit tests_start;                // Set once the first reset was applied.
   int series_idx, value_idx, series_n_vals;
   int n_errs, n_underdue_errs, n_overdue_errs, n_tests_done;
   int sum_due_cyc_earliest, sum_due_cyc, n_correct;
   int last_a_cyc;                 // Cycle in which the last input was applied.
   int latency_sum, latency_sum_n; // For the average-latency report.
   bit error_val_issued, error_late_issued;

   // Final report for this pipeline depth.
   initial wait ( done ) begin
      automatic int not_done = n_tests - series_idx;
      $write("Done with %0d-stage tests, %0d series.\n Correct, %0d; errors : %0d not done, %0d val, %0d/%0d early/late.\n",
             n_stages, series_idx,
             n_correct, not_done, n_errs, n_underdue_errs, n_overdue_errs );
      $write("For %0d stages average latency %.2f cycles.\n",
             n_stages,
             real'(latency_sum) / ( latency_sum_n ? latency_sum_n : 1 ) );
      testbench.t_errs += n_errs + n_underdue_errs + n_overdue_errs + not_done;
   end

   // Checker: runs at each positive edge of the program-block clock copy.
   initial begin
      wait( tests_start );
      while ( !done ) @( posedge clk_reactive ) begin
         if ( sum_valid ) begin
            // A sum is pending when the stimulus process has set a deadline.
            automatic bit pending = sum_due_cyc < cyc_max;
            if ( pending ) begin
               n_tests_done++;
               sum_due_cyc = cyc_max;
               if ( sum === rsum ) n_correct++;
               // NOTE(review): this also runs for the check that follows a
               // reset, where last_a_cyc still refers to an earlier input.
               latency_sum += cycle - last_a_cyc;
               latency_sum_n++;
               if ( cycle < sum_due_cyc_earliest ) begin
                  n_underdue_errs++;
                  if ( n_underdue_errs < 5 ) begin
                     // Report the observed latency and the minimum that the
                     // check above enforces.
                     $write
                       ("At cyc %0d, value ready too soon, %0d, cyc. (Min cyc %0d.)\n",
                        cycle, cycle - last_a_cyc, lat_min_empty
                        );
                     if ( event_trace != "" ) $write(" %s\n",event_trace);
                  end
               end
            end
            // Value check; at most one error per input or reset.
            if ( !error_val_issued && sum !== rsum ) begin
               error_val_issued = 1;
               n_errs++;
               if ( n_errs < 5 ) begin
                  $write("At cyc %0d, wrong sum, %0d != %0d (correct)\n",
                         cycle, sum, rsum);
                  if ( event_trace != "" ) $write(" %s\n",event_trace);
               end
            end
         end else if ( sum_due_cyc <= cycle ) begin
            // Deadline passed without sum_valid.
            if ( !error_late_issued ) begin
               error_late_issued = 1;
               n_overdue_errs++;
               sum_due_cyc = cyc_max;
               if ( n_overdue_errs < 5 ) begin
                  $write("At cycle %0d, sum overdue.\n",cycle);
                  if ( event_trace != "" ) $write(" %s\n",event_trace);
               end
            end
         end
      end
   end

   // Stimulus: inputs change at negative clock edges.
   initial begin
      automatic int seed = 4755;
      // 0: an input every cycle; k > 0: an input with probability 1/(k+1).
      automatic int series_sparsity = 0;
      rsum = 0;
      n_errs = 0;
      latency_sum_n = 0;
      latency_sum = 0;
      error_val_issued = 0;
      error_late_issued = 1;   // No overdue reports before the first series.
      series_idx = 0;
      value_idx = 0;
      series_n_vals = 0;
      n_overdue_errs = 0;
      n_underdue_errs = 0;
      sum_due_cyc = cyc_max;
      sum_due_cyc_earliest = 0;
      n_tests_done = 0;
      n_correct = 0;
      event_trace = "";
      wait( tstart );
      $write("Starting tests for %0d-stage pipeline.\n",n_stages);

      // Initial reset, held for one cycle.
      @( negedge clk );
      reset = 1;
      event_trace = $sformatf("R(%0d)",cycle);
      a_valid = 0;
      a = 0;
      @( negedge clk );
      cycle_limit = cycle + n_stages * 2;
      tests_start = 1;
      reset = 0;
      @( negedge clk );
      wait( sum_valid );

      while ( series_idx < n_tests ) begin
         @( negedge clk );
         a = $dist_uniform( seed, 1, a_in_max );
         if ( value_idx >= series_n_vals ) begin
            // Series finished: stop sending and, once the sum is valid,
            // reset and set up the next series.
            a_valid = 0;
            if ( sum_valid ) begin
               series_idx++;
               value_idx = 0;
               event_trace = $sformatf("R(%0d)",cycle);
               reset = 1;
               a_valid = 0;
               rsum = 0;
               series_n_vals = $dist_uniform( seed, 1, 10 );
               series_sparsity = series_idx % 6;
               // After reset a zero sum is due in the next cycle.
               sum_due_cyc = cycle + 1;
               sum_due_cyc_earliest = cycle;
               error_val_issued = 0;
               error_late_issued = 0;
               cycle_limit = cycle + 1;
            end
         end else begin
            reset = 0;
            a_valid = series_sparsity == 0
              || $dist_uniform( seed, 0, series_sparsity ) == 0;
            cycle_limit = cycle + lat_limit_full;
         end
         if ( a_valid ) begin
            // Record the input and set the window in which sum_valid is
            // expected.
            value_idx++;
            event_trace = {event_trace,$sformatf("+%0d(%0d)",a,cycle)};
            error_val_issued = 0;
            error_late_issued = 0;
            rsum += a;
            last_a_cyc = cycle;
            sum_due_cyc = cycle +
              ( sum_valid ? lat_limit_empty : lat_limit_full );
            // The first value of a series needs no addition, so it has no
            // minimum latency.
            sum_due_cyc_earliest =
              cycle + ( value_idx > 1 ? lat_min_empty : 0 );
         end
      end
      done = 1;
   end

endmodule
// cadence translate_on