111
`default_nettype none
module mult_combinational
#( int w = 16 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] plier, cand );
always_comb begin
logic [2*w-1:0] accum;
logic [w-1:0] pp;
accum = 0;
for ( int pos=0; pos<w; pos++ ) begin
pp = plier[pos] ? cand : 0;
accum = ( pp << pos ) + accum;
end
prod = accum;
end
endmodule
module mult_combinational_clk
#( int w = 16 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] cand, plier,
input uwire clk);
uwire [2*w-1:0] p;
logic [w-1:0] candcpy, pliercpy;
mult_combinational #(w) ml(p, candcpy, pliercpy);
always_ff @( posedge clk ) begin
candcpy <= cand;
pliercpy <= plier;
prod <= p;
end
endmodule
module mult_seq #( int w = 16 )
( output logic [2*w-1:0] prod,
output logic ready,
input uwire [w-1:0] cand, plier,
input uwire start, clk);
localparam int wlog = $clog2(w+1);
logic [wlog-1:0] pos;
logic [2*w-1:0] accum;
logic [w-1:0] pp;
always_ff @( posedge clk )
if ( start ) begin
ready <= 0;
accum <= 0;
pos <= 0;
end else if ( pos != w ) begin
pp = plier[pos] ? cand : 0;
accum <= ( pp << pos ) + accum;
pos <= pos + 1;
end else begin
ready <= 1;
prod <= accum;
end
endmodule
module mult_seq_stream #( int w = 16 )
( output logic [2*w-1:0] prod,
output logic ready,
input uwire [w-1:0] cand, plier,
input uwire start, clk);
localparam int wlog = $clog2(w+1);
logic [wlog-1:0] pos;
logic [2*w-1:0] accum;
logic [w-1:0] pp;
always_ff @( posedge clk )
if ( start ) begin
ready <= 0;
accum <= plier;
pos <= 0;
end else if ( pos != w ) begin
pp = accum[0] ? cand : 0;
accum <= ( pp << w-1 ) + ( accum >> 1 );
pos <= pos + 1;
end else begin
ready <= 1;
prod <= accum;
end
endmodule
module mult_pipe1
#( int w = 16, int m = 1 )
( output logic [2*w-1:0] prod,
input uwire [w-1:0] cand, plier,
input uwire clk);
localparam int stages = w;
logic [2*w-1:0] pl_accum[0:stages];
logic [w-1:0] pl_cand[0:stages], pp;
logic [w-1:0] pl_plier[0:stages];
always_ff @( posedge clk ) begin
pl_accum[0] <= 0;
pl_cand[0] <= cand;
pl_plier[0] <= plier;
for ( int stage=0; stage<stages; stage++ ) begin
automatic int pos = stage;
pp = pl_plier[stage][pos] ? pl_cand[stage] : 0;
pl_accum[stage+1] <= ( pp << pos ) + pl_accum[stage];
pl_cand[stage+1] <= pl_cand[stage];
pl_plier[stage+1] <= pl_plier[stage];
end
end
assign prod = pl_accum[stages];
endmodule
module mult_behav_1
#(int w = 16)
(output uwire [2*w-1:0] prod, input uwire [w-1:0] cand, plier);
assign prod = cand * plier;
endmodule
cadence
program reactivate
(output uwire clk_reactive, output int cycle_reactive,
input uwire clk, input var int cycle);
assign clk_reactive = clk;
assign cycle_reactive = cycle;
endprogram
module testbench;
localparam int w = 8;
localparam int num_tests = 1000;
localparam int NUM_MULT = 20;
localparam int err_limit = 7;
bit use_others;
logic start;
logic [w-1:0] plier, cand;
logic [w-1:0] plierp, candp;
logic [2*w-1:0] prod[NUM_MULT];
logic ready[NUM_MULT];
typedef struct
{ int idx; int err_count = 0;
bit seq = 0;
bit pipe = 0; bit wf = 0; bit st_rdy = 0;
int deg = 1;
logic [2*w-1:0] sout = 'h111; int cyc_tot = 0;
int latency = 0;
} Info;
Info pi[string];
localparam int cycle_limit = num_tests * w * 8;
int cycle;
bit done;
logic clock;
logic clk_reactive;
int cycle_reactive;
reactivate ra(clk_reactive,cycle_reactive,clock,cycle);
initial begin
clock = 0;
cycle = 0;
fork
forever #10 cycle += clock++;
wait( done );
wait( cycle >= cycle_limit )
$write("*** Cycle limit exceeded, ending.\n");
join_any;
$finish();
end
initial begin
while ( !done ) @( posedge clk_reactive ) #1
if ( use_others ) begin
start = 1;
plierp = plier;
candp = cand;
use_others = 0;
end else begin
start = 0;
plierp = cycle;
candp = 256;
end
end
task pi_seq(input int idx, input string name, input int deg);
automatic string m = $sformatf("%s Deg %0d", name, deg);
pi[m].deg = deg;
pi[m].idx = idx; pi[m].seq = 1;
endtask
task pi_seqs(input int idx, input string name, input int deg);
automatic string m = $sformatf("%s Deg %0d", name, deg);
pi[m].deg = deg;
pi[m].idx = idx; pi[m].seq = 1; pi[m].st_rdy = 1;
endtask
task pi_seqw(input int idx, input string name, input int deg);
automatic string m = $sformatf("%s Deg %0d", name, deg);
pi[m].deg = deg;
pi[m].idx = idx; pi[m].seq = 1; pi[m].wf = 1;
endtask
task pi_pipe(input int idx, input string name, input int deg);
automatic string m = $sformatf("%s Deg %0d", name, deg);
pi[m].deg = deg;
pi[m].idx = idx; pi[m].seq = 1; pi[m].pipe = 1;
endtask
mult_behav_1 #(w) mb1(prod[0], plier, cand);
initial pi["Behavioral"].idx = 0;
mult_seq #(w) ms4(prod[4], ready[4], plier, cand, start, clock);
initial pi_seqs(4,"Seq SR Simple",1);
mult_seq_stream #(w) ms5(prod[5], ready[5], plier, cand, start, clock);
initial pi_seqs(5,"Seq SR Streamlined",1);
mult_combinational_clk #(w) mlc1(prod[15], plier, cand, clock);
initial begin
automatic string m = $sformatf("Combinational Clock");
pi[m].idx = 15; pi[m].seq = 0;
end
mult_pipe1 #(w) ms18(prod[18], plierp, candp, clock);
initial pi_pipe(18,"Pipelined Simple",1);
int tests[$] = {1,32, 1,1, 1,2, 1,32, 32, 1};
initial begin
done = 0;
use_others = 0;
start = 1;
#0 begin
string index_used[NUM_MULT];
automatic int n_unused = 0, n_reused = 0;
foreach ( pi[ mut ] ) begin
automatic int idx = pi[mut].idx;
if ( index_used[idx].len() )
begin
$write("*** Index %0d used by %s and %s.\n",
idx, index_used[idx], mut );
n_reused++;
end
index_used[idx] = mut;
end
$write("Unused positions: ");
foreach ( index_used[idx] )
if ( index_used[idx].len() == 0 )
$write("%s%0d", n_unused++ ? ", " : "", idx);
$write("%s.\n",n_unused ? "" : "none -- all used");
if ( n_reused )
$fatal(2, "\nFound %0d re-used indices. Aborting simulation.\n\n",
n_reused);
end
@( posedge clk_reactive );
plier = 1;
cand = 1;
use_others = 1;
wait ( cycle_reactive >= 2*w );
@( posedge clk_reactive );
for ( int i=0; i<num_tests; i++ ) begin
automatic int cyc_start = cycle;
automatic int awaiting = pi.num();
plier = tests.size() ? tests.pop_front() : $random();
cand = tests.size() ? tests.pop_front() : $random();
start = 1;
plierp = plier;
candp = cand;
use_others = 1;
foreach ( pi[muti] ) begin
automatic string mut = muti; automatic Info p = pi[mut];
fork begin
automatic int arrival_late = 1;
automatic int steps = ( w + p.deg - 1 ) / p.deg;
automatic int latency
= !p.seq ? 2 :
p.wf ? 5 * steps :
!p.pipe ? 2 * steps : arrival_late+steps;
automatic int eta = 1 + cyc_start + latency;
automatic int idx = p.idx;
pi[mut].latency = latency;
if ( p.st_rdy ) begin
if ( !ready[idx] ) begin
$write("Expected %s to be ready\n",mut);
$finish(2);
end
wait ( !ready[idx] );
wait ( ready[idx] );
end else begin
wait ( cycle_reactive == eta );
end
awaiting--;
pi[mut].sout = prod[idx];
pi[mut].cyc_tot += cycle - cyc_start;
end join_none;
end
wait ( awaiting == 0 );
foreach ( pi[ mut ] )
if ( prod[0] !== pi[mut].sout ) begin
pi[mut].err_count++;
if ( pi[mut].err_count < 5 )
$write
("%-25s wrong result: %0d * %0d: 0x%0h != 0x%0h (correct)\n",
mut, plier, cand, pi[mut].sout, prod[0]);
end
@( posedge clk_reactive );
end
foreach ( pi[ mut ] )
$write("Ran %4d tests for %-25s, %4d errors found. Avg cyc %.1f\n",
num_tests, mut, pi[mut].err_count,
pi[mut].seq ? real'(pi[mut].cyc_tot) / num_tests : 1);
done = 1;
$finish(2);
end
endmodule
cadence