////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755
//
 /// Add-Accumulate Sample Problem
 //
 // Based on 2019 Homework 6
 //   https://www.ece.lsu.edu/koppel/v/2019/hw06.pdf
 //
 /// Related Problems
 //
 //  Fall 2022 Final Exam Problem 3 - Show timing for add_accum hardware.

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
/// Problem 1
//
 /// Complete add_accum so that it accumulates a sum.
 //
 //     [ ] Put your solution in add_accum. No other modules should
 //         be modified. (Except the testbench, to help debug.)
 //
 //     [ ] add_accum must use an add_pipe module to compute the sum.
 //
 //     [ ] Make sure that the testbench does not report errors.
 //     [ ] Module must be synthesizable. Use command: genus -files syn.tcl
 //
 //     [ ] As always, avoid costly, slow, and confusing code.
 //     [ ] As always, don't assume parameters will be at their default values.

/// add_accum -- Accumulate a running sum using one pipelined adder.
//
//  Parameters
//    w         Width in bits of the input values and of the sum.
//    n_stages  Number of stages in the add_pipe instance.
//
//  Ports
//    sum        Accumulated value. Meaningful as the total only when
//               sum_valid is 1.
//    sum_valid  1 when sum holds the total of every value accepted since
//               the last reset, that is, when nothing is in the adder.
//    ai         Value to add to the sum.
//    ai_valid   1 when ai is to be added at the next positive clk edge.
//    reset      Synchronous. Sets the sum to zero and discards any values
//               still in the adder. Takes priority over ai_valid.
//    clk        Clock. All state changes at the positive edge.
//
//  Operation
//    The total is kept as a set of partial sums: at most one in the sum
//    register (when sum_live is 1) and any number in flight in the adder.
//    In each cycle up to three operands are available: ai, the adder
//    output aout, and the sum register.
//
//      Two or more available:  Two are sent into the adder. When all three
//                              are available ai and aout are sent and the
//                              sum register is left alone.
//      Exactly one of ai/aout: It is copied into the sum register.
//      Otherwise:              Nothing changes.
//
//    Every addition is performed by add_p0. When the adder empties the
//    last partial sum is necessarily in the sum register, and so sum_valid
//    is asserted.

module add_accum
  #( int w = 20, n_stages = 3 )
   ( output logic [w-1:0] sum,
     output logic sum_valid,
     input uwire [w-1:0] ai,
     input uwire ai_valid, reset, clk );

   uwire [w-1:0] aout;

   // Bit i is 1 when the value launched i+1 clock edges ago is a real
   // partial sum. As in the original template, aout is taken to be
   // valid when bit n_stages-1 is set.
   //
   logic [n_stages-1:0] st_occ;
   uwire aout_valid = st_occ[n_stages-1];
   uwire pipe_busy = |st_occ;

   // 1 when the sum register holds a partial sum that has not been
   // sent into the adder.
   //
   logic sum_live;

   // Both operands are new values; the sum register is not needed.
   uwire pair_new = ai_valid && aout_valid;

   // At least two operands are available: start an addition.
   uwire launch = pair_new || sum_live && ( ai_valid || aout_valid );

   // Exactly one of ai / aout is available and the sum register is free:
   // park that value in the sum register.
   uwire capture = !launch && ( ai_valid || aout_valid );

   // Adder operands. When launch is 0 the adder result is ignored
   // because a 0 is shifted into st_occ.
   //
   uwire [w-1:0] a0 = ai_valid ? ai : aout;
   uwire [w-1:0] a1 = pair_new ? aout : sum;

   add_pipe #(w,n_stages) add_p0(aout,a0,a1,clk);

   // The sum is complete when it is in the register and nothing is in flight.
   always_comb sum_valid = sum_live && !pipe_busy;

   always_ff @( posedge clk )
     if ( reset ) begin

        // A zero total is treated as a live partial sum so that
        // sum_valid is 1 immediately after reset.
        sum <= 0;
        sum_live <= 1;
        st_occ <= 0;

     end else begin

        // Advance occupancy bits. The shift drops the bit for the value
        // that is leaving the adder in this cycle.
        st_occ <= ( st_occ << 1 ) | launch;

        if ( capture ) begin

           // a0 is ai if ai_valid, otherwise aout.
           sum <= a0;
           sum_live <= 1;

        end else if ( launch && !pair_new ) begin

           // The sum register was used as an adder operand.
           sum_live <= 0;

        end

     end

endmodule


/// Pipelined Adder
//
//  Do not modify this.
module add_pipe #( int w = 21, n_stages = 3 ) ( output uwire [w-1:0] sum, input uwire [w-1:0] a, b, input uwire clk ); localparam int bits_per_stage = ( w + n_stages - 1 ) / n_stages; localparam int wr = n_stages * bits_per_stage; // w rounded. logic [wr-1:0] pl_a[n_stages+1], pl_b[n_stages+1], pl_sum[n_stages+1]; logic pl_carry[n_stages+1]; always_ff @( posedge clk ) begin pl_a[0] = a; pl_b[0] = b; pl_carry[0] = 0; for ( int s=0; s> bits_per_stage; pl_a[s+1] <= pl_a[s] >> bits_per_stage; pl_b[s+1] <= pl_b[s] >> bits_per_stage; end end assign sum = pl_sum[ n_stages ][w-1:0]; endmodule ////////////////////////////////////////////////////////////////////////////// /// Testbench // // May be modified to facilitate debugging. // cadence translate_off program reactivate (output uwire clk_reactive, output int cycle_reactive, input uwire clk, input var int cycle); assign clk_reactive = clk; assign cycle_reactive = cycle; endprogram module testbench; localparam int n_stages[] = { 2, 3, 5, 6 }; localparam int nw = 4; // Cadence, please fix this. initial if ( nw != n_stages.size() ) $fatal(1,"Constant nw should be %0d.\n",n_stages.size() ); int t_errs; // Total number of errors. initial t_errs = 0; final $write("Total number of errors: %0d\n",t_errs); uwire d[nw:-1]; // Start / Done signals. assign d[-1] = 1; // Initialize first at true. // Instantiate a testbench at each size. // for ( genvar i=0; i= cycle_limit ) $write("Exit from clock loop at cycle %0d, limit %0d. 
%s\n %s\n", cycle, cycle_limit, "** CYCLE LIMIT EXCEEDED **", event_trace); join_any; done = 1; end uwire [w-1:0] sum; uwire sum_valid; logic [w-1:0] a; logic a_valid, reset; add_accum #(w,n_stages) fpa(sum, sum_valid, a, a_valid, reset, clk); int rsum; bit tests_start; int series_idx, value_idx, series_n_vals; int n_errs, n_underdue_errs, n_overdue_errs, n_tests_done; int sum_due_cyc_earliest, sum_due_cyc, n_correct; int last_a_cyc; int latency_sum, latency_sum_n; bit error_val_issued, error_late_issued; initial wait ( done ) begin automatic int not_done = n_tests - series_idx; $write("Done with %0d-stage tests, %0d series.\n Correct, %0d; errors : %0d not done, %0d val, %0d/%0d early/late.\n", n_stages, series_idx, n_correct, not_done, n_errs, n_underdue_errs, n_overdue_errs ); $write("For %0d stages average latency %.2f cycles.\n", n_stages, real'(latency_sum) / ( latency_sum_n ? latency_sum_n : 1 ) ); testbench.t_errs += n_errs + n_underdue_errs + n_overdue_errs + not_done; end initial begin wait( tests_start ); while ( !done ) @( posedge clk_reactive ) begin if ( sum_valid ) begin automatic bit pending = sum_due_cyc < cyc_max; if ( pending ) begin n_tests_done++; sum_due_cyc = cyc_max; if ( sum === rsum ) n_correct++; latency_sum += cycle - last_a_cyc; latency_sum_n++; if ( cycle < sum_due_cyc_earliest ) begin n_underdue_errs++; if ( n_underdue_errs < 5 ) begin $write ("At cyc %0d, value ready too soon, %0d, cyc. 
(Min cyc %0d.)\n", cycle, last_a_cyc - cycle, lat_limit_empty ); if ( event_trace != "" ) $write(" %s\n",event_trace); end end end if ( !error_val_issued && sum !== rsum ) begin error_val_issued = 1; n_errs++; if ( n_errs < 5 ) begin $write("At cyc %0d, wrong sum, %0d != %g (correct)\n", cycle, sum, rsum); if ( event_trace != "" ) $write(" %s\n",event_trace); end end end else if ( sum_due_cyc <= cycle ) begin if ( !error_late_issued ) begin error_late_issued = 1; n_overdue_errs++; sum_due_cyc = cyc_max; if ( n_overdue_errs < 5 ) begin $write("At cycle %0d, sum overdue.\n",cycle); if ( event_trace != "" ) $write(" %s\n",event_trace); end end end end end initial begin automatic int seed = 4755; automatic int series_sparsity = 0; rsum = 0; n_errs = 0; latency_sum_n = 0; latency_sum = 0; error_val_issued = 0; error_late_issued = 1; series_idx = 0; value_idx = 0; series_n_vals = 0; n_overdue_errs = 0; n_underdue_errs = 0; sum_due_cyc = cyc_max; sum_due_cyc_earliest = 0; n_tests_done = 0; n_correct = 0; event_trace = ""; wait( tstart ); $write("Starting tests for %0d-stage pipeline.\n",n_stages); @( negedge clk ); reset = 1; event_trace = $sformatf("R(%0d)",cycle); a_valid = 0; a = 0; @( negedge clk ); cycle_limit = cycle + n_stages * 2; tests_start = 1; reset = 0; @( negedge clk ); wait( sum_valid ); while ( series_idx < n_tests ) begin @( negedge clk ); a = $dist_uniform( seed, 1, a_in_max ); if ( value_idx >= series_n_vals ) begin a_valid = 0; if ( sum_valid ) begin series_idx++; value_idx = 0; event_trace = $sformatf("R(%0d)",cycle); reset = 1; a_valid = 0; rsum = 0; series_n_vals = $dist_uniform( seed, 1, 10 ); series_sparsity = series_idx % 6; sum_due_cyc = cycle + 1; sum_due_cyc_earliest = cycle; error_val_issued = 0; error_late_issued = 0; cycle_limit = cycle + 1; end end else begin reset = 0; a_valid = series_sparsity == 0 || $dist_uniform( seed, 0, series_sparsity ) == 0; cycle_limit = cycle + lat_limit_full; end if ( a_valid ) begin value_idx++; event_trace = 
{event_trace,$sformatf("+%0d(%0d)",a,cycle)}; error_val_issued = 0; error_late_issued = 0; rsum += a; last_a_cyc = cycle; sum_due_cyc = cycle + ( sum_valid ? lat_limit_empty : lat_limit_full ); sum_due_cyc_earliest = cycle + ( value_idx > 1 ? lat_min_empty : 0 ); end end done = 1; end endmodule // cadence translate_on