////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2018 Homework 7 -- SOLUTION
//
 /// Assignment https://www.ece.lsu.edu/koppel/v/2018/hw07.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
/// Problem 1
//
 /// Complete mult_seq_ds_prob_1 as described in the handout and below.
//
//     [✔] Start multiplying when in_valid is 1 at a positive clock edge ..
//     [✔] .. even if that means abandoning a multiplication in progress.
//     [✔] Set out_avail to 1 when prod holds the result for
//         most recent plier*cand.
//
//     [✔] The module must pass the testbench.
//         Average cycles should be w/m+1
//     [✔] The module must be synthesizable.
//     [✔] Make sure that synthesized hardware is reasonably fast.
//
//     [✔] Code must be reasonably efficient.
//     [✔] Do not change module parameters.
//     [✔] Do not change ports, EXCEPT changing between var and net kinds.
//     [✔] Don't assume that parameter values will match those used here.
//     [✔] USE DEBUGGING TOOLS LIKE SimVision.
//

// mult_seq_ds_prob_1: Sequential multiplier with in_valid/out_avail handshake.
//
// Parameters:
//   w - Width in bits of each operand; prod is 2*w bits.
//   m - Number of multiplicand bits consumed per clock cycle.
//
// Ports:
//   prod      - Product. Written only when out_avail is set to 1.
//   out_avail - Cleared when in_valid is sampled as 1; set to 1 on the
//               clock edge at which iter has reached iterations.
//   clk       - Clock; all state changes on the positive edge.
//   in_valid  - When 1 at a positive edge, plier and cand are loaded and
//               a new multiplication starts (any one in progress is dropped).
//   plier     - Multiplier.
//   cand      - Multiplicand; copied into the low bits of accum at start.
//
// NOTE(review): the shift-right-by-m datapath looks like it expects w to be
// a multiple of m (the testbench below only uses such values) -- confirm.
//
module mult_seq_ds_prob_1
  #( int w = 16,
     int m = 2 )
   ( output logic [2*w-1:0] prod,
     // SOLUTION: Change kind of out_avail from net (uwire) to var.
     output var logic out_avail,
     input uwire clk, in_valid,
     input uwire [w-1:0] plier, cand );

   localparam int iterations = ( w + m - 1 ) / m;
   localparam int iter_lg = $clog2(iterations);

   localparam logic [w+m-1:0] zero = 0; // Used to set precision to w+m bits.

   // Multiplicand viewed as m-bit digits. Not referenced in this module.
   uwire [iterations-1:0][m-1:0] cand_2d = cand;

   // Number of partial products added since the last start.
   // One bit wider than iter_lg so that it can hold the value iterations.
   bit [iter_lg:0] iter;

   // Upper bits: running sum. Lower bits: multiplicand digits not yet used.
   logic [2*w-1:0] accum;

   always_ff @( posedge clk ) begin

      /// SOLUTION, Problem 1
      //
      //  - Start a new multiplication whenever in_valid is 1.
      //  - When multiplication is finished set out_avail to 1.
      //
      if ( in_valid ) begin

         // If in_valid is 1 start a multiplication.

         accum = cand;
         iter = 0;
         out_avail = 0;

      end else if ( !out_avail && iter == iterations ) begin

         // If a multiplication is in progress (!out_avail) ..
         // .. and we just finished the last iteration of a multiplication ..
         // .. make the result available.

         out_avail = 1;
         prod = accum;

      end

      // Add on a partial product.
      // Do this whether or not a multiplication is in progress.
      //
      // The blocking assignments above mean that on a start edge this
      // step already operates on the freshly loaded accum and iter.
      //
      accum = { zero + plier * accum[m-1:0] + accum[2*w-1:w],
                accum[w-1:m] };
      iter++;

   end

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Problem 2
//
 /// Complete mult_seq_d_prob_2 as described in the handout and below.
//
//     [✔] Skip over multiplicand digits that are zero.
//     [✔] Start multiplying when in_valid is 1 at a positive clock edge ..
//     [✔] .. even if that means abandoning a multiplication in progress.
//     [✔] Set out_avail to 1 when prod holds the result for
//         most recent plier*cand.
//
//     [✔] The module must pass the testbench.
//         Average cycles should be less than w/m+1
//     [✔] The module must be synthesizable.
//         The period should not be too much longer than the original module.
//     [✔] Make sure that synthesized hardware is reasonably fast.
//
//     [✔] The module must be synthesizable.
//     [✔] Code must be reasonably efficient.
//     [✔] Do not change module parameters.
//     [✔] Do not change ports, EXCEPT changing between var and net kinds.
//     [✔] Don't assume that parameter values will match those used here.
//     [✔] USE DEBUGGING TOOLS LIKE SimVision.

// mult_seq_d_prob_2: Sequential multiplier that skips zero multiplicand digits.
//
// Parameters:
//   w - Width in bits of each operand; prod is 2*w bits.
//   m - Width in bits of one multiplicand digit.
//
// Ports:
//   prod      - Product. Written only when out_avail is set to 1.
//   out_avail - Cleared when in_valid is sampled as 1; set to 1 on the
//               first later clock edge at which iter is 0.
//   clk       - Clock; all state changes on the positive edge.
//   in_valid  - When 1 at a positive edge a new multiplication starts.
//   plier     - Multiplier.
//   cand      - Multiplicand. It is read through cand_2d on every cycle of
//               the multiplication, not latched at start.
//               NOTE(review): this presumably requires the caller to hold
//               plier and cand stable until out_avail -- confirm.
//
module mult_seq_d_prob_2
  #( int w = 16,
     int m = 2 )
   ( output logic [2*w-1:0] prod,
     // SOLUTION: Change kind of out_avail from net (uwire) to var.
     output logic out_avail,
     input uwire clk, in_valid,
     input uwire [w-1:0] plier, cand );

   localparam int iterations = ( w + m - 1 ) / m;
   localparam int iter_lg = $clog2(iterations);

   // Multiplicand viewed as an array of m-bit digits.
   uwire [iterations-1:0][m-1:0] cand_2d = cand;

   // Index of the digit whose partial product is added this cycle.
   // A value of zero after the start cycle indicates completion.
   bit [iter_lg-1:0] iter;

   // Running sum of partial products.
   logic [2*w-1:0] accum;

   always_ff @( posedge clk ) begin

      logic [iter_lg-1:0] next_iter;

      /// SOLUTION -- Problem 2
      //
      //  Implement handshaking.
      //  Computation is completed when iter is zero. (See below.)
      //
      if ( in_valid ) begin

         iter = 0;
         accum = 0;
         out_avail = 0;

      end else if ( !out_avail && iter == 0 ) begin

         prod = accum;
         out_avail = 1;

      end

      // Add the partial product for digit iter, shifted into position.
      // This is done every cycle; prod is only written in the branch above.
      //
      accum += plier * cand_2d[iter] << ( iter * m );

      /// SOLUTION -- Problem 2
      //
      //  Set iter to ..
      //  .. index of next non-zero multiplicand digit ..
      //  .. or to zero if multiplication is complete.
      //
      //  Scan multiplicand digits starting at most significant digit.
      //  Update next_iter whenever ..
      //    i > iter ( meaning that partial product has not yet been used ) ..
      //    and digit, cand_2d[i], is non-zero.
      //
      //  Because the scan runs from high index to low, the last update
      //  wins, leaving next_iter at the smallest qualifying index.
      //
      next_iter = 0;
      for ( int i=iterations-1; i>0; i-- )
        if ( i>iter && cand_2d[i] ) next_iter = i;

      iter = next_iter;

   end

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Comparison Modules

 /// The modules below are for reference.

// mult_seq_ds_prob_1_orig: Free-running version of the Problem 1 multiplier.
//
// in_valid is not used and out_avail is never driven here. A new
// multiplication starts, and prod is written, each time iter reaches
// iterations.
//
module mult_seq_ds_prob_1_orig
  #( int w = 16,
     int m = 2 )
   ( output logic [2*w-1:0] prod,
     output uwire out_avail,
     input uwire clk, in_valid,
     input uwire [w-1:0] plier, cand );

   /// DO NOT MODIFY THIS MODULE.
   //  It is to be used for comparison when performing synthesis.

   localparam int iterations = ( w + m - 1 ) / m;
   localparam int iter_lg = $clog2(iterations);

   localparam logic [w+m-1:0] zero = 0; // Used to set precision to w+m bits.

   uwire [iterations-1:0][m-1:0] cand_2d = cand;

   bit [iter_lg:0] iter;
   logic [2*w-1:0] accum;

   always_ff @( posedge clk ) begin

      if ( iter == iterations ) begin

         prod = accum;
         accum = cand;
         iter = 0;

      end

      // Note: accum[m-1:0] is the same as cand_2d[iter];
      accum = { zero + plier * accum[m-1:0] + accum[2*w-1:w],
                accum[w-1:m] };
      iter++;

   end

endmodule

// mult_seq_d_prob_2_orig: Free-running version of the Problem 2 multiplier.
//
// in_valid is not used and out_avail is never driven here. Every digit of
// cand is visited, including digits that are zero.
//
module mult_seq_d_prob_2_orig
  #( int w = 16,
     int m = 2 )
   ( output logic [2*w-1:0] prod,
     output uwire out_avail,
     input uwire clk, in_valid,
     input uwire [w-1:0] plier, cand );

   /// DO NOT MODIFY THIS MODULE.
   //  It is to be used for comparison when performing synthesis.

   localparam int iterations = ( w + m - 1 ) / m;
   localparam int iter_lg = $clog2(iterations);

   uwire [iterations-1:0][m-1:0] cand_2d = cand;

   bit [iter_lg:0] iter;
   logic [2*w-1:0] accum;

   always_ff @( posedge clk ) begin

      if ( iter == iterations ) begin

         prod = accum;
         accum = 0;
         iter = 0;

      end

      accum += plier * cand_2d[iter] << ( iter * m );
      iter++;

   end

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

// reactivate: Passes clk and cycle through a program block, making copies
// (clk_reactive, cycle_reactive) that are driven from program context.
//
program reactivate
  ( output uwire clk_reactive,
    output int cycle_reactive,
    input uwire clk,
    input var int cycle );
   assign clk_reactive = clk;
   assign cycle_reactive = cycle;
endprogram

// testbench: Instantiates several multipliers, applies the same sequence of
// operand pairs to each, and reports wrong-result and timing errors along
// with the average number of cycles per multiplication.
//
module testbench;

   localparam int w = 20;          // Operand width for all instances.
   localparam int num_tests = 400; // Operand pairs applied to each module.
   localparam int NUM_MULT = 20;   // Size of the per-instance signal arrays.
   localparam int err_limit = 7;   // Limit on error messages per module.

   bit use_others;
   logic [w-1:0] plier, cand;
   logic [w-1:0] plierp[NUM_MULT], candp[NUM_MULT];
   logic [2*w-1:0] prod[NUM_MULT];
   uwire availn[NUM_MULT];    // Connected to each instance's out_avail.
   logic avail[NUM_MULT];     // Copy of availn, skipping undriven (z) entries.
   logic in_valid[NUM_MULT];

   // A test that has been started: index into pliers/cands and start cycle.
   typedef struct { int tidx; int cycle_start; } Test_Vector;

   // Per-module-under-test bookkeeping.
   typedef struct {
      int idx;                      // Index into plierp, candp, prod, etc.
      int err_count = 0;            // Number of wrong products.
      int err_timing = 0;           // Number of tests not available by eta.
      Test_Vector tests_active[$];  // Tests started but not yet checked.
      bit all_tests_started = 0;
      bit seq = 0;
      bit pipe = 0;
      bit bpipe = 0;                // If 1, wait on avail when checking.
      int deg = 1;                  // Value of the instance's m parameter.
      int ncompleted = 0;
      int cyc_tot = 0;
      int latency = 0;              // Cycles allowed from start to result.
   } Info;
   Info pi[string];

   localparam int cycle_limit = num_tests * w * 4;
   int cycle;
   bit done;
   logic clock;

   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clock,cycle);

   // Clock generator and watchdog.
   //
   // clock++ yields the old value and toggles clock, so cycle advances
   // each time clock goes from 1 to 0. Simulation ends when done is set or
   // when the cycle limit is reached.
   //
   initial begin
      clock = 0;
      cycle = 0;
      fork
         forever #10 cycle += clock++;
         wait( done );
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;
      $finish();
   end

   // The tasks below each register a module under test in pi using the
   // key "<name> Deg <deg>". They differ only in the flags that they set.

   task pi_seq(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].bpipe = 0;
   endtask

   task pi_bseq(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].bpipe = 1;
   endtask

   task pi_pipe(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].pipe = 1;
      pi[m].bpipe = 0;
   endtask

   task pi_bpipe(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].pipe = 1;
      pi[m].bpipe = 1;
   endtask

   // Problem 1 solution instances, m = 1, 2, 4.

   mult_seq_ds_prob_1 #(w,1) prob1_m1(prod[6], availn[6], clock, in_valid[6],
                                      plierp[6], candp[6]);
   initial pi_bseq(6,"Prob 1",prob1_m1.m);

   mult_seq_ds_prob_1 #(w,2) prob1_m2(prod[7], availn[7], clock, in_valid[7],
                                      plierp[7], candp[7]);
   initial pi_bseq(7,"Prob 1",prob1_m2.m);

   mult_seq_ds_prob_1 #(w,4) prob1_m4(prod[9], availn[9], clock, in_valid[9],
                                      plierp[9], candp[9]);
   initial pi_bseq(9,"Prob 1",prob1_m4.m);

   // Reference (free-running) instances, m = 1, 2, 4.

   mult_seq_ds_prob_1_orig #(w,1) ms14(prod[14], availn[14], clock,
                                       in_valid[14], plierp[14], candp[14]);
   initial pi_seq(14,"Seq",ms14.m);

   mult_seq_ds_prob_1_orig #(w,2) ms4(prod[4], availn[4], clock,
                                      in_valid[4], plierp[4], candp[4]);
   initial pi_seq(4,"Seq",ms4.m);

   mult_seq_ds_prob_1_orig #(w,4) ms5(prod[5], availn[5], clock,
                                      in_valid[5], plierp[5], candp[5]);
   initial pi_seq(5,"Seq",ms5.m);

   // Problem 2 solution instances, m = 1, 2, 4.

   mult_seq_d_prob_2 #(w,1) prob2_m1(prod[17], availn[17], clock,
                                     in_valid[17], plierp[17], candp[17]);
   initial pi_bseq(17,"Prob 2",prob2_m1.m);

   mult_seq_d_prob_2 #(w,2) prob2_m2(prod[16], availn[16], clock,
                                     in_valid[16], plierp[16], candp[16]);
   initial pi_bseq(16,"Prob 2",prob2_m2.m);

   mult_seq_d_prob_2 #(w,4) prob2_m4(prod[15], availn[15], clock,
                                     in_valid[15], plierp[15], candp[15]);
   initial pi_bseq(15,"Prob 2",prob2_m4.m);

   // Copy driven out_avail values to avail. Entries that are z (for
   // example, the reference modules never drive out_avail) are left alone
   // so that the value assigned in the initial block below is kept.
   //
   always @* begin
      foreach ( availn[i] ) begin
         if ( availn[i] !== 1'bz ) avail[i] = availn[i];
      end
   end

   // Array of multiplier/multiplicand values to try out.
   // After these values are used a random number generator will be used.
   //
   int tests[$] = {1,1, 1,2, 1,3, 1,4, 1,5, 1,32, 32, 1};

   initial begin

      // Number of checker threads that have not yet finished.
      automatic int awaiting = pi.size();

      logic [w-1:0] pliers[num_tests], cands[num_tests];

      done = 0;

      // Compute the allowed latency of each module and initialize its inputs.
      //
      foreach ( pi[mut] ) begin
         automatic int midx = pi[mut].idx;
         automatic int steps = ( w + pi[mut].deg - 1 ) / pi[mut].deg;
         automatic int latency =
           !pi[mut].seq ? 1 : !pi[mut].pipe ? 2 * steps : steps;
         pi[mut].latency = latency;
         if ( pi[mut].bpipe == 0 ) begin
            // No handshaking: treat the output as always available.
            avail[midx] = 1;
         end
         in_valid[midx] = 0;
      end

      // Prepare operands: first the values in tests, then random values
      // masked to a randomly chosen number of bits.
      //
      for ( int i=0; i<num_tests; i++ ) begin
         automatic int num_bits_c = {$random()}%w + 1;
         automatic logic [w-1:0] mask_c = ( (w+1)'(1) << num_bits_c ) - 1;
         automatic int num_bits_p = {$random()}%w + 1;
         automatic logic [w-1:0] mask_p = ( (w+1)'(1) << num_bits_p ) - 1;

         pliers[i] = tests.size() ? tests.pop_front() : {$random()}&mask_p;
         cands[i] = tests.size() ? tests.pop_front() : {$random()}&mask_c;
      end

      // For modules marked pipe, change the inputs on cycles in which
      // in_valid is 0.
      //
      fork begin
         forever @( negedge clk_reactive ) begin
            foreach ( pi[mut] ) begin
               automatic int midx = pi[mut].idx;
               if ( !in_valid[midx] && pi[mut].pipe ) begin
                  plierp[midx] = cycle;
                  candp[midx] = 1;
               end
            end
         end
      end join_none;

      repeat ( 2 * w ) @( negedge clock );

      foreach ( pi[mutii] ) begin
         automatic string muti = mutii;

         // Stimulus thread: apply each operand pair with in_valid set
         // for one cycle.
         //
         fork begin
            automatic string mut = muti;
            automatic int midx = pi[mut].idx;
            for ( int i=0; i<num_tests; i++ ) begin
               automatic int gap_cyc =
                 !pi[mut].pipe ? w * 2 :
                 ( {$random} % 2 ) ? {$random} % ( w + 2 ) : 0;
               automatic Test_Vector tv;
               repeat ( gap_cyc ) @( negedge clock );
               plierp[midx] = pliers[i];
               candp[midx] = cands[i];
               in_valid[midx] = 1;
               tv.tidx = i;
               tv.cycle_start = cycle;
               pi[mut].tests_active.push_back( tv );
               @( negedge clock );
               in_valid[midx] = 0;
            end
            pi[mut].all_tests_started = 1;
         end join_none;

         // Checker thread: for each started test wait for the result and
         // compare it with the product computed here.
         //
         fork begin
            automatic string mut = muti;
            automatic int midx = pi[mut].idx;
            while ( 1 ) begin
               @( negedge clock );
               while ( pi[mut].tests_active.size() == 0
                       && !pi[mut].all_tests_started )
                 @( negedge clock );
               if ( pi[mut].tests_active.size() == 0 ) break;
               begin
                  automatic Test_Vector tv = pi[mut].tests_active.pop_front();
                  automatic int i = tv.tidx;
                  automatic logic [2*w-1:0] shadow_prod = pliers[i] * cands[i];
                  automatic int eta = tv.cycle_start + pi[mut].latency;
                  automatic bit timing_err = 0;
                  automatic int delta_t;

                  if ( pi[mut].bpipe ) begin

                     // Handshaking module: wait until avail or eta.
                     if ( !pi[mut].pipe && cycle == tv.cycle_start )
                       @( negedge clock );
                     while ( !avail[midx] && cycle < eta ) @( negedge clock );
                     if ( !avail[midx] || cycle > eta ) begin
                        timing_err = 1;
                        if ( pi[mut].err_timing++ < err_limit )
                          $write("At cyc %4d (eta %0d) avail not set for %s (idx %0d) after %0d cycles for 0x%0h*0x%0h.\n",
                                 cycle, eta, mut, midx,
                                 cycle - tv.cycle_start,
                                 pliers[i], cands[i]);
                     end

                  end else begin

                     // No handshaking: wait the full latency.
                     wait ( cycle >= eta );

                  end

                  delta_t = cycle - tv.cycle_start;

                  if ( !timing_err ) begin
                     pi[mut].ncompleted++;
                     pi[mut].cyc_tot += delta_t;
                  end

                  if ( !timing_err && shadow_prod !== prod[midx] ) begin
                     pi[mut].err_count++;
                     if ( pi[mut].err_count < err_limit ) begin
                        $write
                          ("%-15s test %5d cyc %0d+%0d (%0d) wrong: 0x%0h * 0x%0h: 0x%0h != 0x%0h (correct)\n",
                           mut, i, tv.cycle_start, delta_t, pi[mut].latency,
                           pliers[i], cands[i],
                           prod[midx], shadow_prod);
                     end
                  end
               end
            end
            awaiting--;
         end join_none;

      end

      // NOTE(review): the watchdog initial block calls $finish once
      // cycle >= cycle_limit, so the cycle > cycle_limit alternative below
      // may never be observed -- confirm.
      wait( awaiting == 0 || cycle > cycle_limit );

      $write("At cycle %0d. Error types: couldn't test / wrong result / timing\n",cycle);

      foreach ( pi[ mut ] )
        $write("For %-18s ran %4d tests, %4d/%4d/%4d errors found. Avg cyc %.1f\n",
               mut, num_tests,
               num_tests - pi[mut].ncompleted,
               pi[mut].err_count, pi[mut].err_timing,
               pi[mut].seq ? real'(pi[mut].cyc_tot) / pi[mut].ncompleted : 1);

      done = 1;

      $write("Modules instantiated with w = %0d.\n",w);

      $finish(2);

   end

endmodule

// cadence translate_on