////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2017 Homework 7
//
 /// Assignment  http://www.ece.lsu.edu/koppel/v/2017/hw07.pdf

 /// Instructions:
  //
  // (1) Find the undergraduate workstation laboratory, room 126 EE
  //     Building.
  //
  // (2) Locate your account.  If you did not get an account please
  //     E-mail: koppel@ece.lsu.edu
  //
  // (3) Log in to a Linux workstation.
  //
  // (4) If you haven't already, follow the account setup instructions here:
  //     http://www.ece.lsu.edu/koppel/v/proc.html
  //
  // (5) Copy this assignment, local path name
  //     /home/faculty/koppel/pub/ee4755/hw/2017/hw07
  //     to a directory ~/hw07 in your class account. (~ is your home
  //     directory.) Use this file for your solution.
  ///      BE SURE THAT YOUR FILE IS CORRECTLY NAMED AND IN THE RIGHT PLACE.
  //
  // (6) Find the problems in this file and solve them.
  //
  //     Your entire solution should be in this file.
  //
  //     Do not change module names.
  //
  // (7) Your solution will automatically be copied from your account by
  //     the TA-bot.

 /// Additional Resources
  //
  // Verilog Documentation
  //    The Verilog Standard
  //      http://standards.ieee.org/getieee/1800/download/1800-2012.pdf
  //    Introductory Treatment (Warning: Does not include SystemVerilog)
  //      Brown & Vranesic, Fundamentals of Digital Logic with Verilog, 3rd Ed.
  //
  // Account Setup and Emacs (Text Editor) Instructions
  //      http://www.ece.lsu.edu/koppel/v/proc.html
  //      To learn Emacs look for Emacs tutorial.

//////////////////////////////////////////////////////////////////////////////
/// Problems 1 and 2
//
 /// Complete so that mult_fast sets out_avail as described in the handout.
//
//     [ ] Module must be synthesizable.
//     [ ] Code must be reasonably efficient.
//     [ ] Do not change module parameters.
//     [ ] Do not change ports, EXCEPT changing between var and net kinds.
//     [ ] The module must be synthesizable.
//     [ ] Don't assume that parameter values will match those used here.
//     [ ] USE DEBUGGING TOOLS LIKE SimVision.
//     [ ] Make sure that Avg cyc shown in testbench for Fast is lower
//         than Pipelined module of same degree (when all tests pass).

/// mult_fast -- Pipelined multiplier that delivers results early.
//
//  Parameters:
//    w  Width, in bits, of plier and cand.
//    m  Number of cand bits consumed by each pipeline stage.
//
//  Ports:
//    prod       Product. Holds the product of the delivered operation
//               in any cycle in which out_avail is 1.
//    out_avail  1 for exactly one cycle per operation that was presented
//               with in_valid = 1.
//    clk        Clock. Pipeline latches update at the positive edge.
//    in_valid   1 when plier and cand carry a new operation.
//    plier      Multiplier operand (carried unchanged down the pipe).
//    cand       Operand consumed m bits per stage, least-significant first.
//
//  Operation:
//    Stage s latches hold pl_accum[s] = plier * (low s*m bits of cand)
//    and pl_cand[s] = cand >> s*m, the bits not yet used. When
//    pl_cand[s] is zero the accumulated value is already the full
//    product. Such a stage delivers its result provided that no later
//    stage is occupied, which keeps results in issue order. A delivered
//    operation is removed from the pipe so that it is delivered once.
//    The last stage always has pl_cand == 0, so an operation is
//    delivered no later than nstages cycles after it was presented.
//
module mult_fast
  #( int w = 16, int m = 4 )
   ( output uwire [2*w-1:0] prod,
     output uwire out_avail,
     input uwire clk, in_valid,
     input uwire [w-1:0] plier, cand );

   localparam int nstages = ( w + m - 1 ) / m;

   // Note: pl is for pipeline latch. Index 0 holds the module inputs.
   logic [2*w-1:0] pl_accum[0:nstages];
   logic [w-1:0] pl_plier[0:nstages];
   logic [w-1:0] pl_cand[0:nstages];

   // pl_occ[s] is 1 when stage s holds an operation that was presented
   // with in_valid = 1 and has not yet been delivered.
   logic pl_occ[0:nstages];

   // emit[s] is 1 when stage s delivers its result in the current cycle.
   logic emit[0:nstages];

   logic [2*w-1:0] prod_sel;
   logic avail_any;

   always_comb begin

      // Set to 1 once an occupied stage has been seen. Because the scan
      // runs from the last stage toward the first, blocked = 1 means
      // that an older operation is still in the pipe.
      logic blocked;
      blocked = 0;

      avail_any = 0;
      prod_sel = pl_accum[nstages];

      // Stage 0 holds the unlatched inputs and never delivers.
      emit[0] = 0;

      for ( int stage = nstages; stage >= 1; stage-- ) begin
         emit[stage] = pl_occ[stage] && !blocked && pl_cand[stage] == 0;
         if ( emit[stage] ) prod_sel = pl_accum[stage];
         avail_any = avail_any || emit[stage];
         blocked = blocked || pl_occ[stage];
      end
   end

   assign prod = prod_sel;
   assign out_avail = avail_any;

   always_ff @( posedge clk ) begin

      pl_accum[0] = 0;
      pl_plier[0] = plier;
      pl_cand[0] = cand;
      pl_occ[0] = in_valid;

      for ( int stage=0; stage<nstages; stage++ ) begin

         pl_accum[stage+1] <=
           pl_accum[stage]
             + ( pl_plier[stage] * pl_cand[stage][m-1:0] << stage*m );
         pl_cand[stage+1] <= pl_cand[stage] >> m;
         pl_plier[stage+1] <= pl_plier[stage];

         // An operation delivered this cycle does not advance.
         pl_occ[stage+1] <= pl_occ[stage] && !emit[stage];

      end
   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Comparison Modules

 ///  The modules below are for reference.

/// mult_behav_1 -- Combinational multiplier using the * operator.
//
module mult_behav_1
  #(int w = 16)
   (output logic [2*w-1:0] prod,
    input logic [w-1:0] plier, cand);

   assign prod = plier * cand;

endmodule


 /// :Example: Basic Pipelined Multiplier -- mult_pipe
//
//   Computes m partial products per stage.
//
module mult_pipe
  #( int w = 16, int m = 4 )
   ( output logic [2*w-1:0] prod,
     input logic [w-1:0] plier,
     input logic [w-1:0] cand,
     input clk);

   localparam int stages = ( w + m - 1 ) / m;

   // Note: pl is for pipeline latch.
   logic [2*w-1:0] pl_accum[0:stages];
   logic [w-1:0] pl_plier[0:stages];
   logic [w-1:0] pl_cand[0:stages];

   always_ff @( posedge clk ) begin

      pl_accum[0] = 0;
      pl_plier[0] = plier;
      pl_cand[0] = cand;

      for ( int stage=0; stage<stages; stage++ ) begin

         logic [2*w-1:0] accum;
         accum = pl_accum[stage];

         // Add the partial product for each set cand bit in this
         // stage's group of m bit positions.
         for ( int j=0; j<m; j++ ) begin

            int pos;
            pos = stage * m + j;

            if ( pos < w && pl_cand[stage][pos] )
              accum += pl_plier[stage] << pos;

         end

         /// Values to use in the next clock cycle, *not* the next iteration.
         pl_accum[stage+1] <= accum;
         pl_cand[stage+1] <= pl_cand[stage];
         pl_plier[stage+1] <= pl_plier[stage];

      end
   end

   assign prod = pl_accum[stages];

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

/// reactivate -- Pass the clock and cycle count through a program block.
//
program reactivate
   (output uwire clk_reactive, output int cycle_reactive,
    input uwire clk, input int cycle);
   assign clk_reactive = clk;
   assign cycle_reactive = cycle;
endprogram

/// testbench -- Drive each multiplier and check results and timing.
//
//  For each multiplier listed in pi, one thread issues num_tests
//  operand pairs (with random gaps) and another thread checks each
//  product and the cycle at which it is available. A summary line
//  per multiplier is printed at the end.
//
module testbench;

   localparam int w = 16;
   localparam int num_tests = 400;
   localparam int NUM_MULT = 20;
   localparam int err_limit = 7;

   bit use_others;
   logic [w-1:0] plier, cand;
   logic [w-1:0] plierp[NUM_MULT], candp[NUM_MULT];
   logic [2*w-1:0] prod[NUM_MULT];
   uwire availn[NUM_MULT];
   logic avail[NUM_MULT];
   logic in_valid[NUM_MULT];

   // A test that has been issued but not yet checked.
   typedef struct { int tidx; int cycle_start; } Test_Vector;

   // Per-multiplier description and statistics.
   typedef struct
     { int idx;
       int err_count = 0;
       int err_timing = 0;
       Test_Vector tests_active[$];
       bit all_tests_started = 0;
       bit seq = 0;
       bit pipe = 0;
       bit bpipe = 0;
       int deg = 1;
       logic [2*w-1:0] sout = 'h111;
       logic [2*w-1:0] prod_history[$];
       int ncompleted = 0;
       int cyc_tot = 0;
       int latency = 0;
       } Info;
   Info pi[string];

   localparam int cycle_limit = num_tests * w * 4;
   int cycle;
   bit done;
   logic clock;

   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clock,cycle);

   // Clock generator. cycle is incremented when clock goes from 1 to 0.
   initial begin
      clock = 0;
      cycle = 0;
      fork
         forever #10 cycle += clock++;
         wait( done );
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;
      $finish();
   end

   // Register a sequential multiplier.
   task pi_seq(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].bpipe = 0;
   endtask

   // Register a pipelined multiplier with fixed latency.
   task pi_pipe(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].pipe = 1;
      pi[m].bpipe = 0;
   endtask

   // Register a pipelined multiplier that reports availability itself.
   task pi_bpipe(input int idx, input string name, input int deg);
      automatic string m = $sformatf("%s Deg %0d", name, deg);
      pi[m].deg = deg;
      pi[m].idx = idx;
      pi[m].seq = 1;
      pi[m].pipe = 1;
      pi[m].bpipe = 1;
   endtask

   mult_behav_1 #(w) mb1(prod[0], plierp[0], candp[0]);
   initial pi["Behavioral"].idx = 0;

   mult_pipe #(w,4) ms54(prod[7], plierp[7], candp[7], clock);
   initial pi_pipe(7,"Pipelined",ms54.m);

   mult_pipe #(w,2) ms53(prod[8], plierp[8], candp[8], clock);
   initial pi_pipe(8,"Pipelined",ms53.m);

   mult_fast #(w,4) ms17(prod[17], availn[17], clock, in_valid[17],
                         plierp[17], candp[17]);
   initial pi_bpipe(17,"Fast",ms17.m);

   mult_fast #(w,2) ms16(prod[16], availn[16], clock, in_valid[16],
                         plierp[16], candp[16]);
   initial pi_bpipe(16,"Fast",ms16.m);

   mult_fast #(w,1) ms15(prod[15], availn[15], clock, in_valid[15],
                         plierp[15], candp[15]);
   initial pi_bpipe(15,"Fast",ms15.m);

   // Copy driven elements of availn to avail. Undriven (z) elements
   // leave avail at the value assigned by the testbench.
   always @* begin
      foreach ( availn[i] ) begin
         if ( availn[i] !== 1'bz ) avail[i] = availn[i];
      end
   end

   // Array of multiplier/multiplicand values to try out.
   // After these values are used a random number generator will be used.
   //
   int tests[$] = {1,1, 1,2, 1,3, 1,4, 1,5, 1,32, 32, 1};

   initial begin

      automatic int awaiting = pi.size();

      logic [w-1:0] pliers[num_tests], cands[num_tests];

      done = 0;

      // Compute the expected latency of each multiplier.
      foreach ( pi[mut] ) begin
         automatic int midx = pi[mut].idx;
         automatic int steps = ( w + pi[mut].deg - 1 ) / pi[mut].deg;
         automatic int latency =
           !pi[mut].seq ? 1 : !pi[mut].pipe ? 2 * steps : steps;
         pi[mut].latency = latency;
         if ( pi[mut].bpipe == 0 ) begin
            avail[midx] = 1;
         end
         in_valid[midx] = 0;
      end

      // Prepare operands: first from tests, then random values
      // masked to a random number of bits.
      for ( int i=0; i<num_tests; i++ ) begin
         automatic int num_bits_c = {$random()}%w + 1;
         automatic logic [w-1:0] mask_c = ( (w+1)'(1) << num_bits_c ) - 1;
         automatic int num_bits_p = {$random()}%w + 1;
         automatic logic [w-1:0] mask_p = ( (w+1)'(1) << num_bits_p ) - 1;

         pliers[i] = tests.size() ? tests.pop_front() : {$random()}&mask_p;
         cands[i] = tests.size() ? tests.pop_front() : {$random()}&mask_c;
      end

      // Drive filler operands into pipelined multipliers whenever
      // in_valid is 0.
      fork begin
         forever @( negedge clk_reactive ) begin
            foreach ( pi[mut] ) begin
               automatic int midx = pi[mut].idx;
               if ( !in_valid[midx] && pi[mut].pipe ) begin
                  plierp[midx] = cycle;
                  candp[midx] = 1;
               end
            end
         end
      end join_none;

      // Record recent product outputs of pipelined multipliers.
      fork begin
         forever @( posedge clk_reactive ) begin
            foreach ( pi[mut] ) begin
               if ( pi[mut].pipe ) begin
                  automatic int midx = pi[mut].idx;
                  pi[mut].prod_history.push_back(prod[midx]);
                  if ( pi[mut].prod_history.size() > 2 * w ) begin
                     automatic bit pad = pi[mut].prod_history.pop_front();
                  end
               end
            end
         end
      end join_none;

      repeat ( 2 * w ) @( negedge clock );

      foreach ( pi[mutii] ) begin
         automatic string muti = mutii;

         // Issue thread: present each operand pair for one cycle.
         fork begin
            automatic string mut = muti;
            automatic int midx = pi[mut].idx;
            for ( int i=0; i<num_tests; i++ ) begin
               automatic int gap_cyc =
                 ( {$random} % 2 ) ? {$random} % ( w + 2 ) : 0;
               automatic int wait_limit = cycle + 10 * w + gap_cyc;
               automatic Test_Vector tv;
               repeat ( gap_cyc ) @( negedge clock );
               if ( cycle > wait_limit ) begin
                  $write("Wait limit exceeded for %s.\n", mut);
                  break;
               end
               plierp[midx] = pliers[i];
               candp[midx] = cands[i];
               in_valid[midx] = 1;
               tv.tidx = i;
               tv.cycle_start = cycle;
               pi[mut].tests_active.push_back( tv );
               @( negedge clock );
               in_valid[midx] = 0;
            end
            pi[mut].all_tests_started = 1;
         end join_none;

         // Check thread: verify timing and product of each issued test.
         fork begin
            automatic string mut = muti;
            automatic int midx = pi[mut].idx;
            while ( 1 ) begin
               @( negedge clock );
               while ( pi[mut].tests_active.size() == 0
                       && !pi[mut].all_tests_started )
                 @( negedge clock );
               if ( pi[mut].tests_active.size() == 0 ) break;
               begin
                  automatic Test_Vector tv =
                    pi[mut].tests_active.pop_front();
                  automatic int i = tv.tidx;
                  automatic logic [2*w-1:0] shadow_prod =
                    pliers[i] * cands[i];
                  automatic int eta = tv.cycle_start + pi[mut].latency;
                  automatic bit timing_err = 0;
                  automatic int delta_t;

                  if ( pi[mut].bpipe ) begin
                     while ( !avail[midx] && cycle < eta )
                       @( negedge clock );
                     if ( !avail[midx] || cycle > eta ) begin
                        timing_err = 1;
                        if ( pi[mut].err_timing++ < err_limit )
                          $write("At cyc %4d (eta %0d) avail not set for %s (idx %0d) after %0d cycles for 0x%0h*0x%0h.\n",
                                 cycle, eta, mut, midx,
                                 cycle - tv.cycle_start,
                                 pliers[i], cands[i]);
                     end
                  end else begin
                     wait ( cycle >= eta );
                  end

                  delta_t = cycle - tv.cycle_start;
                  if ( !timing_err ) begin
                     pi[mut].ncompleted++;
                     pi[mut].cyc_tot += delta_t;
                  end

                  if ( !timing_err && shadow_prod !== prod[midx] ) begin
                     pi[mut].err_count++;
                     if ( pi[mut].err_count < err_limit ) begin
                        $write
                          ("%-15s test %5d cyc %0d+%0d (%0d) wrong: 0x%0h * 0x%0h: 0x%0h != 0x%0h (correct)\n",
                           mut, i, tv.cycle_start, delta_t,
                           pi[mut].latency,
                           pliers[i], cands[i],
                           prod[midx], shadow_prod);
                     end
                  end
               end
            end
            awaiting--;
         end join_none;

      end

      wait( awaiting == 0 || cycle > cycle_limit );

      $write("At cycle %0d. Error types: couldn't test / wrong result / timing\n",cycle);

      foreach ( pi[ mut ] )
        $write("For %-18s ran %4d tests, %4d/%4d/%4d errors found. Avg cyc %.1f\n",
               mut, num_tests,
               num_tests - pi[mut].ncompleted,
               pi[mut].err_count, pi[mut].err_timing,
               pi[mut].seq
               ? real'(pi[mut].cyc_tot) / pi[mut].ncompleted : 1);

      done = 1;

      $finish(2);

   end

endmodule

// cadence translate_on