////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2014 Homework 3
//
/// SOLUTION

/// Assignment  http://www.ece.lsu.edu/koppel/v/2014/hw03.pdf
/// Solution    http://www.ece.lsu.edu/koppel/v/2014/hw03_sol.pdf

/// Instructions:
//
// (1) Find the undergraduate workstation laboratory, room 126 EE
//     Building.
//
// (2) Locate your account.  If you did not get an account please
//     E-mail: koppel@ece.lsu.edu
//
// (3) Log in to a Linux workstation.
//     The account should start up with a WIMP interface (windows, icons,
//     mouse, pull-down menus)  ( :-) ) but one or two things need
//     to be done from a command-line shell.  If you need to brush up
//     on Unix commands follow http://www.ece.lsu.edu/koppel/v/4ltrwrd/.
//
// (4) If you haven't already, follow the account setup instructions here:
//     http://www.ece.lsu.edu/koppel/v/proc.html
//
// (5) Copy this assignment, local path name
//     /home/faculty/koppel/pub/ee4755/hw/2014f/hw03
//     to a directory ~/hw03 in your class account. (~ is your home
//     directory.) Use this file for your solution.
//
// (6) Find the problems in this file and solve them.
//
//     Your entire solution should be in this file.
//
//     Do not change module names.
//
// (7) Your solution will automatically be copied from your account by
//     the TA-bot.

/// Additional Resources
//
// Verilog Documentation
//    The Verilog Standard
//      http://standards.ieee.org/getieee/1800/download/1800-2012.pdf
//    Introductory Treatment (Warning: Does not include SystemVerilog)
//      Brown & Vranesic, Fundamentals of Digital Logic with Verilog, 3rd Ed.
//
// Account Setup and Emacs (Text Editor) Instructions
//      http://www.ece.lsu.edu/koppel/v/proc.html
//      To learn Emacs look for Emacs tutorial.
//
// Unix Help
//      http://www.ece.lsu.edu/koppel/v/4ltrwrd/


//////////////////////////////////////////////////////////////////////////////
/// Behavioral Multiplier

// Combinational reference multiplier: prod is the full 2*wid-bit
// product of plier and cand.  The testbench uses it as the golden model.
//
module mult_behav_1
  #(int wid = 16)
   (output logic[2*wid-1:0] prod,
    input logic[wid-1:0] plier, cand);

   assign prod = plier * cand;

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Simple m-Step Sequential Multiplier

// Sequential multiplier that adds up to pp_per_cycle partial products
// into accum on each positive clock edge.  After "iterations" cycles the
// accumulated value is copied to prod and the accumulator is cleared.
//
//   wid           Width of each operand; prod is 2*wid bits.
//   pp_per_cycle  Number of partial products added per clock cycle.
//
module mult_seq_m
  #( int wid = 16,
     int pp_per_cycle = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   localparam int iterations = ( wid + pp_per_cycle - 1 ) / pp_per_cycle;
   localparam int iter_lg = $clog2(iterations);

   logic [iter_lg:1] iter;
   logic [2*wid-1:0] accum;

   // cadence translate_off
   initial iter = 0;
   // cadence translate_on

   always @( posedge clk ) begin

      // iter is only iter_lg bits wide, so the comparison value is cast
      // to the same width.  When iterations is a power of two the cast
      // value is zero and the test fires when iter wraps around.
      //
      if ( iter == iter_lg'(iterations) ) begin

         prod = accum;
         accum = 0;
         iter = 0;

      end

      for ( int i=0; i<pp_per_cycle; i++ ) begin

         int pos;
         pos = iter * pp_per_cycle + i;

         // When pp_per_cycle does not divide wid the last iteration
         // reaches positions >= wid.  Skip them explicitly (as mult_pipe
         // does) rather than rely on an out-of-range bit select.
         //
         if ( pos < wid && cand[pos] ) accum += plier << pos;

      end

      iter++;

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// A Sequential Multiplier using a Carry-Save Adder

// Examine this module for Problem 1.
// Don't modify the module.

`include "/apps/linux/cadence/RC141/share/synth/lib/chipware/sim/verilog/CW/CW_csa.v"

// One partial product is folded into a carry-save pair of registers per
// clock cycle.  When pos reaches wid-1 the two CSA outputs are added to
// form prod and the registers are cleared.
//
module mult_seq_csa
  #( int wid = 16 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   localparam int wlog = $clog2(wid);

   logic [wlog-1:0] pos;
   logic [2*wid-1:0] accum_sum_a_reg, accum_sum_b_reg;
   wire co;

   // cadence translate_off
   initial begin
      pos = 0;
      accum_sum_a_reg = 0;
      accum_sum_b_reg = 0;
   end
   // cadence translate_on

   wire [2*wid-1:0] accum_sum_a, accum_sum_b;

   // Partial product for the current bit position.
   wire [2*wid-1:0] pp = cand[pos] ? plier << pos : 0;

   CW_csa #(2*wid) csa
     ( .carry(accum_sum_a), .sum(accum_sum_b), .co(co),
       .a(accum_sum_a_reg), .b(accum_sum_b_reg), .c(pp), .ci(1'b0) );

   always @( posedge clk ) pos <= pos + 1;

   always @( posedge clk ) begin

      if ( pos == wid-1 ) begin

         prod = accum_sum_a + accum_sum_b;
         accum_sum_a_reg = 0;
         accum_sum_b_reg = 0;

      end else begin

         accum_sum_a_reg = accum_sum_a;
         accum_sum_b_reg = accum_sum_b;

      end

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// An m-bit Sequential Multiplier using a CSA

/// Problem 2: Modify this module.

// A chain of pp_per_cycle carry-save adders folds pp_per_cycle partial
// products into a carry-save register pair each cycle.  After
// "iterations" cycles the two registers are added to form prod and the
// registers and the iteration counter are cleared.
//
//   wid           Width of each operand; prod is 2*wid bits.
//   pp_per_cycle  Number of partial products (and CSAs) per clock cycle.
//
module mult_seq_csa_m
  #( int wid = 16,
     int pp_per_cycle = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   /// SOLUTION

   localparam int iterations = ( wid + pp_per_cycle - 1 ) / pp_per_cycle;
   localparam int iter_lg = $clog2(iterations);

   // Number of bits needed for the largest value iter * pp_per_cycle + i
   // can take (iter ranges from 0 to iterations).  pos must not wrap,
   // otherwise the pos < wid test below cannot reject positions past
   // the end of cand when pp_per_cycle does not divide wid.
   //
   localparam int pos_lg = $clog2( ( iterations + 1 ) * pp_per_cycle );

   logic [iter_lg:0] iter;

   // cadence translate_off
   initial iter = 0;
   // cadence translate_on

   // Element 0 holds the register outputs, element i+1 holds the output
   // of CSA i.
   //
   wire [2*wid-1:0] accum_sum_a[0:pp_per_cycle], accum_sum_b[0:pp_per_cycle];
   logic [2*wid-1:0] accum_sum_a_reg, accum_sum_b_reg;

   assign accum_sum_a[0] = accum_sum_a_reg;
   assign accum_sum_b[0] = accum_sum_b_reg;

   for ( genvar i=0; i<pp_per_cycle; i++ ) begin

      wire [pos_lg:0] pos = iter * pp_per_cycle + i;
      wire co; // Unconnected.

      wire [2*wid-1:0] pp = pos < wid && cand[pos] ? plier << pos : 0;

      CW_csa #(2*wid) csa
        ( .sum(accum_sum_a[i+1]), .carry(accum_sum_b[i+1]), .co(co),
          .a(accum_sum_a[i]), .b(accum_sum_b[i]), .c(pp), .ci(1'b0) );

   end

   always @( posedge clk ) begin

      if ( iter == iterations ) begin

         // The commented-out line below shows the wrong way of
         // designing this module.
         //
         // prod = accum_sum_a[pp_per_cycle] + accum_sum_b[pp_per_cycle];

         // Note that the product is computed by using the register
         // outputs, rather than the output of the last CSA.
         //
         prod <= accum_sum_a_reg + accum_sum_b_reg;
         accum_sum_a_reg <= 0;
         accum_sum_b_reg <= 0;
         iter <= 0;

      end else begin

         accum_sum_a_reg <= accum_sum_a[pp_per_cycle];
         accum_sum_b_reg <= accum_sum_b[pp_per_cycle];
         iter <= iter + 1;

      end

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Pipelined Multiplier

// Pipelined multiplier described in a single always block.  Each stage
// adds up to pp_per_stage partial products to the accumulator value it
// receives and passes the operands along to the next stage.  prod is the
// accumulator register written by the last stage.
//
//   wid           Width of each operand; prod is 2*wid bits.
//   pp_per_stage  Number of partial products added in each stage.
//
module mult_pipe
  #( int wid = 16,
     int pp_per_stage = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   localparam int stages = ( wid + pp_per_stage - 1 ) / pp_per_stage;

   logic [2*wid-1:0] pl_accum[0:stages];
   logic [wid-1:0] pl_plier[0:stages];
   logic [wid-1:0] pl_cand[0:stages];

   always @( posedge clk ) begin

      // Element 0 is written with blocking assignments so that stage 0
      // below sees the module inputs in this same clock edge.
      //
      pl_accum[0] = 0;
      pl_plier[0] = plier;
      pl_cand[0] = cand;

      for ( int stage=0; stage<stages; stage++ ) begin

         logic [2*wid-1:0] accum;
         accum = pl_accum[stage];

         for ( int j=0; j<pp_per_stage; j++ ) begin

            int pos;
            pos = stage * pp_per_stage + j;

            if ( pos < wid && pl_cand[stage][pos] )
              accum += pl_plier[stage] << pos;

         end

         pl_accum[stage+1] <= accum;
         pl_cand[stage+1] <= pl_cand[stage];
         pl_plier[stage+1] <= pl_plier[stage];

      end

   end

   assign prod = pl_accum[stages];

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Pipelined Multiplier, Instantiated Stages

// Combinational logic for one pipeline stage: adds the partial products
// for positions stage*pp_per_stage .. stage*pp_per_stage+pp_per_stage-1
// (those below wid) to accum_in and drives the result on accum_out.
//
module mult_pipe_stage
  #( int wid = 16,
     int pp_per_stage = 2,
     int stage = 0 )
   ( output logic [2*wid-1:0] accum_out,
     input [2*wid-1:0] accum_in,
     input [wid-1:0] plier,
     input [wid-1:0] cand);

   always @* begin

      logic [2*wid-1:0] accum;
      accum = accum_in;

      for ( int j=0; j<pp_per_stage; j++ ) begin

         int pos;
         pos = stage * pp_per_stage + j;

         if ( pos < wid && cand[pos] ) accum += plier << pos;

      end

      accum_out = accum;

   end

endmodule

// Pipelined multiplier built from one mult_pipe_stage instance per
// stage, each followed by pipeline registers for the accumulator and
// the two operands.
//
module mult_pipe_ia
  #( int wid = 16,
     int pp_per_stage = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   localparam int stages = ( wid + pp_per_stage - 1 ) / pp_per_stage;

   logic [2*wid-1:0] pl_accum[0:stages];
   logic [wid-1:0] pl_plier[0:stages];
   logic [wid-1:0] pl_cand[0:stages];

   // Element 0 is driven combinationally from the module inputs.
   always @* begin
      pl_accum[0] = 0;
      pl_plier[0] = plier;
      pl_cand[0] = cand;
   end

   for ( genvar stage = 0; stage < stages; stage++ ) begin

      wire logic [2*wid-1:0] accum;

      // The stage logic is the mult_pipe_stage module defined above.
      mult_pipe_stage #(wid, pp_per_stage, stage) this_stage
        ( accum, pl_accum[stage], pl_plier[stage], pl_cand[stage]);

      always @( posedge clk ) begin
         pl_accum[stage+1] <= accum;
         pl_plier[stage+1] <= pl_plier[stage];
         pl_cand[stage+1] <= pl_cand[stage];
      end

   end

   assign prod = pl_accum[stages];

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

// Applies num_tests operand pairs to every multiplier and compares each
// result against the behavioral multiplier (prod[0]).  Sequential units
// are read after a long settling delay; pipelined units have their
// outputs sampled at the time the result for the test operands emerges.
//
module testbench;

   localparam int wid = 16;
   localparam int num_tests = 1000;
   localparam int NUM_MULT = 10;
   localparam int err_limit = 7;
   localparam bit pipeline_test_exact = 1;

   logic clock;
   always #1 clock <= !clock;

   logic [wid-1:0] plier, cand;
   logic [wid-1:0] plierp, candp;
   logic [2*wid-1:0] prod[NUM_MULT];
   logic [2*wid-1:0] prodp[NUM_MULT];

   mult_behav_1 #(wid) mb1(prod[0], plier, cand);

   // NOTE: ms44 adds 8 partial products per cycle although its report
   // label below reads "Seq m4".
   mult_seq_m #(wid,8) ms44(prod[1], plier, cand, clock);
   mult_seq_m #(wid,3) ms43(prod[2], plier, cand, clock);
   mult_seq_csa #(wid) mc(prod[3], plier, cand, clock);
   mult_seq_csa_m #(wid,4) mc4(prod[4], plier, cand, clock);
   mult_seq_csa_m #(wid,1) mc1(prod[5], plier, cand, clock);

   // Partial products per stage for the two pipelined configurations,
   // and the resulting number of stages.
   //
   localparam int ppps_1 = 4;
   localparam int ppps_2 = 1;
   localparam int stages_1 = ( wid + ppps_1 - 1 ) / ppps_1;
   localparam int stages_2 = ( wid + ppps_2 - 1 ) / ppps_2;

   mult_pipe #(wid,ppps_1) mp4(prodp[6], plierp, candp, clock);
   mult_pipe #(wid,ppps_2) mp3(prodp[7], plierp, candp, clock);
   mult_pipe_ia #(wid,ppps_1) mpi4(prodp[8], plierp, candp, clock);
   mult_pipe_ia #(wid,ppps_2) mpi3(prodp[9], plierp, candp, clock);

   string names[] = '{"Behav_1", "Seq m4", "Seq m3", "Seq CSA",
                      "Seq CSA m4", "Seq CSA m1",
                      "Pipelined m4", "Pipelined m1",
                      "Pipelined IA m4", "Pipelined IA m1" };

   int err_cnt[NUM_MULT];

   // Array of multiplier/multiplicand values to try out.
   // After these values are used a random number generator will be used.
   //
   int tests[$] = {1,1, 1,2, 2,1, 'h10,'h20, 1,32, 32, 1};

   initial begin

      clock = 0;

      for ( int i=0; i<num_tests; i++ ) begin

         // Change input to pipelined units.
         //
         for ( int t=0; t<=wid; t++ ) begin
            plierp = t;
            candp = 256;
            #2;
         end

         // Set multiplier and multiplicand values for non-piped units.
         //
         plier = tests.size() ? tests.pop_front() : $random();
         cand = tests.size() ? tests.pop_front() : $random();

         // Set multiplier and multiplicand values for piped units.
         //
         plierp = plier;
         candp = cand;

         // For pipelined units, copy output at the time it should be ready.
         // The clock period is 2 time units, so a unit with s stages is
         // sampled 2*s time units after its inputs are applied.  Each
         // prod[k] is taken from the matching prodp[k].
         //
         fork
            #(2 * stages_1) prod[6] = prodp[6];
            #(2 * stages_1) prod[8] = prodp[8];
            #(2 * stages_2) prod[7] = prodp[7];
            #(2 * stages_2) prod[9] = prodp[9];
         join_none

         if ( pipeline_test_exact ) begin

            // Modify the inputs to the pipelined units in subsequent cycles.
            //
            for ( int t=0; t<=wid; t++ ) begin
               #2;
               plierp = t;
               candp = 1;
            end

            plierp = 0;
            candp = 0;

         end

         #1000;

         // Make sure each module's output is correct.
         //
         for ( int mut=1; mut<NUM_MULT; mut++ ) begin

            if ( prod[0] !== prod[mut] ) begin

               err_cnt[mut]++;

               if ( err_cnt[mut] < err_limit )
                 $display("Error in %s test %4d:  %x != %x (correct)\n",
                          names[mut], i, prod[mut], prod[0]);

            end

         end

      end

      // Tests completed, report error count for each device.
      //
      for ( int mut=1; mut<NUM_MULT; mut++ ) begin

         $display("Mut %s, %d errors (%.1f%% of tests)\n",
                  names[mut], err_cnt[mut],
                  100.0 * err_cnt[mut]/real'(num_tests) );

      end

      $finish(2);

   end

endmodule

// cadence translate_on