////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2014 Homework 2
//

 /// Assignment  http://www.ece.lsu.edu/koppel/v/2014f/hw02.pdf

 /// Instructions:
  //
  // (1) Find the undergraduate workstation laboratory, room 126 EE
  //     Building.
  //
  // (2) Locate your account.  If you did not get an account please
  //     E-mail: koppel@ece.lsu.edu
  //
  // (3) Log in to a Linux workstation.
  //     The account should start up with a WIMP interface (windows, icons,
  //     mouse, pull-down menus)  ( :-) ) but one or two things need
  //     to be done from a command-line shell.  If you need to brush up
  //     on Unix commands follow http://www.ece.lsu.edu/koppel/v/4ltrwrd/.
  //
  // (4) If you haven't already, follow the account setup instructions here:
  //     http://www.ece.lsu.edu/koppel/v/proc.html
  //
  // (5) Copy this assignment, local path name
  //     /home/faculty/koppel/pub/ee4755/hw/2014f/hw02
  //     to a directory ~/hw02 in your class account. (~ is your home
  //     directory.) Use this file for your solution.
  //
  // (6) Find the problems in this file and solve them.
  //
  //     Your entire solution should be in this file.
  //
  //     Do not change module names.
  //
  // (7) Your solution will automatically be copied from your account by
  //     the TA-bot.


 /// Additional Resources
  //
  // Verilog Documentation
  //    The Verilog Standard
  //      http://standards.ieee.org/getieee/1800/download/1800-2012.pdf
  //    Introductory Treatment (Warning: Does not include SystemVerilog)
  //      Brown & Vranesic, Fundamentals of Digital Logic with Verilog, 3rd Ed.
  //
  // Account Setup and Emacs (Text Editor) Instructions
  //      http://www.ece.lsu.edu/koppel/v/proc.html
  //      To learn Emacs look for Emacs tutorial.
  //
  // Unix Help
  //      http://www.ece.lsu.edu/koppel/v/4ltrwrd/


//////////////////////////////////////////////////////////////////////////////
/// Behavioral Multipliers


module mult_behav_1
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);
   assign prod = plier * cand;
endmodule


module mult_behav_2
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   always @* begin

      prod = 0;

      for ( int i=0; i<wid; i++ ) if ( plier[i] ) prod = prod + ( cand << i );

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Problem 2: Linear Multiplier

 /// Simple Adder, Don't Modify
module good_adder#(int w=16)(output [w:1] s, input [w:1] a,b);
   assign s = a + b;
endmodule


module mult_linear
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   /// Problem 2 Solution Goes Here.
   //  This module should be a structural version of mult_behav_2,
   //   using generate statements to instantiate good_adder.


   /// SOLUTION BELOW

   logic [2*wid-1:0] rsum [wid-1:-1];
   logic [2*wid-1:0] pp   [wid-1:0];

   assign rsum[-1] = 0;

   for ( genvar i=0; i<wid; i++ ) begin
      assign pp[i] = plier[i] ? cand << i : 0;
      good_adder #(2*wid) adder(rsum[i], rsum[i-1], pp[i] );
   end

   assign    prod = rsum[wid-1];

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Problem 3: Tree Multiplier


 /// Problem 3 Solutions
 //
 //  Several solutions appear below to Problem 3. The easy to
 //  understand solution is mult_tree_simple. Module mult_tree is
 //  compact (does not require alot of Verilog code). Module
 //  mult_tree_rec shows a recursive implementation. Module
 //  mult_tree_better uses a cost saving technique.
 //
 /// mult_tree
 //  This one is the shortest.  The tree is constructed using
 //  a single loop.
 //
 /// mult_tree_rec
 //  A recursive version.  The cost and performance will
 //  not be very good unless synthesized with option "-effort high"
 //  because without that option the synthesis program synthesizes
 //  modules without taking into account how they are instantiated.
 //  Among other things, that means the synthesis program can't eliminate
 //  unused wires.
 //
 /// mult_tree_simple
 //  Maybe the easiest to understand. The tree is constructed using
 //  two nested loops, the outer loop iterates over tree levels
 //  and the inner loop iterates over adders within a level.
 //
 /// mult_tree_better
 //  This is like mult_tree_simple, but instead of shifting the
 //  multiplicand, the intermediate sums are shifted.


module mult_tree
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   localparam int widp2 = 1 << $clog2(wid);

   /// SOLUTION BELOW to Problem 3
   //
   //  This is one of several solutions to Problem 3.

   logic [2*wid-1:0] rsum [2*wid-1:0];
   localparam int mask = 2*wid-1;

   // Compute partial products.
   //
   for ( genvar i=0; i<wid; i++ )
     assign rsum[i] = plier[i] ? cand << i : 0;

   // Add partial products together.
   //
   for ( genvar i=wid; i<2*wid-1; i++ )
     good_adder #( 2*wid ) adder
                 ( rsum[i],
                   rsum[ mask &   (i<<1)       ],   // Left child.
                   rsum[ mask & ( (i<<1) + 1 ) ]    // Right child.
                   );

   assign    prod = rsum[2*wid-2];

endmodule

module mult_tree_rec
  #( int wid_plier = 16,
     int wid_cand = wid_plier )
   ( output logic [2*wid_plier-1:0] prod,
     input logic [wid_plier-1:0] plier,
     input logic [wid_cand-1:0] cand);

   localparam int wid_cr_h = wid_cand / 2;
   localparam int wid_cr_l = wid_cand - wid_cr_h;

   generate

     if ( wid_cand == 1 ) begin

        assign prod = cand[0] ? plier : 0;

     end else begin

        wire    logic [2*wid_plier-1:0] prod_h, prod_l;

        mult_tree_rec #(wid_plier, wid_cr_h) m_h
          (prod_h, plier, cand[wid_cand-1:wid_cr_l] );

        mult_tree_rec #(wid_plier, wid_cr_l) m_l
          (prod_l, plier, cand[wid_cr_l-1:0] );

        good_adder #(2*wid_plier) adder
          (prod, prod_h << wid_cr_l, prod_l);

     end

   endgenerate

endmodule



module mult_tree_simple
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   localparam int levels = $clog2(wid);

   logic [2*wid-1:0] rsum [2*wid-1:0][levels:0];

   for ( genvar lev=0; lev<levels; lev++ ) begin

      localparam int siblings = 1 << lev;

      for ( genvar i=0; i<siblings; i++ )

        good_adder #(2*wid) adder
                     ( rsum[i][lev],
                       rsum[i*2][lev+1],
                       rsum[i*2+1][lev+1] );

   end

   for ( genvar i=0; i<wid; i++ )
     assign rsum[i][levels] = plier[i] ? cand << i : 0;

   assign   prod = rsum[0][0];

endmodule


module mult_tree_better
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   localparam int levels = $clog2(wid);

   logic [2*wid-1:0] rsum [2*wid-1:0][levels:0];

   for ( genvar lev=0; lev<levels; lev++ ) begin

      localparam int siblings = 1 << lev;
      localparam int shift = 1 << levels - lev - 1;

      for ( genvar i=0; i<siblings; i++ )

        good_adder #(2*wid) adder
                     ( rsum[i][lev],
                       rsum[i*2+1][lev+1] << shift,
                       rsum[i*2][lev+1]);

   end

   // Notice that the multiplicand is not shifted here.
   //
   for ( genvar i=0; i<wid; i++ )
     assign rsum[i][levels] = plier[i] ? cand : 0;

   assign   prod = rsum[0][0];

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

module testbench;

   localparam int wid = 64;
   localparam int num_tests = 1000;
   localparam int NUM_MULT = 7;
   localparam int err_limit = 4;

   logic [wid-1:0] plier, cand;
   logic [2*wid-1:0] prod[NUM_MULT];

   mult_behav_1 #(wid) mb1(prod[0], plier, cand);
   mult_behav_2 #(wid) mb2(prod[1], plier, cand);
   mult_linear  #(wid) ms1(prod[2], plier, cand);
   mult_tree    #(wid) ms2(prod[3], plier, cand);
   mult_tree_rec #(wid) ms3(prod[4], plier, cand);
   mult_tree_simple #(wid) ms4(prod[5], plier, cand);
   mult_tree_better #(wid) ms5(prod[6], plier, cand);

   string names[] = '{"Behav_1","Behav_2","Linear", "Tree",
                      "Tree Rec", "Tree Simple", "Tree Average"};
   int err_cnt[NUM_MULT];
   int tests[$] = {1,1, 1,2,  1,32,  32, 1};

   initial begin

      for ( int i=0; i<num_tests; i++ ) begin

         plier = tests.size() ? tests.pop_front() : $random();
         cand = tests.size() ? tests.pop_front() : $random();

         #1;

         for ( int mut=1; mut<NUM_MULT; mut++ ) begin

            if ( prod[0] !== prod[mut] ) begin

               err_cnt[mut]++;

               if ( err_cnt[mut] < err_limit )
                 $display("Error in %s test %4d:  %d != %d (correct)\n",
                          names[mut], i, prod[mut], prod[0]);
            end

         end

      end

      for ( int mut=1; mut<NUM_MULT; mut++ ) begin

         $display("Mut %s, %d errors (%.1f%% of tests)\n",
                  names[mut], err_cnt[mut],
                  100.0 * err_cnt[mut]/real'(num_tests) );

      end


   end

endmodule

// cadence translate_on