////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2019 Homework 1 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2019/hw01.pdf


`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1 -- SOLUTION
//
 /// Modify mult16_tree, mult8_tree, and mult4_tree to implement a multiplier.
 ///
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] mult16_tree must use exactly two mult8_tree modules, etc.
//     [✔] Pay attention to port widths. Do not make them larger than needed.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl


module mult16_tree
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [31:0] prod,
     input uwire [15:0] a,
     input uwire [15:0] b );

   /// SOLUTION
   //
   // 16-bit x 16-bit unsigned multiplier assembled from exactly two
   // mult8_tree instances plus one adder.
   //
   // Note: the parameters wa, wb and wp are declared in the header but
   // the port widths here are fixed at 16, 16 and 32 bits.

   // Partial product for the low byte of a.
   //
   // An 8-bit unsigned value times a 16-bit unsigned value needs at
   // most 8 + 16 = 24 bits, hence the 24-bit connection.
   //
   uwire [23:0] pp_low_byte;
   mult8_tree mlo( .prod(pp_low_byte), .a(a[7:0]), .b(b) );

   // Partial product for the high byte of a, sized the same way.
   //
   uwire [23:0] pp_high_byte;
   mult8_tree mhi( .prod(pp_high_byte), .a(a[15:8]), .b(b) );

   // Sum the partial products. The high-byte product carries a weight
   // of 2^8, which is applied by appending eight zero bits. The
   // concatenation is 32 bits wide, matching prod, so the low-byte
   // product is zero-extended and the addition is done at 32 bits.
   //
   assign prod = { pp_high_byte, 8'b0 } + pp_low_byte;

endmodule

module mult8_tree
  ( output uwire [23:0] prod,
    input uwire [7:0] a,
    input uwire [15:0] b );

   /// SOLUTION
   //
   // 8-bit x 16-bit unsigned multiplier assembled from exactly two
   // mult4_tree instances plus one adder.

   // Each mult4_tree multiplies one nibble of a by all of b, so its
   // result needs at most 4 + 16 = 20 bits.
   //
   uwire [19:0] pp_low_nibble;
   uwire [19:0] pp_high_nibble;

   mult4_tree mlo( .prod(pp_low_nibble),  .a(a[3:0]), .b(b) );
   mult4_tree mhi( .prod(pp_high_nibble), .a(a[7:4]), .b(b) );

   // The high-nibble product carries a weight of 2^4; appending four
   // zero bits yields a 24-bit value, the same width as prod.
   //
   assign prod = { pp_high_nibble, 4'b0 } + pp_low_nibble;

endmodule

module mult4_tree
  ( output uwire [19:0] prod,
    input uwire [3:0] a,
    input uwire [15:0] b );

   /// SOLUTION
   //
   // 4-bit x 16-bit unsigned multiplier assembled from two mult2
   // instances plus one adder.

   // Each mult2 multiplies a 2-bit slice of a by all of b and returns
   // an 18-bit result (2 + 16 bits).
   //
   uwire [17:0] pp_low_pair;
   uwire [17:0] pp_high_pair;

   mult2 mlo( .prod(pp_low_pair),  .a(a[1:0]), .b(b) );
   mult2 mhi( .prod(pp_high_pair), .a(a[3:2]), .b(b) );

   // The upper-slice product carries a weight of 2^2; appending two
   // zero bits yields a 20-bit value, the same width as prod.
   //
   assign prod = { pp_high_pair, 2'b0 } + pp_low_pair;

endmodule


 /// Bonus Solution: 
module mult_tree
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [wp:1] prod,
     input uwire [wa:1] a,
     input uwire [wb:1] b );

   /// BONUS SOLUTION
   //
   //  This answers a question that was almost but not quite asked:
   //    Using generate statements design a single module that can be
   //    instantiated into a module equivalent to mult16_tree,
   //    mult8_tree, mult4_tree, and mult2, and also mult32_tree, etc.
   //
   //  Parameters:
   //    wa  Width of multiplier a in bits. Must be >= 1. Any width is
   //        accepted, not just powers of two.
   //    wb  Width of multiplicand b in bits.
   //    wp  Width of the product. Defaults to wa + wb, which is wide
   //        enough to hold any unsigned product of a and b.
   //
   //  Ports:
   //    prod  Unsigned product a * b (truncated if wp < wa + wb).
   //    a, b  Unsigned operands.

   if ( wa == 1 ) begin

      // Terminal case: 1 bit partial product.
      //
      assign prod = a ? b : 0;
      //
      // Equivalent to: prod = a * b;

   end else begin

      // Split a into a low part and a high part and recursively
      // instantiate a module for each part.
      //
      // The two parts are sized separately so that an odd wa is
      // handled correctly: the low part gets floor(wa/2) bits and the
      // high part gets the remaining wa - floor(wa/2) bits. When wa
      // is even both parts have the same width. (Using wa/2 for both
      // parts would drop the most-significant bit of a whenever wa is
      // odd.)

      localparam int wlo = wa / 2;
      localparam int whi = wa - wlo;

      // Each partial product is as wide as its slice of a plus wb.
      //
      uwire [wb+wlo:1] prod_lo;
      uwire [wb+whi:1] prod_hi;

      mult_tree #(wlo,wb) mlo( prod_lo, a[wlo:1], b);
      mult_tree #(whi,wb) mhi( prod_hi, a[wa:wlo+1], b);

      // Combine the partial products. The high part of a starts at
      // bit position wlo, so its partial product has weight 2^wlo.
      // The expression is evaluated at the width of prod (or of the
      // wider operand, if larger), so with the default wp no bits of
      // the shifted partial product are lost.
      //
      assign prod = prod_lo + ( prod_hi << wlo );

   end

endmodule



 /// Do not modify the code below this point.

// Leaf multiplier: multiplies the 2-bit unsigned value a by the 16-bit
// unsigned value b using the * operator.
//
//   prod  18-bit product (2 + 16 bits).
//   a     2-bit operand.
//   b     16-bit operand.
//
// Instantiated by mult4_tree and mult16_flat in this file.
//
module mult2
  ( output uwire [17:0] prod, input uwire [1:0] a, input uwire [15:0] b );

   /// DO NOT MODIFY THIS ROUTINE.
   assign prod = a * b;

endmodule


// 16-bit x 16-bit unsigned multiplier built from eight mult2 instances
// whose outputs are combined in a single sum (no hierarchy of
// intermediate modules).
//
//   prod  32-bit product.
//   a, b  16-bit operands.
//
// The parameters wa, wb and wp are declared but the port widths are
// fixed at 16, 16 and 32 bits.
//
module mult16_flat
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [31:0] prod, input uwire [15:0] a, b );

   /// DO NOT MODIFY THIS ROUTINE.

   // The text between `ifdef and `endif below is not Verilog. It is
   // skipped as long as NEVER_DEFINE_ME is left undefined.
`ifdef NEVER_DEFINE_ME
   // Emacs Lisp code to generate Verilog code for mult16_flat.
   (cl-loop for i from 0 to 14 by 2
    concat (if (= i 0) "   assign prod = prod00"
             (format " + ( prod%02d << %d )" i i)) into prod
    concat (format "%s prod%02d" (if (= i 0) "" ",") i) into decl
    concat (format "   mult2 m%d( prod%02d, a[%d:%d], b);\n" i i (+ i 1) i)
    into inst
    finally (insert (concat "\n   uwire [17:0]" decl ";\n" inst "\n" prod ";\n")))
`endif

   // One 18-bit partial product per 2-bit slice of a. The two digits in
   // each name give the position of the slice's least-significant bit.
   uwire [17:0]  prod00, prod02, prod04, prod06, prod08, prod10, prod12, prod14;
   mult2 m0( prod00, a[1:0], b);
   mult2 m2( prod02, a[3:2], b);
   mult2 m4( prod04, a[5:4], b);
   mult2 m6( prod06, a[7:6], b);
   mult2 m8( prod08, a[9:8], b);
   mult2 m10( prod10, a[11:10], b);
   mult2 m12( prod12, a[13:12], b);
   mult2 m14( prod14, a[15:14], b);

   // Shift each partial product left by the bit position of its slice
   // of a, then add them all together.
   assign prod = prod00 + ( prod02 << 2 ) + ( prod04 << 4 ) + ( prod06 << 6 ) + ( prod08 << 8 ) + ( prod10 << 10 ) + ( prod12 << 12 ) + ( prod14 << 14 );

endmodule



// Parameterized unsigned multiplier described with the * operator.
//
//   wa, wb  Widths of operands a and b in bits.
//   wp      Width of prod; defaults to wa + wb.
//
// Instantiated by the testbench in this file as device 0.
//
module mult_operator
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [wp:1] prod, input uwire [wa:1] a, input uwire [wb:1] b );
   /// DO NOT MODIFY THIS ROUTINE.
   assign prod = a * b;
endmodule




//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

// Simulation-only testbench for the multiplier modules in this file.
//
// Drives the same multiplier/multiplicand pair into every multiplier
// under test, compares each output with a product computed
// behaviorally, and prints a per-module error summary.
//
module testbench;

   localparam int wid = 16;          // Operand width in bits.
   localparam int num_tests = 1000;  // Number of operand pairs to try.
   localparam int NUM_MULT = 4;      // Number of multipliers under test.
   localparam int err_limit = 7;     // Errors are printed while a
                                     // module's count is below this.

   logic [wid-1:0] plier, cand;
   logic [2*wid-1:0] prod[NUM_MULT], shadow_prod;

   // Multipliers under test. The index into prod must match the
   // module's position in names below.
   //
   mult_operator mb0(prod[0], plier, cand);
   mult16_flat mb1(prod[1], plier, cand);
   mult16_tree mb2(prod[2], plier, cand);
   //
   // The parameterized tree multiplier defined in this file is named
   // mult_tree.
   //
   mult_tree #(wid,wid) mb3(prod[3], plier, cand);

   string names[] = '{ "mult_operator", "mult16", "tree16", "treep"  };

   // Number of mismatches seen so far for each multiplier.
   //
   int err_cnt[NUM_MULT];

   // Array of multiplier/multiplicand values to try out.
   // After these values are used a random number generator will be used.
   //
   int tests[$] = {1,1, 1,2,  1,32,  32, 1};

   initial begin

      $display("Starting testbench.\n");

      for ( int i=0; i<num_tests; i++ ) begin

         // Set multiplier and multiplicand values.
         //
         plier = tests.size() ? tests.pop_front() : $random();
         cand = tests.size() ? tests.pop_front() : $random();

         // Expected product. The result is 2*wid bits wide because
         // the assignment target is 2*wid bits wide.
         //
         shadow_prod = plier * cand;

         // Give the modules time to respond to the new inputs.
         //
         #10;

         // Make sure each module's output is correct.
         //
         for ( int mut=0; mut<NUM_MULT; mut++ ) begin

            // Use !== so that x or z bits in an output count as errors.
            //
            if ( shadow_prod !== prod[mut] ) begin

               err_cnt[mut]++;

               if ( err_cnt[mut] < err_limit )
                 $display("Error in %s test %4d:  %x != %x (correct)\n",
                          names[mut], i, prod[mut], shadow_prod);
            end

         end

      end

      // Tests completed, report error count for each device.
      //
      for ( int mut=0; mut<NUM_MULT; mut++ ) begin

         $display("Mut %s, %d errors (%.1f%% of tests)\n",
                  names[mut], err_cnt[mut],
                  100.0 * err_cnt[mut]/real'(num_tests) );

      end

      $finish(2);

   end

endmodule

// cadence translate_on