/// EE 7722 - Digital Design Using HDLs
//
//  Classroom demo code.

// Time-stamp: <15 October 2014, 14:33:00 CDT, koppel@sky.ece.lsu.edu>


//////////////////////////////////////////////////////////////////////////////
/// Behavioral Multiplier


module mult_behav_1
  #(int wid = 16)
   (output logic [2*wid-1:0] prod,
    input  logic [wid-1:0]   plier,
    input  logic [wid-1:0]   cand);

   // Reference multiplier: purely combinational, described with the
   // multiplication operator.  Because prod is 2*wid bits wide, the
   // operands are extended to that width before multiplying, so the
   // full-width product is produced.
   //
   always_comb prod = plier * cand;

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Linear Multiplier

 /// Simple Adder, Don't Modify
// Combinational w-bit adder: s = a + b.
//
//   w  Width in bits of both operands and of the sum (default 16).
//   s  Sum output.  It is the same width as the operands, so any carry
//      out of the most-significant bit is discarded.
//   a  First operand.
//   b  Second operand.
//
// Note that the ports are indexed [w:1] rather than [w-1:0].
//
module good_adder#(int w=16)(output [w:1] s, input [w:1] a,b);
   assign s = a + b;
endmodule


module mult_linear
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   // Combinational multiplier built from a linear chain of wid adders.
   // Each adder adds one partial product to the running total produced
   // by the adder before it.

   // Width of the product and of every intermediate value.
   localparam int pw = 2*wid;

   // chain[k] holds the sum of partial products 0 .. k-1.
   logic [pw-1:0] chain   [0:wid];

   // partial[b] is cand shifted left b places if bit b of plier is set,
   // zero otherwise.
   logic [pw-1:0] partial [0:wid-1];

   // The empty sum that feeds the first adder.
   assign chain[0] = '0;

   for ( genvar b=0; b<wid; b++ ) begin
      assign partial[b] = plier[b] ? pw'(cand) << b : '0;
      good_adder #(pw) adder( chain[b+1], chain[b], partial[b] );
   end

   // The output of the last adder is the complete product.
   assign prod = chain[wid];

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Tree Multiplier


module mult_tree
  #(int wid = 16)
   (output logic[2*wid-1:0] prod, input logic[wid-1:0] plier, cand);

   // Combinational multiplier that sums the partial products with a
   // balanced binary tree of adders.
   //
   // The tree is stored in rsum, heap style:
   //   rsum[0] .. rsum[widp2-1]          Leaves (partial products).
   //   rsum[widp2] .. rsum[2*widp2-2]    Adder outputs.
   // Node widp2+j adds nodes 2j and 2j+1, so the last node,
   // rsum[2*widp2-2], is the root and holds the product.
   //
   // The number of leaves is wid rounded up to a power of two. Leaves
   // beyond wid-1 are tied to zero so that any wid >= 1 works, not just
   // powers of two.

   // Number of leaves: smallest power of two that is >= wid.
   localparam int widp2 = 1 << $clog2(wid);

   logic [2*wid-1:0] rsum [2*widp2-1:0];

   // Because 2*widp2 is a power of two, anding with mask computes an
   // index modulo 2*widp2.
   localparam int mask = 2*widp2-1;

   // Compute partial products, and zero the padding leaves.
   //
   for ( genvar i=0; i<widp2; i++ )
     if ( i < wid )
       assign rsum[i] = plier[i] ? cand << i : 0;
     else
       assign rsum[i] = 0;

   // Add partial products together.
   //
   for ( genvar i=widp2; i<2*widp2-1; i++ )
     good_adder #( 2*wid ) adder
                 ( rsum[i],
                   rsum[ mask &   (i<<1)       ],   // Left child.
                   rsum[ mask & ( (i<<1) + 1 ) ]    // Right child.
                   );

   // The root of the tree is the product.
   assign    prod = rsum[2*widp2-2];

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Simple Sequential Multiplier


module mult_seq #( int wid = 16 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   // Sequential multiplier that handles one bit of cand per clock cycle.
   //
   //   prod   Product.  Updated once every wid cycles, on the clock edge
   //          at which pos is zero.
   //   plier  Multiplier operand.  Sampled every cycle.
   //   cand   Multiplicand operand.  Bit pos is examined every cycle.
   //   clk    Clock.  All state changes on the positive edge.
   //
   // There is no start or reset input; the unit runs continuously.

   // Number of bits needed for pos to hold 0 .. wid-1 (at least one).
   localparam int wlog = wid > 1 ? $clog2(wid) : 1;

   logic [wlog-1:0] pos;       // Bit of cand examined this cycle.
   logic [2*wid-1:0] accum;    // Sum of partial products so far.

   // cadence translate_off
   initial pos = 0;
   // cadence translate_on

   always @( posedge clk ) begin

      // Value of the accumulator for the next cycle.
      logic [2*wid-1:0] acc_next;

      acc_next = accum;

      if ( pos == 0 ) begin

         // A new multiplication starts: publish the product that was
         // just completed and clear the accumulator.
         prod <= accum;
         acc_next = 0;

      end

      // Add on the partial product for bit pos.
      if ( cand[pos] == 1 ) acc_next += plier << pos;

      accum <= acc_next;

      // Wrap at wid-1 explicitly so that pos never indexes past the
      // end of cand when wid is not a power of two.
      pos <= ( pos == wid - 1 ) ? '0 : pos + 1'b1;

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Sequential Multiplier, Using Instantiated Adder


module mult_seq_ga #( int wid = 16 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   // Sequential multiplier that handles one bit of cand per clock cycle
   // and uses an instantiated adder (good_adder) for the accumulation.
   //
   //   prod   Product.  Written on the clock edge at which pos is wid-1,
   //          that is, once every wid cycles.
   //   plier  Multiplier operand.
   //   cand   Multiplicand operand.
   //   clk    Clock.  All state changes on the positive edge.

   // Number of bits needed for pos to hold 0 .. wid-1 (at least one).
   // Note that $clog2(wid-1) would be one bit short when wid is one
   // more than a power of two.
   localparam int wlog = wid > 1 ? $clog2(wid) : 1;

   logic [wlog-1:0] pos;       // Bit of cand examined this cycle.
   logic [2*wid-1:0] accum;    // Sum of partial products so far.

   // cadence translate_off
   initial begin pos = 0; accum = 0; end
   // cadence translate_on

   // Partial product for the current bit position.
   wire [2*wid-1:0] pp = cand[pos] ? plier << pos : 0;

   // Accumulator plus the current partial product.
   wire [2*wid-1:0] sum;

   good_adder #(2*wid) ga( sum, accum, pp );

   always @( posedge clk ) begin

      if ( pos == wid-1 ) begin

         // Last bit: sum is the complete product.  Publish it and
         // start over.  Resetting pos here keeps it within 0 .. wid-1
         // even when wid is not a power of two.
         prod <= sum;
         accum <= 0;
         pos <= 0;

      end else begin

         accum <= sum;
         pos <= pos + 1;

      end

   end

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Streamlined Sequential Multiplier

 /// Techniques For Lowering Cost
//
//   Instead of shifting the multiplier, shift the accumulator.
//   Use part of the accumulator to store the multiplicand.

module mult_seq_stream #( int wid = 16 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   // Sequential multiplier in which the accumulator is shifted right one
   // bit per cycle and its low bits initially hold cand.
   //
   //   prod   Product.  Written on the clock edge at which pos is wid-1,
   //          that is, once every wid cycles.
   //   plier  Multiplier operand.  Sampled every cycle.
   //   cand   Multiplicand operand.  Copied into the accumulator on the
   //          clock edge at which pos is wid-1.
   //   clk    Clock.  All state changes on the positive edge.
   //
   // The part select accum[wid-1:1] below requires wid >= 2.

   // Number of bits needed for pos to hold 0 .. wid-1.  Note that
   // $clog2(wid-1) would be one bit short when wid is one more than a
   // power of two, in which case pos could never reach wid-1.
   localparam int wlog = $clog2(wid);

   logic [wlog-1:0] pos;       // Counts the steps of a multiplication.
   logic [2*wid-1:0] accum;    // Partial sum (high) and rest of cand (low).

   // cadence translate_off
   initial pos = 0;
   // cadence translate_on

   always @( posedge clk ) begin

      // Partial product, with one extra bit so it can be added to the
      // zero-extended upper half of the accumulator.
      logic [wid:0] pp;

      if ( pos == wid - 1 ) begin

         // Publish the completed product and load the next multiplicand
         // into the low bits of the accumulator.
         prod = accum;
         accum = cand;
         pos = 0;

      end else begin

         pos++;

      end

      // Note: the multiplicand is in the lower bits of the accumulator.
      //
      pp = accum[0] ? { 1'b0, plier } : 0;

      // Add on the partial product and shift the accumulator.
      // The (wid+1)-bit sum becomes the upper bits; the bit of the
      // multiplicand that was just used is shifted out.
      //
      accum = { { 1'b0, accum[2*wid-1:wid] } + pp, accum[wid-1:1] };

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Simple m-Step Sequential Multiplier

// Add on m partial products in each iteration.
//
// Will the synthesis program figure it out?


module mult_seq_m #( int wid = 16, int pp_per_cycle = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   // Sequential multiplier that adds pp_per_cycle partial products to
   // the accumulator in each clock cycle.
   //
   //   wid           Width of the operands.
   //   pp_per_cycle  Bits of cand handled per cycle (at least 1).
   //   prod          Product.  Updated once every "iterations" cycles,
   //                 on the clock edge at which iter is zero.
   //   plier         Multiplier operand.  Sampled every cycle.
   //   cand          Multiplicand operand.  Sampled every cycle.
   //   clk           Clock.  All state changes on the positive edge.

   // Cycles per multiplication: wid / pp_per_cycle, rounded up.
   localparam int iterations = ( wid + pp_per_cycle - 1 ) / pp_per_cycle;

   // Number of bits needed for iter to hold 0 .. iterations-1 (at
   // least one).
   localparam int iter_lg = iterations > 1 ? $clog2(iterations) : 1;

   logic [iter_lg-1:0] iter;   // Current iteration, 0 .. iterations-1.
   logic [2*wid-1:0] accum;    // Sum of partial products so far.

   // cadence translate_off
   initial iter = 0;
   // cadence translate_on

   always @( posedge clk ) begin

      // Value of the accumulator for the next cycle.
      logic [2*wid-1:0] acc_next;

      acc_next = accum;

      if ( iter == 0 ) begin

         // A new multiplication starts: publish the product that was
         // just completed and clear the accumulator.
         prod <= accum;
         acc_next = 0;

      end

      for ( int i=0; i<pp_per_cycle; i++ )
        begin
           int pos;  pos = iter * pp_per_cycle + i;

           // When wid is not a multiple of pp_per_cycle the last
           // iteration would run past the end of cand; skip those
           // positions.
           if ( pos < wid && cand[pos] ) acc_next += plier << pos;
        end

      accum <= acc_next;

      // Wrap explicitly so that the count is correct for any number of
      // iterations, not just powers of two.
      iter <= ( iter == iterations - 1 ) ? '0 : iter + 1'b1;

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Simple Degree-m Sequential Multiplier


module mult_seq_dm
  #( int wid = 16,
     int pp_per_cycle = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   // Sequential multiplier that consumes a pp_per_cycle-bit digit of
   // cand in each clock cycle: the digit is multiplied by plier with the
   // multiplication operator, shifted into place, and accumulated.
   //
   //   wid           Width of the operands.
   //   pp_per_cycle  Bits of cand handled per cycle (at least 1).
   //   prod          Product.  Updated once every "iterations" cycles,
   //                 on the clock edge at which iter is zero.
   //   plier         Multiplier operand.  Sampled every cycle.
   //   cand          Multiplicand operand.  Sampled every cycle.
   //   clk           Clock.  All state changes on the positive edge.

   // Cycles per multiplication: wid / pp_per_cycle, rounded up.
   localparam int iterations = ( wid + pp_per_cycle - 1 ) / pp_per_cycle;

   // Number of bits needed for iter to hold 0 .. iterations-1 (at
   // least one).
   localparam int iter_lg = iterations > 1 ? $clog2(iterations) : 1;

   // cand viewed as "iterations" digits of pp_per_cycle bits each.
   // Bits beyond wid-1, if any, are zero.
   wire [iterations-1:0][pp_per_cycle-1:0] cand_2d = cand;

   logic [iter_lg-1:0] iter;   // Current digit, 0 .. iterations-1.
   logic [2*wid-1:0] accum;    // Sum of partial products so far.

   // cadence translate_off
   initial iter = 0;
   // cadence translate_on

   always @( posedge clk ) begin

      // Value of the accumulator for the next cycle.
      logic [2*wid-1:0] acc_next;

      acc_next = accum;

      if ( iter == 0 ) begin

         // A new multiplication starts: publish the product that was
         // just completed and clear the accumulator.
         prod <= accum;
         acc_next = 0;

      end

      // Multiplication binds tighter than the shift: the digit product
      // is formed first and then moved to its place.
      acc_next += plier * cand_2d[iter] << ( iter * pp_per_cycle );

      accum <= acc_next;

      // Wrap explicitly so that the count is correct for any number of
      // iterations, not just powers of two.
      iter <= ( iter == iterations - 1 ) ? '0 : iter + 1'b1;

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Pipelined Multiplier


module mult_pipe #( int wid = 16, int pp_per_stage = 2 )
   ( output logic [2*wid-1:0] prod,
     input logic [wid-1:0] plier,
     input logic [wid-1:0] cand,
     input clk);

   // Pipelined multiplier.  Each stage adds up to pp_per_stage partial
   // products to the running sum it receives and passes the sum, along
   // with the operands, to the next stage on the positive clock edge.
   // A new pair of operands is accepted on every positive clock edge.

   // Number of stages: wid / pp_per_stage, rounded up.
   localparam int stages = ( wid + pp_per_stage - 1 ) / pp_per_stage;

   // Entry s of each array holds the values entering stage s.  Entry
   // "stages" holds the values leaving the last stage.
   logic [2*wid-1:0] pl_accum[0:stages];
   logic [wid-1:0] pl_plier[0:stages];
   logic [wid-1:0] pl_cand[0:stages];

   always @( posedge clk ) begin

      // Entry 0 is written with blocking assignments so that the first
      // stage, evaluated below, works on the current module inputs.
      pl_cand[0] = cand;
      pl_plier[0] = plier;
      pl_accum[0] = 0;

      for ( int s=0; s<stages; s++ ) begin

         // Running sum for this stage.
         logic [2*wid-1:0] sum;

         sum = pl_accum[s];

         for ( int k=0; k<pp_per_stage; k++ ) begin

            // Bit of the multiplicand handled by this step.
            int bit_idx;

            bit_idx = s * pp_per_stage + k;

            // The last stage may have fewer than pp_per_stage bits
            // left, so positions beyond wid-1 are skipped.
            if ( bit_idx < wid ) begin
               if ( pl_cand[s][bit_idx] )
                 sum = sum + ( pl_plier[s] << bit_idx );
            end

         end

         // Hand the operands and the updated sum to the next stage.
         pl_plier[s+1] <= pl_plier[s];
         pl_cand[s+1] <= pl_cand[s];
         pl_accum[s+1] <= sum;

      end

   end

   // The sum leaving the last stage is the product.
   assign prod = pl_accum[stages];

endmodule




//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

// Testbench for the multiplier modules.
//
// One instance of each multiplier is driven with the same operands.
// After each set of operands has been applied for 1000 time units the
// output of every multiplier is compared with the output of
// mult_behav_1, which serves as the reference.  Mismatches are counted
// per multiplier and a summary is printed at the end.
//
module testbench;

   localparam int wid = 16;            // Operand width of all instances.
   localparam int num_tests = 1000;    // Number of operand pairs tried.
   localparam int NUM_MULT = 12;       // Number of elements in prod.

   // Error messages for a multiplier are printed while its error count,
   // after being incremented, is less than err_limit.
   localparam int err_limit = 7;

   // When nonzero, the inputs to the pipelined units are changed in the
   // cycles that follow the test operands.
   localparam bit pipeline_test_exact = 0;

   logic clock;

   // Toggle the clock every time unit, for a period of two time units.
   always #1 clock <= !clock;

   // Operands for the non-pipelined units.
   logic [wid-1:0] plier, cand;

   // Operands for the pipelined units.
   logic [wid-1:0] plierp, candp;

   // prod[k] is the value that is checked for multiplier k.  Elements 7
   // and 8 are not connected to a module; they are copied from prodp
   // by the code below.
   logic [2*wid-1:0] prod[NUM_MULT];

   // Outputs of the pipelined units (only elements 7 and 8 are used).
   logic [2*wid-1:0] prodp[NUM_MULT];

   // Reference multiplier.
   mult_behav_1 #(wid) mb1(prod[0], plier, cand);

   mult_linear  #(wid) ms1(prod[1], plier, cand);
   mult_tree    #(wid) ms2(prod[2], plier, cand);
   mult_seq     #(wid) ms3(prod[3], plier, cand, clock);
   mult_seq_ga  #(wid) msga1(prod[11], plier, cand, clock);
   mult_seq_stream #(wid) mss1(prod[4], plier, cand, clock);
   mult_seq_m   #(wid,4) ms44(prod[5], plier, cand, clock);
   mult_seq_m   #(wid,3) ms43(prod[6], plier, cand, clock);
   mult_seq_dm  #(wid,4) msd44(prod[9], plier, cand, clock);
   mult_seq_dm  #(wid,3) msd43(prod[10], plier, cand, clock);
   mult_pipe    #(wid,4) ms54(prodp[7], plierp, candp, clock);
   mult_pipe    #(wid,3) ms53(prodp[8], plierp, candp, clock);

   // Name used in messages for each multiplier, indexed like prod.
   string names[] = '{"Behav_1","Linear", "Tree",
                      "Seq",
                      "Seq Stream",
                      "Seq MPP m4",
                      "Seq MPP m3",
                      "Pipelined m4",
                      "Pipelined m3",
                      "Seq Deg m4",
                      "Seq Deg m3",
                      "Seq GA"
                      };

   // Number of mismatches seen for each multiplier, indexed like prod.
   int err_cnt[NUM_MULT];

   // Array of multiplier/multiplicand values to try out.
   // After these values are used a random number generator will be used.
   // Values are removed from the front in pairs: first plier, then cand.
   //
   int tests[$] = {1,1, 1,2,  1,32,  32, 1};

   initial begin

      clock = 0;

      for ( int i=0; i<num_tests; i++ ) begin

         // Change input to pipelined units.
         // A different plierp value is applied every two time units
         // (one clock period) before the test operands.
         //
         for ( int t=0; t<=wid; t++ ) begin
            plierp = t;
            candp = 256;
            #2;
         end

         // Set multiplier and multiplicand values for non-piped units.
         //
         plier = tests.size() ? tests.pop_front() : $random();
         cand = tests.size() ? tests.pop_front() : $random();

         // Set multiplier and multiplicand values for piped units.
         //
         plierp = plier;
         candp = cand;

         // For pipelined units, copy output at the time it should be ready.
         // Each delay is two time units (one clock period) per stage of
         // the corresponding unit.  join_none lets the main thread
         // continue without waiting for the copies.
         //
         fork
            #(2 * wid/4) prod[7] = prodp[7];
            #(2 * ((wid+2)/3)) prod[8] = prodp[8];
         join_none

         if ( pipeline_test_exact ) begin

            // Modify the inputs to the pipelined units in subsequent cycles.
            //
            for ( int t=0; t<=wid; t++ ) begin
               #2;
               plierp = t;
               candp = 1;
            end

            plierp = 0;
            candp = 0;

         end

         // Wait for the sequential units to finish.
         #1000;

         // Make sure each module's output is correct.
         // The comparison uses !==, so an output containing x or z
         // bits counts as an error.
         //
         for ( int mut=1; mut<NUM_MULT; mut++ ) begin

            if ( prod[0] !== prod[mut] ) begin

               err_cnt[mut]++;

               // The count has already been incremented, so at most
               // err_limit-1 messages are printed per multiplier.
               if ( err_cnt[mut] < err_limit )
                 $display("Error in %s test %4d:  %x != %x (correct)\n",
                          names[mut], i, prod[mut], prod[0]);
            end

         end

      end

      // Tests completed, report error count for each device.
      //
      for ( int mut=1; mut<NUM_MULT; mut++ ) begin

         $display("Mut %s, %d errors (%.1f%% of tests)\n",
                  names[mut], err_cnt[mut],
                  100.0 * err_cnt[mut]/real'(num_tests) );

      end

      $finish(2);

   end

endmodule

// cadence translate_on