////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2024 Homework 1 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2024/hw01.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
  ///  Complete the dot product modules as described below.
//
//     [✔] Complete dot2.
//     [✔] Complete dot3.
//     [✔] Complete dot4.
//     [✔] None of your modules can use assign statements or procedural code.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] Don't assume any particular parameter values.
//
//     [✔] Code must be written clearly.


module mult
  #( int w = 5 ) ( output uwire [w-1:0] p, input uwire [w-1:0] a, b );
   // w-bit unsigned multiplier building block.
   //
   //   w : Width, in bits, of both operands and of the result.
   //   p : Product a * b. Because p is only w bits wide, just the low
   //       w bits of the product are kept.
   //
   // Do not modify this module.
   assign p = a * b;
endmodule

module add
  #( int w = 5 ) ( output uwire [w-1:0] s, input uwire [w-1:0] a, b );
   // w-bit unsigned adder building block.
   //
   //   w : Width, in bits, of both operands and of the result.
   //   s : Sum a + b. Because s is only w bits wide, the carry out of
   //       the most-significant bit is discarded.
   //
   // Do not modify this module.
   assign s = a + b;
endmodule


module dot2
  #( int w = 5 )
   ( output uwire [w-1:0] dp,
     input uwire [w-1:0] a[1:0], b[1:0] );

   // Two-element dot product, all values w bits wide:
   //
   //   dp = a[0] * b[0] + a[1] * b[1];
   //
   // Assignment constraints honored here:
   //   - Only mult and add instantiations are used (two mult, one add).
   //   - No assign statements and no procedural code.

   /// SOLUTION

   // One product per vector element.
   uwire [w-1:0] prod0;
   uwire [w-1:0] prod1;

   mult #( .w(w) ) m0( .p(prod0), .a(a[0]), .b(b[0]) );
   mult #( .w(w) ) m1( .p(prod1), .a(a[1]), .b(b[1]) );

   // The sum of the two products is the module output.
   add #( .w(w) ) ad( .s(dp), .a(prod0), .b(prod1) );

endmodule

module dot3
  #( int w = 5 )
   ( output uwire [w-1:0] dp,
     input uwire [w-1:0] a[2:0], b[2:0] );

   // Three-element dot product, all values w bits wide:
   //
   //   dp = a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
   //
   // Assignment constraints honored here:
   //   - At least one dot2 instantiation is used.
   //   - One additional mult and one additional add complete the sum.
   //   - No assign statements and no procedural code.

   /// SOLUTION

   uwire [w-1:0] dp_lo;   // a[0]*b[0] + a[1]*b[1], from dot2.
   uwire [w-1:0] prod2;   // a[2]*b[2].

   // Elements 1 and 0 are handled by a dot2 instance.
   dot2 #( .w(w) ) d0( .dp(dp_lo), .a(a[1:0]), .b(b[1:0]) );

   // Element 2 needs its own multiplier.
   mult #( .w(w) ) m2( .p(prod2), .a(a[2]), .b(b[2]) );

   // Combine the partial dot product with the remaining term.
   add #( .w(w) ) a2( .s(dp), .a(dp_lo), .b(prod2) );

endmodule

module dot4
  #( int w = 5 )
   ( output uwire [w-1:0] dp,
     input uwire [w-1:0] a[3:0], b[3:0] );

   // Four-element dot product, all values w bits wide:
   //
   //   dp = a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
   //
   // Assignment constraints honored here:
   //   - Built from dot2 and add instantiations only.
   //   - The two dot2 instances operate side by side and are joined by
   //     a single add, so the longest input-to-output path passes
   //     through one mult and two add modules.
   //   - No assign statements and no procedural code.

   /// SOLUTION

   uwire [w-1:0] dp_lo;   // Dot product of elements 1 and 0.
   uwire [w-1:0] dp_hi;   // Dot product of elements 3 and 2.

   dot2 #( .w(w) ) d0( .dp(dp_lo), .a(a[1:0]), .b(b[1:0]) );
   dot2 #( .w(w) ) d1( .dp(dp_hi), .a(a[3:2]), .b(b[3:2]) );

   // Join the two halves.
   add #( .w(w) ) ad( .s(dp), .a(dp_lo), .b(dp_hi) );

endmodule



//////////////////////////////////////////////////////////////////////////////
///  Problem 2
//
  ///  Complete the MADD Dot Product Modules
 ///
//
//     [✔] Complete dot2m.
//     [✔] Complete dot4m.
//     [✔] Complete dot6m.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] Don't assume any particular parameter values.
//
//     [✔] Code must be written clearly.


module madd
  #( int w = 8 )
   ( output uwire [w-1:0] s,  input uwire [w-1:0] si, a, b );
   // w-bit unsigned multiply-add building block.
   //
   //   w  : Width, in bits, of every port.
   //   si : Value added to the product (sum input).
   //   s  : si + a * b, truncated to w bits because s is w bits wide.
   //
   // Do not modify this module.
   assign s = si + a * b;  // Perform a Multiply Add Operation.
endmodule

module dot2m
  #( int w = 5 )
   ( output uwire [w-1:0] dp,
     input uwire [w-1:0] si, a[1:0], b[1:0] );

   // Two-element dot product accumulated onto si, all values w bits:
   //
   //   dp = si + a[0] * b[0] + a[1] * b[1];
   //
   // Assignment constraints honored here:
   //   - Only madd instantiations are used (two, connected in a chain).
   //   - No assign statements and no procedural code.

   /// SOLUTION

   // Running sum after the first term: si + a[0]*b[0].
   uwire [w-1:0] acc0;

   madd #( .w(w) ) m0( .s(acc0), .si(si),   .a(a[0]), .b(b[0]) );
   madd #( .w(w) ) m1( .s(dp),   .si(acc0), .a(a[1]), .b(b[1]) );

endmodule


module dot4m
    #( int w = 6 )
   ( output uwire [w-1:0] dp,
     input uwire [w-1:0] si, a[3:0], b[3:0] );

   // Four-element dot product accumulated onto si, all values w bits:
   //
   //   dp = si + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
   //
   // Assignment constraints honored here:
   //   - Two dot2m instantiations are chained; madd is not used directly.
   //   - No assign statements and no procedural code.

   /// SOLUTION

   // Running sum after elements 1 and 0 have been accumulated.
   uwire [w-1:0] acc_lo;

   dot2m #( .w(w) ) d0( .dp(acc_lo), .si(si),     .a(a[1:0]), .b(b[1:0]) );
   dot2m #( .w(w) ) d1( .dp(dp),     .si(acc_lo), .a(a[3:2]), .b(b[3:2]) );

endmodule

module dot6m
    #( int w = 7 )
   ( output uwire [w-1:0] dp,
     input uwire [w-1:0] si, a[5:0], b[5:0] );

   // Six-element dot product accumulated onto si, all values w bits:
   //
   //   dp = si+a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3]+a[4]*b[4]+a[5]*b[5];
   //
   // Assignment constraints honored here:
   //   - Two instantiations are used: one dot4m followed by one dot2m.
   //   - No assign statements and no procedural code.

   /// SOLUTION

   // Running sum after elements 3 through 0 have been accumulated.
   uwire [w-1:0] acc_lo;

   dot4m #( .w(w) ) d0( .dp(acc_lo), .si(si),     .a(a[3:0]), .b(b[3:0]) );
   dot2m #( .w(w) ) d1( .dp(dp),     .si(acc_lo), .a(a[5:4]), .b(b[5:4]) );

endmodule




//////////////////////////////////////////////////////////////////////////////
///  Problem 3
//
  ///  Complete  hybrid sizes
 ///
//
//     [✔] Complete dot2y.
//     [✔] Complete dot4y.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] Don't assume any particular parameter values for wa and wb.
//
//     [✔] Code must be written clearly.




module multy
  #( int wa = 1,
     int wb = 2,
     int wo = wa + wb )
   ( output uwire [wo-1:0] p,
     input  uwire [wa-1:0] a,
     input  uwire [wb-1:0] b );

   /// SOLUTION
   //
   // Unsigned multiplier in which every port has its own width:
   //
   //   wa : Width of operand a.
   //   wb : Width of operand b.
   //   wo : Width of product p. The default, wa + wb, is wide enough
   //        to hold any product of a wa-bit and a wb-bit value.
   //
   // [✔] Only the port and parameter declarations above were changed
   //     from the single-width version of this module.

   assign p = a * b;

endmodule

module addy
  #( int wi = 1,
     int wo = wi + 1 )
   ( output uwire [wo-1:0] s,
     input  uwire [wi-1:0] a, b );

   /// SOLUTION
   //
   // Unsigned adder whose output width can differ from its input width:
   //
   //   wi : Width of both operands, a and b.
   //   wo : Width of sum s. The default, wi + 1, leaves room for the
   //        carry out of the wi-bit addition.
   //
   // [✔] Only the port and parameter declarations above were changed
   //     from the single-width version of this module.

   assign s = a + b;

endmodule


module dot2y
  #( int wa = 5, wb = 6, wo = wa + wb + 1 )
   ( output uwire [wo-1:0] dp,
     input uwire [wa-1:0] a[1:0],
     input uwire [wb-1:0] b[1:0] );

   // Compute
   //
   //   dp = a[0] * b[0] + a[1] * b[1];
   //
   // Parameters:
   //   wa : Width of each element of a.
   //   wb : Width of each element of b.
   //   wo : Width of dp. The default, wa + wb + 1, holds the sum of two
   //        (wa+wb)-bit products without overflow.
   //
   // Note: Product of a wa-bit and wb-bit unsigned integer needs wa+wb bits.
   //       Sum of a wa-bit and wb-bit integer needs max(wa,wb)+1 bits.
   //
   // [✔] Modify addy and multy so that they can be instantiated ..
   //     .. with ports set to the needed size.
   // [✔] Do not make the ports larger than are needed.
   // [✔] Use as many instantiations of multy and addy as are needed, if any.
   // [✔] DO NOT use assign, and DO NOT use procedural code (always, etc.).

   /// SOLUTION


   // Compute the size needed to hold the product a[i] * b[i].
   //
   localparam int wr = wa + wb;
   //
   // Use that size for the dot product terms.
   //
   uwire [wr-1:0] p0, p1;

   multy #(wa,wb,wr) m0( p0, a[0], b[0] );
   multy #(wa,wb,wr) m1( p1, a[1], b[1] );

   // Size the adder output with wo, the declared width of dp, so that
   // the port widths always match, even when wo is overridden by the
   // instantiating module. With the default wo this is wr + 1.
   //
   addy #(wr,wo) ad( dp, p0, p1 );

endmodule


module dot4y
  #( int wa = 5, wb = wa, wo = wa + wb + 2)
   ( output uwire [wo-1:0] dp,
     input uwire [wa-1:0] a[3:0],
     input uwire [wb-1:0] b[3:0] );

   // Four-element dot product with independent operand widths:
   //
   //   dp = a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
   //
   // Width bookkeeping:
   //   - A product of a wa-bit and a wb-bit unsigned value needs
   //     wa + wb bits.
   //   - Each sum of two equal-width values needs one more bit, so a
   //     two-term dot product needs wa + wb + 1 bits and the four-term
   //     result needs wa + wb + 2 bits (the default for wo).
   //
   // Assignment constraints honored here:
   //   - Built from dot2y and addy; no special versions of multy or addy.
   //   - Ports are no larger than needed.
   //   - No assign statements and no procedural code.

   /// SOLUTION

   // Width of each two-term partial result. This matches the default
   // output width of dot2y, which is why wo is not overridden below.
   localparam int wr = wa + wb + 1;

   uwire [wr-1:0] dp_lo;   // a[0]*b[0] + a[1]*b[1]
   uwire [wr-1:0] dp_hi;   // a[2]*b[2] + a[3]*b[3]

   dot2y #( .wa(wa), .wb(wb) ) d0( .dp(dp_lo), .a(a[1:0]), .b(b[1:0]) );
   dot2y #( .wa(wa), .wb(wb) ) d1( .dp(dp_hi), .a(a[3:2]), .b(b[3:2]) );

   // Final addition widens the result from wr to wo bits.
   addy #( .wi(wr), .wo(wo) ) ad( .s(dp), .a(dp_lo), .b(dp_hi) );

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
// It is okay to modify the testbench code to facilitate the coding
// and debugging of your modules. Keep in mind that your submission
// will be tested using a different testbench, so no one will be
// accused of dishonesty for modifying the testbench below. However,
// be sure to undo any such changes so that you can confirm that
// your code passes the original testbench.


// cadence translate_off


module testbench;

   // Top-level testbench.
   //
   // Instantiates one testbench_n per row of pset and runs them one
   // after another: instance p starts when d[p-1] is set and sets d[p]
   // when it finishes. After the last instance finishes, a per-module
   // error summary is printed.

   localparam int npsets = 6; // This MUST be set to the size of pset.
   // Each row is { variation wa wb delta }, where variation is
   // 0 for Problem 1, 1 for Problem 2, and 2 for Problem 3.
   localparam int pset[npsets][4] =
              '{
                { 0, 2, 0, 0 },
                { 0, 8, 0, 0 },
                { 1, 3, 0, 0 },
                { 1, 7, 0, 0 },
                { 2, 5, 3, 0 },
                { 2, 8, 4, 0 }
                };

   // Set to 1 to also print one summary line per parameter set. This
   // is off here, so only the per-module summary below is printed.
   localparam bit show_pset_summary = 0;

   logic d[npsets:-1]; // Start / Done signals.

   // Error and test counts indexed by parameter set (written by the
   // testbench_n instances using hierarchical references) ..
   int t_errs[npsets];
   int t_n_tests[npsets];
   // .. and indexed by module name, e.g., "dot2m".
   int t_errs_mod[string];
   int t_n_tests_mod[string];
   string sname[] = { "Prob 1", "Prob 2", "Prob 3" };

   initial begin
      for ( int i=0; i<npsets; i++ ) begin
         t_errs[i] = 0;
         t_n_tests[i] = 0;
      end

      // Start the first testbench_n instance.
      d[-1] = 1;

      // Wait for the last testbench_n instance to finish.
      wait( d[npsets-1] );
      $write("\n");

      if ( show_pset_summary )
        for ( int p=0; p<npsets; p++ ) begin
           automatic string wline =
             pset[p][0] < 2 ? $sformatf("w=%0d", pset[p][1]) :
               $sformatf("wa=%0d,wb=%0d", pset[p][1],pset[p][2]);
           $write("End of tests %s, %s: %0d errors out of %0d tests.\n",
                  sname[ pset[p][0] ], wline, t_errs[p], t_n_tests[p]);
        end

      foreach ( t_errs_mod[mname] ) begin
         $write("End of tests. For %s:  %0d errors out of %0d tests.\n",
                mname, t_errs_mod[mname], t_n_tests_mod[mname] );
      end

   end

   // One testbench_n per parameter set. The wo_delta parameter is taken
   // from the delta column of pset (column 3), not from the loop index.
   for ( genvar p=0; p<npsets; p++ ) begin
      testbench_n #( .idx(p), .variation(pset[p][0]), .wap(pset[p][1]),
                     .wbp(pset[p][2]), .wo_delta(pset[p][3]) )
      tb( .done(d[p]), .tstart(d[p-1]) );
   end

endmodule

module testbench_n
  #( int idx, variation = 0, wap = 2, wbp = 3, wo_delta = 1 )
   ( output logic done, input uwire tstart );

   // Tests the dot product modules of one problem at one set of widths.
   //
   // Parameters:
   //   idx       : Index of this instance; selects the entries of
   //               testbench.t_errs and testbench.t_n_tests to update.
   //   variation : 0, Problem 1 (dot2, dot3, dot4);
   //               1, Problem 2 (dot2m, dot4m, dot6m);
   //               2, Problem 3 (dot2y, dot4y).
   //   wap, wbp  : Operand widths. wbp is used only for Problem 3.
   //   wo_delta  : Not used in this module.
   //
   // Ports:
   //   tstart : Testing begins when this becomes 1.
   //   done   : Set to 1 when all tests have been run.

   localparam bit normal = variation == 0;
   localparam bit ripple = variation == 1;
   localparam bit hybrid = variation == 2;

   localparam int wa = wap;
   localparam int wb = hybrid ? wbp : wap;
   localparam int wo = hybrid ? wa + wb + 2 : wap;

   localparam int n_tests = 1000;
   localparam int ms = 7; // Maximum array size

   logic [wa-1:0] a[ms-1:0];
   logic [wb-1:0] b[ms-1:0];
   uwire [wo-1:0] dp[ms];   // dp[n] is the output of the n-element module.
   logic [wo-1:0] z;        // Value for the si input of Problem 2 modules.

   int sizes[$];            // Vector sizes of the modules under test.
   string npre, npost;      // Module name is {npre, size, npost}.

   localparam int wo2 = wa + wb + 1;

   if ( ripple ) begin
      dot2m #(wa) d2( dp[2], z, a[1:0], b[1:0] );
      dot4m #(wa) d4( dp[4], z, a[3:0], b[3:0] );
      dot6m #(wa) d6( dp[6], z, a[5:0], b[5:0] );
      initial begin sizes = { 2, 4, 6 }; npre = "dot"; npost = "m"; end
   end else if ( normal ) begin
      dot2 #(wa) d2( dp[2], a[1:0], b[1:0] );
      dot3 #(wa) d3( dp[3], a[2:0], b[2:0] );
      dot4 #(wa) d4( dp[4], a[3:0], b[3:0] );
      initial begin sizes = { 2, 3, 4 }; npre = "dot"; npost = ""; end
   end else begin
      // The dot2y output is one bit narrower than dp[2], so pad it
      // with a zero.
      uwire [wo2-1:0] dp2o;
      dot2y #(wa,wb) d2( dp2o, a[1:0], b[1:0] );
      assign dp[2] = { 1'b0, dp2o };
      dot4y #(wa,wb) d4( dp[4], a[3:0], b[3:0] );
      initial begin sizes = { 2, 4 }; npre = "dot"; npost = "y"; end
   end

   string names[$];

   initial begin

      // Error counts indexed by vector size (NOT by position in sizes).
      automatic int n_err_dp[ms] = '{ms{0}};
      automatic string wline = hybrid
        ? $sformatf("wa=%0d,wb=%0d", wa, wb) : $sformatf("w=%0d", wa);

      wait( tstart );

      $write("Starting tests for %s %s.\n", testbench.sname[variation], wline);

      foreach ( sizes[sidx] )
        names.push_back( $sformatf("%s%0d%s",npre,sizes[sidx],npost) );

      foreach ( sizes[sidx] ) begin

         automatic int sz = sizes[sidx];

         for ( int t=0; t<n_tests; t++ ) begin

            // shadow_dp[k] is the expected result after the first k
            // terms have been accumulated.
            automatic logic [wo-1:0] shadow_dp[ms-1:-1];
            z = {$random};
            shadow_dp[0] = ripple ? z : 0;

            // Apply inputs to elements 0 to sz-1 only. The module of
            // size sz does not read element sz, and for sz = ms-1
            // shadow_dp[sz+1] would be outside of the array.
            for ( int i=0; i<sz; i++ ) begin
               // Number of bits needed for a sum of i+1 products.
               automatic int bits = wa + wb + $clog2(i+1);
               // Test Patterns. Okay to Modify.
               case (t)
                 0: begin a[i]=0; b[i]=0; end
                 1: begin a[i]=0; b[i]=1; end
                 2: begin a[i]=1; b[i]=0; end
                 3: begin a[i]=1; b[i]=1; end
                 4: begin a[i]=1; b[i]=i; end
                 5: begin a[i]=1; b[i]=(1<<wb)-1; end
                 6: begin a[i]=(1<<wa)-1; b[i]=1; end
                 default: begin a[i] = {$random}; b[i] = {$random}; end
               endcase
               shadow_dp[i+1] = ( shadow_dp[i] + a[i] * b[i] ) % 2**bits;
            end

            // Wait for the module outputs to settle.
            #1;

            if ( shadow_dp[sz] !== dp[sz] ) begin

               n_err_dp[sz]++;
               // Show details only for the first few errors per module.
               if ( n_err_dp[sz] < 6 ) begin
                  $write( "Error, %s, %s, %0d != %0d (correct)\n",
                          names[sidx], wline, dp[sz], shadow_dp[sz] );
                  for ( int j=0; j<sz; j++ )
                    $write(" %0d * %0d%s",
                           a[j], b[j], j == sz-1 ? "\n" : " +");
                  for ( int j=0; j<sz; j++ )
                    $write(" 0x%0x * 0x%0x%s",
                           a[j], b[j], j == sz-1 ? "\n" : " +");
               end
            end
         end
      end

      testbench.t_errs[idx] = 0;
      $write("Tests complete %s %s: ",
             testbench.sname[variation], wline);

      foreach ( sizes[sidx] ) begin

         automatic int sz = sizes[sidx];
         testbench.t_errs_mod[names[sidx]] += n_err_dp[sz];
         testbench.t_n_tests_mod[names[sidx]] += n_tests;

         testbench.t_errs[idx] += n_err_dp[sz];
         testbench.t_n_tests[idx] += n_tests;
         // n_err_dp is indexed by size, so use sz here, not sidx.
         $write("%s %0d errors, ", names[sidx],n_err_dp[sz]);
      end
      $write("\n");



      done = 1;
   end

endmodule

// cadence translate_on