///  LSU EE 4755 -- Fall 2019 -- Digital Design / HDL
///
///  Elaboration, Parameters, and Generate Constructs

 // Under construction.

/// References

// :SV12: IEEE 1800-2012 -- The SystemVerilog Standard
// :SV17: IEEE 1800-2017 -- The SystemVerilog Standard
//        https://ieeexplore.ieee.org/document/8299595/
//        This is for those already familiar with Verilog.
//
// :BV3:  Brown & Vranesic, Fundamentals of Digital Logic with Verilog, 3rd Ed.
//        The text used in LSU EE 2740.


//////////////////////////////////////////////////////////////////////////////
/// Elaboration

 // :Def: Elaboration [of a module]
 // The process of locating instantiated modules, applying parameter
 // values, and running generate expressions. It is roughly analogous
 // to preprocessig in C.

 // :Def: Design Hierarchy
 // Details about the design elaborated from some top-level modules
 // constructed and used by a Verilog tool, such as simulator, synthesis
 // program, etc.
 //
 // Cadence Incisive uses program ncelab for elaboration. (It is
 // automatically called by other tools.)
 //
 // Using Genus, the design hierarchy can be explored in the
 // command-line interface using filesystem-like commands vls and vcd,
 // which, after the initial 'v' is removed, work like their Unix
 // counterparts. Using the Genus GUI the design hierarchy is visible
 // in the Design Browser pane.

 // :Example:
 //
 // Module sat_add is instantiated three times by elab_demo, at two
 // different sizes.
 //
 // After elaborating elab_demo, the design hierarchy will have four
 // modules: elab_demo, two sat_add's at 5 bits, and one at 9 bits.

module sat_add
  #( int width = 8 )
   ( output uwire [width:1] sum,
     input uwire [width:1] a, b );

   localparam logic [width:1] val_max = -1; // Or ( 1 << width ) - 1

   uwire [width:1] raw_sum;
   uwire           carry_out;
   assign         { carry_out, raw_sum } = a + b;
   assign         sum = carry_out ? val_max : raw_sum;

endmodule

module elab_demo
  ( output uwire [4:0] s1, s3,  output uwire [8:0] s2, output uwire [8:0] d,
    input uwire [4:0] a, b,
    input uwire [8:0] c );

   sat_add #(5) a1( s1, a,  b);
   sat_add #(5) a2( s3, a, -b);
   sat_add #(9) a3( s2, {9'b01110} , c);

   assign d = s2 - s1;

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Elaboration-Time Constants

 // :SV12: Section 6.20 Constants

 // :Def: Elaboration-Time Constant
 //  - An object declared with the qualifiers:
 //    :Keywords: parameter,localparam,specparam,genvar
 //  - A parameter declared in a module header (using #).
 //  - An expression consisting only of constants and a subset
 //    of functions.

 // :Example:
 //
 // Consider the saturating adder from above.

module sat_add_from_above
  #( int width = 8 )
  // width is an elaboration-time constant because it is a parameter.

   ( output uwire [width:1] sum,

     // input a IS NOT an elaboration-time constant, even for instantiation a2.
     input uwire [width:1] a,

     input uwire [width:1] b );

   // Var val_max is an elaboration-time constant because it is
   // declared localparam.
   //
   localparam logic [width:1] val_max = -1;

   // The right-hand-side, width - 1, is an elaboration-time constant
   // because it is an expression consisting only of elaboration-time
   // constants (just width in this case), operators (just - in this
   // case), and constant functions (no functions here).
   //
   localparam int wm1 = width - 1;

   uwire [wm1:0] raw_sum;
   uwire           carry_out;
   assign         { carry_out, raw_sum } = a + b;
   assign         sum = carry_out ? val_max : raw_sum;

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Generate Constructs

// :SV12: Chapter 27

 /// :Def: Generate Construct
 //  Verilog statements that are executed during elaboration and
 //  which essentially write new Verilog code.
 //
 /// Important Concepts
 //
 //  Generate statements execute before simulation or synthesis.
 //
 //  Expressions in generate statements can only refer to generate-time
 //    constants (expressions that can be computed in terms of parameters,
 //    literals, and genvars.)

 ///  The Generate Constructs
 //
 //   - Integer object kind for use in generate loops.
 //       :Keyword: genvar
 //        Declares an integer-valued object that can be used as
 //         an iterator in generate loops.
 //        See the "for" construct.
 //
 //   - Loop Generate Constructs
 //       :Keyword: for
 //       :Syntax:  for ( genvar IDENT = INIT; TEST; INCR ) STATEMENT;
 //
 //       Iterator must be of kind genvar.
 //       INIT, TEST, and INCR must be expressions that consist
 //        of genvar's and elaboration-time constants.
 //
 //       STATEMENT can contain assign's and instantiations.
 //       :Sample: for ( genvar i=0; i<9; i++ ) begin assign a[i] = b[w-i]; end
 //
 //   - Conditional Generate Constructs
 //       :Keywords: if,else,case,endcase
 //       if / else
 //       case / endcase

 /// :Practice Problems:
 //
//   2017 Midterm Exam Problem 5b -- Correct the use of generate statements.
//    https://www.ece.lsu.edu/v/2017/mt.pdf
//    https://www.ece.lsu.edu/v/2017/mt_sol.pdf



// :Example:
//
// Use of generate for loop to construct a ripple adder.
//
// First, define a BFA in the usual way.
//
module bfa_structural( output uwire sum, cout, input uwire a, b, cin );
   assign  sum = a ^ b ^ cin;
   assign  cout = a & b | a & cin | b & cin;
endmodule
//
// Here is how a ripple adder is constructed without generate statements:
//
module ripple_4
   ( output uwire [3:0] sum,  output uwire cout,
     input uwire [3:0] a, b,  input uwire cin);

   uwire         c0, c1, c2;

   bfa_structural bfa0( sum[0], c0,   a[0], b[0], cin );
   bfa_structural bfa1( sum[1], c1,   a[1], b[1], c0  );
   bfa_structural bfa2( sum[2], c2,   a[2], b[2], c1  );
   bfa_structural bfa3( sum[3], cout, a[3], b[3], c2  );
   //
   // How tedious!  What if we need a 16-bit ripple adder? Or 64? 128?

endmodule
//
// Use of generate statements to instantiate BFA's ..
// .. to construct 4-bit ripple adder.
//
module ripple_4_elab
  ( output uwire [3:0] sum,  output uwire cout,
    input uwire [3:0] a, b,  input uwire cin);

   uwire          carry[3:-1];
   assign        carry[-1] = cin;
   assign        cout = carry[3];

   for ( genvar i=0; i<4; i++ )
      bfa_structural bfa( sum[i], carry[i], a[i], b[i], carry[i-1] );
   //
   /// Please make sure that you understand what the loop above is doing:
   ///   It is instantiating (creating) four BFA's ..
   ///   .. IT IS NOT using the same BFA four times.

endmodule
//
// Use of generate statements to instantiate BFA's ..
// .. to construct a ripple adder of any size (using parameter w).
//
module ripple_w
  #( int w = 4 )
   ( output uwire [w-1:0] sum,   output uwire cout,
     input uwire [w-1:0] a, b,   input uwire cin);

   uwire           carry[w-1:-1];
   assign        carry[-1] = cin;
   assign        cout = carry[w-1];

   for ( genvar i = 0; i<w; i++ )
     bfa_structural bfa( sum[i], carry[i], a[i], b[i], carry[i-1] );

endmodule


// :Example:
//
// Miscellaneous correct and incorrect usage of generate statements. 
//
module gen_good
  #( int option = 0 )
   ( output uwire logic [7:0] ab, abc,
     output var logic [7:0] vab, vabc,
     input uwire logic [7:0] a, b, c );

   for ( genvar i=0; i<8; i += 2 )
     // This *IS NOT PROCEDURAL* code ... that's why assign is okay.
     begin
        assign ab[i] = a[i];
        assign ab[i+1] = b[i+1];
     end

   `ifdef XXX
   always_comb
     // This is procedural code (because of the always_comb) ...
     // ... that's why assign is *NOT OKAY*.
     for ( int i=0; i<8; i+=2 ) begin
        assign ab[i] = a[i];
        assign ab[i+1] = b[i+1];
     end
   `endif

   always_comb
     // This is procedural code.
     for ( int i=0; i<8; i+=2 ) begin
        vab[i] = a[i];
        vab[i+1] = b[i+1];
     end

   for ( genvar i=0; i<8; i++ )
     begin:myblock
        uwire aab;
        and a1(aab,a[i],b[i]);
        if ( option == 0 )
          or  o1(abc[i],aab,c[i]); // Instantiation okay because in genblock.
        else
          xor o1(abc[i],aab,c[i]);
     end

endmodule
//
module gen_bad
   ( output uwire [7:0] ab,abc,
     input uwire option,
     input uwire [7:0] a, b, c );

   for ( genvar i=0; i<8; i += 2 )
     begin
        assign ab[i] = a[i];
        assign ab[i+1] = b[i+1];
     end

   `ifdef SHOW_BAD_CODE
   for ( genvar i=0; i<8; i++ )
     begin:myblock
        uwire aab;
        and a1(aab,a[i],b[i]);
        if ( option == 0 )  /// Bad, because option not constant.
          or  o1(abc[i],aab,c[i]);
        else
          xor x1(abc[i],aab,c[i]);
     end
    `endif

   for ( genvar i=0; i<8; i++ )
     begin:myotherblock
        uwire aab, ior, eor;
        and a1(aab,a[i],b[i]);
        //  if ( option == 0 )  /// Bad, because option not constant.
          or  o1(ior,aab,c[i]);
                //  else
          xor x1(eor,aab,c[i]);
        assign abc[i] = option == 0 ? ior : eor;
     end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Using Generate Constructs

 /// Recursive Definition of a Module
//
//   Sometimes it's easier to describe something recursively.
//   Sometimes it's not.
//
 /// General Idea
//
//   A module instantiates one or more smaller modules ..
//   .. splits work between these smaller modules ..
//   .. and combines their outputs.

 /// :Practice Problems:
//
//   2016 Homework 4 -- A Table Lookup
//    https://www.ece.lsu.edu/v/2016/hw04.pdf
//    https://www.ece.lsu.edu/v/2016/hw04.v.html
//    https://www.ece.lsu.edu/v/2016/hw04_sol.pdf
//    https://www.ece.lsu.edu/v/2016/hw04-sol.v.html
//
//   2017 Midterm Exam Problem 2 -- Recursively Described Multiplexor
//    https://www.ece.lsu.edu/v/2017/mt.pdf
//    https://www.ece.lsu.edu/v/2017/mt_sol.pdf
//
//   2014 Homework 2 -- Integer Multiplier
//    https://www.ece.lsu.edu/v/2014/hw02.pdf
//    https://www.ece.lsu.edu/v/2014/hw02.v.html
//    https://www.ece.lsu.edu/v/2014/hw02-sol.v.html
//
//   2018 Homework 5 -- Batcher Odd/Even Sorting Network
//    https://www.ece.lsu.edu/v/2018/hw05.pdf
//    https://www.ece.lsu.edu/v/2018/hw05.v.html
//    https://www.ece.lsu.edu/v/2018/hw05-sol.v.html

// :Example:
//
// Recursive description of a ripple adder.
// Here, recursion may not be worth the trouble ..
// .. see ripple_w above which avoids recursion.
//
module ripple_w_r
  #( int w = 16 )
   ( output uwire [w-1:0] sum,   output uwire cout,
     input uwire [w-1:0] a, b,   input uwire cin);

   uwire c;

   // Instantiate a BFA to handle least-significant bit.
   //
   bfa_structural bfa( sum[0], c, a[0], b[0], cin );

   if ( w == 1 )
     // If just one bit, we're done.
     //
     assign cout = c;
   else
     // Recursively instantiate this module to handle remaining bits.
     //
     ripple_w_r #(w-1) r(sum[w-1:1], cout, a[w-1:1], b[w-1:1], c);

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Elaboration of Tree Structures
//
 /// Typical Steps: (See simple_tree, below)
//
//  - Start with a module that has some kind of size parameter.
//  - Split work (usually inputs) into two parts, call the parts lo and hi.
//  - Recursively instantiate two copies of the module ..
//    .. connecting lo to one module and hi to the other module ..
//    .. making sure that at least one size parameter 
//    .. is smaller that its value in the enclosing module.
//  - Combine the outputs of the two modules.
//  - Provide a non-recursive case for when size is minimum ..
//    .. this might be called the termination condition.
//
module simple_tree
  #( int n = 16, int w = 32 )
   ( output uwire [w-1:0] x, input uwire [w-1:0] a[n] );

   // Note: n is the size parameter.

   if ( n > 1 ) begin

      // Recursive case.

      uwire [w-1:0] xlo, xhi;

      // Work, a, is split into a[n/2-1:0] and a[n-1:n/2}.

      // Recursive instantiations.
      //
      simple_tree #(n/2,w)   st_lo( xlo, a[0:n/2-1] );
      simple_tree #(n-n/2,w) st_hi( xhi, a[n/2:n-1] );
      //
      // n/2 is smaller than n (since n > 1)
      // n-n/2 is also smaller than n (since n > 1)

      // Combine outputs.
      assign x = xlo + xhi;
      //
      // The combination here is simple.

   end else begin

      // Non-recursive case for minimum size, n == 1
      //
      assign x = a[0];

   end

endmodule


// :Example:
//
// Find the minimum of n numbers. Use generate constructs to
// describe linear- and tree-structured modules.

module min_2
  #( int w = 4 )
   ( output uwire [w-1:0] e_min,
     input  uwire [w-1:0] e_0, e_1 );

   assign e_min = e_0 < e_1 ? e_0 : e_1;

endmodule

 // 

module min_n
  #( int w = 4,
     int n = 8 )
   ( output uwire [w-1:0] e_min,
     input  uwire [w-1:0] e[n] );

   uwire [w-1:0] im[n];

   assign im[0] = e[0];

   for ( genvar i = 1; i < n; i++ )
     min_2 #(w) m( im[i], e[i], im[i-1] );

   assign e_min = im[n-1];

endmodule



module min_t_live
  #( int w = 4,
     int n = 8 )
   ( output uwire [w-1:0] e_min,
     input uwire [w-1:0] e [ n-1:0 ] );

   if ( n == 2 ) begin

      min_2 #(w) m2(e_min, e[1], e[0] );

   end else begin

      uwire [w-1:0] e_min_lo, e_min_hi;
      localparam int nlo = n/2;
      localparam int nhi = n - nlo;

      min_t_live #(w,nlo) mtlo(e_min_lo, e[nlo-1:0] );
      min_t_live #(w,nhi) mthi(e_min_hi, e[n-1:nlo] );
      min_2 #(w) m2(e_min, e_min_lo, e_min_hi);

   end

endmodule

 // 

module min_t
  #( int w = 4,
     int n = 8 )
   ( output uwire [w-1:0] e_min,
     input uwire [w-1:0] e [ n-1:0 ] );

   if ( n == 1 ) begin

      assign e_min = e[0];

   end else begin

      localparam int n_lo = n / 2;
      localparam int n_hi = n - n_lo;

      uwire [w-1:0] m_lo, m_hi;

      min_t #(w,n_lo) mlo( m_lo, e[n_lo-1:0] );
      min_t #(w,n_hi) mhi( m_hi, e[n-1:n_lo] );

      min_2 #(w) m2( e_min, m_lo, m_hi);

   end

endmodule

 // 

 // 

 // 

 // 


module pop
  #( int w = 16, int wl = $clog2(w) )
   ( output logic [wl-1:0] p,
     input uwire [w-1:0] a );

   always_comb begin
      p=0;
      for ( int i=0; i<w; i++ ) p += a[i];
   end

endmodule

module pop_t
  #( int w = 16,
     int wl = $clog2(w) )
   ( output uwire [wl-1:0] p,
     input uwire [w-1:0] a );

   if ( w == 1 ) begin

      assign p = a[0];

   end else begin
      localparam int wlo = w/2, whi = w-wlo;
      pop_t #(wlo) plo(p_lo,a[wlo-1:0]);
      pop_t #(whi) phi(p_hi,a[w-1:wlo]);
      assign p = p_lo + p_hi;
      
   end
   

endmodule










   


// :Example:
//
// Implement a population count module using recursion.
//
// Note: The population of a bit vector (which might hold an integer)
// is the number of bits equal to 1. For example, the population of
// 8'b00010100 is 2 and the population of 8'b10010011 is 4.

// Live module, not yet finished. (20 September 2018, 12:33:26 CDT)
module pop
  #( int w = 5,
     int pw = $clog2(w) )
   (output var logic [pw-1:0] p,
    input uwire [w-1:0] a );

   if ( w == 1 ) begin

      assign p = a[0];

   end else begin
     
   uwire [pw-1:0] p_lo, p_hi;
   localparam int wlo = w / 2;
   localparam int whi = w - wlo;
   pop #(wlo,pw) plo(p_lo, a[wlo-1:0]);
   pop #(whi,pw) phi(p_hi, a[w-1:wlo]);
   assign p = p_lo + p_hi;
      end
   `ifdef justincaseineeditlater
   always_comb begin

      p = 0;
      for ( int i=0; i<w; i++ ) p += a[i];

   end
   `endif

endmodule








// A population count module using procedural code.
//
module pop_1
  (output logic [4:0] p,
   input uwire [15:0] v );

   always_comb begin
      p = 0;
      for ( int i=0; i<16; i++ ) p += v[i];
   end

endmodule

// A population count module using recursion to describe a tree
// structure.
//
module pop_n
  #( int bits = 16 )
   ( output uwire [4:0] p,
     input uwire [bits-1:0] v );

   if ( bits == 1 ) begin

      assign p = v[0];

   end else begin

      uwire [4:0]         p1, p2;

      // Split work in half between two modules.
      //
      pop_n #(bits/2)      pi1( p1, v[bits/2-1:0] );
      pop_n #(bits-bits/2) pi2( p2, v[bits-1:bits/2] );

      // And then combine the results of each.
      //
      assign p = p1 + p2;

   end

endmodule