////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2019 Homework 2 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2019/hw02.pdf
 /// Solution Problem 2:  https://www.ece.lsu.edu/koppel/v/2019/hw02_sol.pdf


`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
 ///    Complete clz_tree so that it computes the clz of its input recursively.
//
//     [✔] Split the input between two recursive instantiations ..
//         .. and properly combine the results.
//     [✔] Don't forget the terminal case, maybe for w == 1.
//     [✔] For maximum credit, avoid any use of adders ..
//         .. by making the width of the "hi" module a power of 2.
//
//     [✔] Make sure that port connections are the correct size ..
//         .. mismatched ports are Verilog errors in this assignment.
//     [✔] Do not make port widths larger than needed.
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] As always, avoid costly, slow, and confusing code.

 /// Solution Discussion
//
// Two solutions appear below.
//
// The clz_tree_fat module is much simpler, but it uses an adder.
// Two points would be deducted if this were given as a solution
//
// The clz_tree module does not use an adder. It would get full credit.


 /// SOLUTION -- With Adder. Two points would be deducted.
module clz_tree_fat
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output uwire [ww:1] nlz,
     input uwire [w:1] a );

   if ( w == 1 ) begin

      assign nlz = ~ a;

   end else begin

      localparam int wlo = w/2;
      localparam int whi = w - wlo;
      localparam int wwlo = $clog2(wlo+1);
      localparam int wwhi = $clog2(whi+1);

      uwire [wwlo:1] lz_lo;
      uwire [wwhi:1] lz_hi;

      clz_tree_fat #(wlo) clo( lz_lo, a[wlo:1]   );
      clz_tree_fat #(whi) chi( lz_hi, a[w:wlo+1] );

      assign nlz = lz_hi < whi ? lz_hi : whi + lz_lo;

   end

endmodule

 /// SOLUTION -- Without Adder
module clz_tree
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output uwire [ww-1:0] nlz,
     input uwire [w-1:0] a );

   if ( w == 1 ) begin

      assign nlz = !a[0];

   end else if ( w == 2 ) begin

      assign nlz = { !a[0] && !a[1], !a[1] && a[0] };

   end else begin

      // Set whi to the largest power of 2 strictly less than w.
      //
      localparam int lhi = $clog2(w) - 1;
      localparam int whi = 1 << lhi;
      localparam int wwhi = lhi + 1;

      // Then set wlo to the number of remaining bits.
      //
      localparam int wlo = w - whi;
      localparam int wwlo = $clog2(wlo+1);

      uwire [wwlo-1:0] nlz_lo;
      uwire [wwhi-1:0] nlz_hi;

      // Instantiate recursive modules.
      //
      clz_tree #(wlo) clo( nlz_lo, a[wlo-1:0] );
      clz_tree #(whi) chi( nlz_hi, a[w-1:wlo] );

      // Split the nlz_lo and nlz_hi outputs into "overflow" bits,
      // ov_lo and ov_hi, and the remaining bits lz_lo and lz_hi.
      // There is no overflow bit if the size is not a power of two.
      // Because whi is always a power of two it will always have an
      // overflow bit.
      //
      uwire ov_lo, ov_hi;
      uwire [lhi-1:0] lz_lo, lz_hi;

      // The hi bits are easy because there's always an overflow.
      assign {ov_hi,lz_hi} = nlz_hi;

      if ( wlo == whi ) assign {ov_lo,lz_lo} = nlz_lo;
      else              assign lz_lo = nlz_lo;

      // If the high bits are all zeros, use the count from the lo
      // bits and then further below effectively add whi to nlz.
      //
      assign nlz[lhi-1:0] = ov_hi ? lz_lo : lz_hi;

      // Next, add on the overflow bits without using an adder. 
      //
      // The first case, wlo==whi, is for when both wlo and whi (and
      // w) are all powers of two. In that case we must compute two
      // more bits of nlz. If ov_hi is zero they are both zero because
      // the number of leading zeros will be strictly less than whi.
      // If ov_hi is one then the high two bits will be 1 + ov_lo.
      //
      // In the second case, (the else which means whi>wlo), there is
      // no ov_lo bit. So the most significant bit is 0 if ov_hi is
      // zero and 1 if ov_hi is one.

      if ( wlo == whi )
        assign nlz[lhi+1:lhi] = ov_hi ? { ov_lo, !ov_lo } : 2'b0;
      else
        assign nlz[lhi] = ov_hi;

   end

endmodule


 /// A Behavioral CLZ Description
module clz
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output var logic [ww-1:0] nlz,
     input uwire logic [w-1:0] a );

   uwire [w:0] aa = { a, 1'b1 };
   always_comb for ( int i=0; i<=w; i++ ) if ( aa[i] ) nlz = w-i;

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

module testbench;

   // The widths (values of w) at which the modules will be instantiated.
   //
   localparam int widths[] = { 1, 2, 5, 8, 13, 15, 17 };

   //  localparam int nw = widths.size();
   localparam int nw = 7; // Cadence, please fix this.
   initial if ( nw != widths.size() )
     $fatal(1,"Constant nw should be %0d.\n",widths.size() );

   int t_errs;     // Total number of errors.
   initial t_errs = 0;
   final $write("Total number of errors: %0d\n",t_errs);

   uwire d[nw:-1];    // Start / Done signals.
   assign d[-1] = 1;  // Initialize first at true.

   // Instantiate a testbench at each size.
   //
   for ( genvar i=0; i<nw; i++ )
     testbench_n #(widths[i]) t2( .done(d[i]), .start(d[i-1]) );

endmodule

module testbench_n
  #( int w = 20 )
   ( output logic done, input uwire start );

   localparam int ww = $clog2(w+1);

   localparam int n_tests = w * 10;

   uwire [ww:1] nlz;
   logic [w-1:0] a;
   clz_tree_fat #(w) c0(nlz,a);
   //  clz #(w) c0(nlz,a);

   initial begin

      automatic int n_errs = 0;

      wait( start );

      $write("** Starting tests for width %0d.\n",w);

      for ( int t=0; t<n_tests; t++ ) begin

         automatic int lz = {t} % ( w + 1 );
         a = { 1'b1, (w)'({$random}) } >> ( lz + 1 );

         #1;

         if ( nlz !== lz ) begin
            n_errs++; testbench.t_errs++;
            if ( testbench.t_errs < 5 || n_errs < 2 )
              $write("Error for width %2d: input %h:  %d != %0d (correct).\n",
                     w, a, nlz, lz);
         end

      end

      $write("Width %0d, done with %0d tests, %0d errors.\n",w,n_tests,n_errs);

      done = 1;

   end

endmodule

// cadence translate_on