////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2019 Homework 2 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2019/hw02.pdf
 /// Solution Problem 2:  https://www.ece.lsu.edu/koppel/v/2019/hw02_sol.pdf


`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
 ///    Complete clz_tree so that it computes the clz of its input recursively.
//
//     [✔] Split the input between two recursive instantiations ..
//         .. and properly combine the results.
//     [✔] Don't forget the terminal case, maybe for w == 1.
//     [✔] For maximum credit, avoid any use of adders ..
//         .. by making the width of the "hi" module a power of 2.
//
//     [✔] Make sure that port connections are the correct size ..
//         .. mismatched ports are Verilog errors in this assignment.
//     [✔] Do not make port widths larger than needed.
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] As always, avoid costly, slow, and confusing code.

 /// Solution Discussion
//
// Two solutions appear below.
//
// The clz_tree_fat module is much simpler, but it uses an adder.
// Two points would be deducted if this were given as a solution
//
// The clz_tree module does not use an adder. It would get full credit.


 /// SOLUTION -- With Adder. Two points would be deducted.
module clz_tree_fat
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output uwire [ww:1] nlz,
     input uwire [w:1] a );

   if ( w == 1 ) begin

      assign nlz = ~ a;

   end else begin

      localparam int wlo = w/2;
      localparam int whi = w - wlo;
      localparam int wwlo = $clog2(wlo+1);
      localparam int wwhi = $clog2(whi+1);

      uwire [wwlo:1] lz_lo;
      uwire [wwhi:1] lz_hi;

      clz_tree_fat #(wlo) clo( lz_lo, a[wlo:1]   );
      clz_tree_fat #(whi) chi( lz_hi, a[w:wlo+1] );

      assign nlz = lz_hi < whi ? lz_hi : whi + lz_lo;

   end

endmodule


 /// SOLUTION -- Without Adder
module clz_tree
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output uwire [ww-1:0] nlz,
     input uwire [w-1:0] a );

   if ( w == 1 ) begin

      assign nlz = !a[0];

   end else if ( w == 2 ) begin

      assign nlz = { !a[0] && !a[1], !a[1] && a[0] };

   end else begin

      // Set whi to the largest power of 2 strictly less than w.
      //
      localparam int lhi = $clog2(w) - 1;
      localparam int whi = 1 << lhi;
      localparam int wwhi = lhi + 1;

      // Then set wlo to the number of remaining bits.
      //
      localparam int wlo = w - whi;

      uwire [wwhi-1:0] nlz_lo, nlz_hi;
      // Note: nlz_lo may be one bit wider than needed.

      // Instantiate recursive modules.
      //
      clz_tree #(wlo,wwhi) clz_lo( nlz_lo, a[wlo-1:0] );
      clz_tree #(whi,wwhi) clz_hi( nlz_hi, a[w-1:wlo] );

      // Split the nlz_lo and nlz_hi outputs into "overflow" (MSB) bits,
      // ov_lo and ov_hi, and the remaining bits lz_lo and lz_hi.
      //
      uwire ov_lo, ov_hi;
      uwire [lhi-1:0] lz_lo, lz_hi;
      //
      assign { ov_lo, lz_lo } = nlz_lo;
      assign { ov_hi, lz_hi } = nlz_hi;

      // Assemble nlz in one of three ways (Case 0, Case 1, Case 1)
      //
      assign nlz = !ov_hi ? { 2'b00, lz_hi } :  // Case 0
                   !ov_lo ? { 2'b01, lz_lo } :  // Case 1
                            { 2'b10, lz_lo };   // Case 2
      //
      // Case 0:
      //   Input to clz_hi has a 1, so just use nlz_hi.
      //   This case occurs when the MSB of nlz_hi is 0.
      //   For this case just set nlz to nlz_hi.
      //
      // Case 1:
      //   Input to clz_hi is all zeros, and  wlo < whi  or  nlz_lo < whi.
      //   For this case set nlz = whi + nlz_lo = { 2'b01, lz_lo }.
      //
      // Case 2:
      //   Input to clz_hi is all zeros, and  nlz_lo == whi.
      //   If this condition is true then ov_lo = 1
      //   For this case set 
      //     nlz = whi + nlz_lo = { 2'b1 + ov_lo, lz_lo } = { 2'b10, lz_lo }
   end

endmodule


 /// A Behavioral CLZ Description
module clz
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output var logic [ww-1:0] nlz,
     input uwire logic [w-1:0] a );

   uwire [w:0] aa = { a, 1'b1 };
   always_comb for ( int i=0; i<=w; i++ ) if ( aa[i] ) nlz = w-i;

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

module testbench;

   // The widths (values of w) at which the modules will be instantiated.
   //
   localparam int widths[] = { 1, 2, 5, 8, 13, 15, 17 };

   //  localparam int nw = widths.size();
   localparam int nw = 7; // Cadence, please fix this.
   initial if ( nw != widths.size() )
     $fatal(1,"Constant nw should be %0d.\n",widths.size() );

   int t_errs;     // Total number of errors.
   initial t_errs = 0;
   final $write("Total number of errors: %0d\n",t_errs);

   uwire d[nw:-1];    // Start / Done signals.
   assign d[-1] = 1;  // Initialize first at true.

   // Instantiate a testbench at each size.
   //
   for ( genvar i=0; i<nw; i++ )
     testbench_n #(widths[i]) t2( .done(d[i]), .start(d[i-1]) );

endmodule

module testbench_n
  #( int w = 20 )
   ( output logic done, input uwire start );

   localparam int ww = $clog2(w+1);

   localparam int n_tests = w * 10;

   uwire [ww:1] nlz;
   logic [w-1:0] a;
   clz_tree #(w) c0(nlz,a);
   //  clz #(w) c0(nlz,a);

   initial begin

      automatic int n_errs = 0;

      wait( start );

      $write("** Starting tests for width %0d.\n",w);

      for ( int t=0; t<n_tests; t++ ) begin

         automatic int lz = {t} % ( w + 1 );
         a = { 1'b1, (w)'({$random}) } >> ( lz + 1 );

         #1;

         if ( nlz !== lz ) begin
            n_errs++; testbench.t_errs++;
            if ( testbench.t_errs < 5 || n_errs < 2 )
              $write("Error for width %2d: input %h:  %d != %0d (correct).\n",
                     w, a, nlz, lz);
         end

      end

      $write("Width %0d, done with %0d tests, %0d errors.\n",w,n_tests,n_errs);

      done = 1;

   end

endmodule

// cadence translate_on