////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2019 Homework 2 -- Live Solution
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2019/hw02.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
 ///    Complete clz_tree so that it computes the clz of its input recursively.
//
//     [✔] Split the input between two recursive instantiations ..
//         .. and properly combine the results.
//     [✔] Don't forget the terminal case, maybe for w == 1.
//     [ ] For maximum credit, avoid any use of adders ..
//         .. by making the width of the "lo" module a power of 2.
//
//     [✔] Make sure that port connections are the correct size ..
//         .. mismatched ports are Verilog errors in this assignment.
//     [✔] Do not make port widths larger than needed.
//     [ ] Make sure that the testbench does not report errors.
//     [ ] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] As always, avoid costly, slow, and confusing code.

module clz_tree
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output uwire [ww-1:0] nlz,
     input uwire [w-1:0] a );

   /// Recursive zero counter.
   //
   //  nlz is the number of consecutive 0 bits in a, counting upward from
   //  bit 0. If every bit of a is 0 then nlz is w.
   //
   //  Parameters:
   //    w   Width of a, in bits.
   //    ww  Width of nlz. The default is just wide enough to hold w.

   if ( w == 1 ) begin

      // Terminal case, one bit: count is 1 when that bit is 0.
      assign nlz = a[0] ? 0 : 1;

   end else if ( w == 2 ) begin

      // Terminal case, two bits: a small priority encoder.
      assign nlz = a[0] ? 0 : a[1] ? 1 : 2;

   end else begin

      // Cut the input roughly in half. The lower slice gets w/2 bits
      // and the upper slice gets whatever is left.
      //
      localparam int bits_lo = w / 2;
      localparam int bits_hi = w - bits_lo;
      localparam int cnt_w_lo = $clog2( bits_lo + 1 );
      localparam int cnt_w_hi = $clog2( bits_hi + 1 );

      uwire [cnt_w_lo-1:0] cnt_lo;
      uwire [cnt_w_hi-1:0] cnt_hi;

      clz_tree #( .w(bits_lo) ) tlo( .nlz(cnt_lo), .a(a[bits_lo-1:0]) );
      clz_tree #( .w(bits_hi) ) thi( .nlz(cnt_hi), .a(a[w-1:bits_lo]) );

      // True when the lower slice holds no 1 bits at all.
      uwire lo_all_zero = cnt_lo == bits_lo;

      // If the lower slice contains a 1, its count is the answer.
      // Otherwise the run of zeros continues into the upper slice, so
      // the full lower width is added to the upper count.
      //
      // Examples:
      //   a = 4'b0110:  lo = 2'b10 -> 1,  hi = 2'b01 -> 0,  nlz = 1
      //   a = 4'b1000:  lo = 2'b00 -> 2,  hi = 2'b10 -> 1,  nlz = 2 + 1 = 3
      //
      assign nlz = lo_all_zero ? bits_lo + cnt_hi : cnt_lo;

   end

endmodule

module clz_tree_better
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output uwire [ww-1:0] nlz,
     input uwire [w-1:0] a );

   /// Recursive zero counter in which the "lo" half is a power of 2 wide.
   //
   //  nlz is the number of consecutive 0 bits in a, counting upward from
   //  bit 0. If every bit of a is 0 then nlz is w.
   //
   //  Parameters:
   //    w   Width of a, in bits.
   //    ww  Width of nlz. The default is just wide enough to hold w. The
   //        "hi" instance below overrides it so that both recursive
   //        results have the same width.

   if ( w == 1 ) begin

      assign nlz = a == 1 ? 0 : 1;

   end else if ( w == 2 ) begin

      assign nlz = a[0] == 1 ? 0 : a[1] == 1 ? 1 : 2;

   end else begin

      // wlo is the largest power of 2 that is strictly less than w,
      // so whi = w - wlo is in the range 1 .. wlo.
      //
      localparam int wwwlo = $clog2( w ) - 1;
      localparam int wlo = 1 << wwwlo;
      localparam int wwlo = $clog2( wlo + 1 );  // = wwwlo + 1
      localparam int whi = w - wlo;
      uwire [wwlo-1:0] nlz_lo;
      uwire [wwlo-1:0] nlz_hi;

      clz_tree_better #(wlo) tlo( nlz_lo, a[wlo-1:0] );
      clz_tree_better #(whi,wwlo) thi( nlz_hi, a[w-1:wlo] );

      // a = 4'b0110   nlz = 1
      // alo: 2'b10    ahi 2'b01
      // nlzlo:   1    nlzhi   0
      // nlz = nlzlo
      //
      // a = 4'b1000   nlz = 3
      // alo: 2'b00    ahi 2'b10
      // nlzlo:   2    nlzhi   1
      // nlz = wlo + nlzhi = 2 + 1

      // When the lo half is all zeros the result is wlo + nlz_hi.
      // Because wlo = 2^wwwlo and nlz_hi <= wlo, the sum only changes
      // the bits at positions wwwlo and above:
      //
      //   nlz_hi <  wlo:  MSB of nlz_hi is 0  ->  { 2'b01, low bits }
      //   nlz_hi == wlo:  MSB of nlz_hi is 1  ->  { 2'b10, 0 ... 0  }
      //
      // The constant must be 2 bits wide. Operands of a concatenation
      // are self-determined, so 1'b1 + (1-bit value) is computed in
      // one bit and wraps to 0 in the nlz_hi == wlo case, which yields
      // nlz = 0 instead of w for an all-zero input.
      //
      assign nlz = nlz_lo == wlo
                   ? { 2'b1 + nlz_hi[wwlo-1], nlz_hi[wwlo-2:0] } : nlz_lo;

   end

endmodule


 /// A Behavioral CLZ Description
module clz
  #( int w = 19,
     int ww = $clog2(w+1) )
   ( output var logic [ww-1:0] nlz,
     input uwire logic [w-1:0] a );

   // Behavioral description: nlz is the index of the lowest-numbered
   // bit of a that is 1, or w when no bit of a is 1.
   //
   always_comb begin

      // Value used when a contains no 1 bits.
      nlz = w;

      // Scan from the top bit down. Later assignments override earlier
      // ones, so the lowest set position is the one that remains.
      for ( int pos = w - 1; pos >= 0; pos-- )
        if ( a[pos] ) nlz = pos;

   end

endmodule


 /// Solution To Homework 1 Using Generate Statements
module mult_tree
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [wp:1] prod,
     input uwire [wa:1] a,
     input uwire [wb:1] b );

   /// Recursive unsigned multiplier: prod = a * b.
   //
   //  Parameters:
   //    wa  Width of a, in bits. Any value >= 1, not only powers of 2.
   //    wb  Width of b, in bits.
   //    wp  Width of prod. The default, wa + wb, holds the full product.

   if ( wa == 1 ) begin

      // Terminal case: a one-bit multiplier selects either b or 0.
      assign prod = a ? b : 0;

   end else begin

      // Split a into a low part of wn bits and a high part of the
      // remaining wh bits. For odd wa the high part is one bit wider
      // than the low part; sizing both halves as wa/2 would leave the
      // a[wa:wn+1] connection one bit too wide for its port and drop
      // the most-significant bit of a.
      //
      localparam int wn = wa / 2;
      localparam int wh = wa - wn;
      localparam int wx_lo = wb + wn;
      localparam int wx_hi = wb + wh;

      uwire [wx_lo:1] prod_lo;
      uwire [wx_hi:1] prod_hi;

      mult_tree #(wn,wb) mlo( prod_lo, a[wn:1], b);
      mult_tree #(wh,wb) mhi( prod_hi, a[wa:wn+1], b);

      // The high partial product is weighted by 2^wn.
      assign prod = prod_lo + ( prod_hi << wn );

   end

endmodule




//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

// cadence translate_off

module testbench;

   // Values of w at which each module under test is instantiated.
   //
   localparam int widths[] = { 1, 2, 5, 8, 13, 15, 17 };

   // Number of entries in widths, written out by hand rather than as
   // widths.size() (presumably a tool limitation -- the original note
   // asks Cadence to fix this). The check below stops the simulation
   // if the constant no longer matches the array.
   //
   localparam int nw = 7;
   initial begin
      if ( widths.size() != nw )
        $fatal(1,"Constant nw should be %0d.\n",widths.size() );
   end

   // Error count over all instances; each testbench_n increments it
   // through the hierarchical name testbench.t_errs.
   //
   int t_errs;
   initial t_errs = 0;
   final $write("Total number of errors: %0d\n",t_errs);

   // Start/done chain. Instance k starts when element k-1 is true and
   // drives element k when it finishes, so the runs happen one after
   // another. Element -1 is tied high to start the first one.
   //
   uwire d[2*nw:-1];
   assign d[-1] = 1;

   // For every width: the plain trees occupy chain slots 0 .. nw-1
   // and the "better" trees occupy slots nw .. 2*nw-1.
   //
   for ( genvar k=0; k<nw; k++ ) begin
      testbench_n #( .w(widths[k]), .better(0) )
      t2( .start(d[k-1]), .done(d[k]) );
      testbench_n #( .w(widths[k]), .better(1) )
      t3( .start(d[nw+k-1]), .done(d[nw+k]) );
   end

endmodule

module testbench_n
  #( int w = 20, bit better = 0 )
   ( output logic done, input uwire start );

   // Tests one zero-counter module at width w.
   //
   //  Parameters:
   //    w       Input width of the module under test.
   //    better  Selects clz_tree_better when 1, clz_tree when 0.
   //  Ports:
   //    start   Testing begins once this is true.
   //    done    Set to 1 after the last test.

   localparam int ww = $clog2(w+1);
   localparam int n_tests = w * 10;
   localparam string mod_name = better ? "clz_tree_better" : "clz_tree";

   logic [w-1:0] a;
   uwire [ww:1] nlz;

   // Module under test. Both choices use the instance name c0.
   if ( better )
     clz_tree_better #(w) c0( .nlz(nlz), .a(a) );
   else
     clz_tree #(w) c0( .nlz(nlz), .a(a) );

   initial begin

      automatic int n_errs = 0;

      wait( start );

      $write("** Starting tests for width %0d.\n",w);

      for ( int trial=0; trial<n_tests; trial++ ) begin

         // Expected count cycles through 0 .. w. The stimulus is a
         // random value with bit 0 forced to 1, shifted left by the
         // expected count; shifting by w leaves a all zeros.
         automatic int expected = {trial} % ( w + 1 );
         a = { $random | 1 } << expected;

         #1;

         if ( nlz === expected ) continue;

         n_errs++; testbench.t_errs++;

         // Print only the first few mismatches overall, plus the
         // first one seen by this instance.
         if ( !( testbench.t_errs >= 5 && n_errs >= 2 ) )
           $write
             ("Error for %s at width %2d: input %h:  %d != %0d (correct).\n",
              mod_name, w, a, nlz, expected);

      end

      $write("Mod %s, width %0d, done with %0d tests, %0d errors.\n",
             mod_name,w,n_tests,n_errs);

      done = 1;

   end

endmodule

// cadence translate_on