////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2016 Homework 4 --- SOLUTION
//

 /// Assignment  http://www.ece.lsu.edu/koppel/v/2016/hw04.pdf
 /// Solution: http://www.ece.lsu.edu/koppel/v/2016/hw04_sol.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 0
//
 //  Look over but don't modify this module.


module lookup_behav
  #( int charsz = 8,
     int nelts = 15,
     int idx_sz = $clog2(nelts) )
   ( output logic found,
     output logic [idx_sz-1:0] index,
     input uwire [charsz-1:0] char,
     input uwire [charsz-1:0] chars[nelts] );

   always_comb begin
      found = 0;
      index = 0;
      for ( int i=0; i<nelts; i++ )
        if ( chars[i] == char ) begin
           index = i;
           found = 1;
        end
   end

endmodule


//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
 /// Complete lookup_linear so that it does the lookup using instantiated lookup_elt.
//
//     [✔] The code must be synthesizable.
//     [✔] The code must synthesize to combinational logic. (No latches.)
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Can use behavioral or implicit structural code.
//     [✔] Do not rename modules or change ports.


module lookup_elt
  #( int charsz = 4 )
   ( output logic match,
     input uwire [charsz-1:0] char_lookup,
     input uwire [charsz-1:0] char_elt);
   /// Don't modify this module.
   always_comb match = char_lookup == char_elt;

endmodule

module lookup_linear
  #( int charsz = 8,
     int nelts = 15,  // Pronounced en-elts.
     int idx_sz = $clog2(nelts) )
   ( output logic found,
     output logic [idx_sz-1:0] index,
     input uwire [charsz-1:0] char,
     input uwire [charsz-1:0] chars[nelts-1:0] );

   /// SOLUTION -- Easy
   //
   //  Instantiate nelts modules, but use use behavioral code to examine
   //  their found (match) outputs.

   // Declare wires to connect to the found outputs of the instantiated modules.
   //
   uwire    [nelts-1:0] match;

   for ( genvar i=0; i<nelts; i++ )
     lookup_elt #(charsz) le(match[i],char,chars[i]);

   always_comb begin
      found = 0;
      index = 0;
      for ( int i=0; i<nelts; i++ )
        if ( match[i] ) begin index = i; found = 1; end
   end

endmodule

module lookup_linear_alt
  #( int charsz = 8,
     int nelts = 15,
     int idx_sz = $clog2(nelts) )
   ( output logic found,
     output logic [idx_sz-1:0] index,
     input uwire [charsz-1:0] char,
     input uwire [charsz-1:0] chars[nelts-1:0] );

   /// SOLUTION -- Alternative
   //
   //  Use generate statements to instantiate the modules and to
   //  generate logic to find the index.


   // Instantiate nelts lookup_elt modules and compute found.
   //
   uwire    [nelts-1:0] match;

   for ( genvar i=0; i<nelts; i++ )
     lookup_elt #(charsz) le(match[i],char,chars[i]);

   assign found = | match;


   // Instantiate logic to find the index of the last matching character.
   //

   uwire [idx_sz-1:0]   idx_i[nelts-1:-1];

   assign idx_i[-1] = 0;

   for ( genvar i=0; i<nelts; i++ )
     // If no match pass along previous idx_i, otherwise replace it with i.
     assign idx_i[i] = match[i] ? i : idx_i[i-1];

   assign   index = idx_i[nelts-1];

endmodule


//////////////////////////////////////////////////////////////////////////////
///  Problem 2
//
 /// Complete lookup_tree so that it does the lookup using recursive
 /// instantiations of itself.
//
//     [✔] The code must be synthesizable.
//     [✔] The code must synthesize to combinational logic. (No latches.)
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Can use behavioral or implicit structural code.
//     [✔] Do not rename modules or change ports.


module lookup_tree_simple
  #( int charsz = 8,
     int nelts = 15,
     int idx_sz = $clog2(nelts) )
   ( output uwire found,
     output uwire [idx_sz-1:0] index,
     input uwire [charsz-1:0] char,
     input uwire [charsz-1:0] chars[nelts] );

   /// SOLUTION -- Unoptimized

   if ( nelts == 1 ) begin

      assign found = char == chars[0];
      assign index = 0;

   end else begin

      // Split the character array between recursive instantiations.
      //
      localparam int lo_sz = nelts / 2;
      localparam int lo_bits = $clog2(lo_sz);
      localparam int hi_sz = nelts - lo_sz;
      localparam int hi_bits = $clog2(hi_sz);
      //
      // Note that we need to compute lo_bits and hi_bits correctly so
      // that we can declare index connections, lo_idx and hi_idx, of
      // the correct size.

      uwire       lo_f, hi_f;
      uwire [lo_bits-1:0] lo_idx;
      uwire [hi_bits-1:0] hi_idx;

      lookup_tree #(charsz,lo_sz) lo( lo_f, lo_idx, char, chars[ 0:lo_sz-1 ] );
      lookup_tree #(charsz,hi_sz) hi( hi_f, hi_idx, char, chars[lo_sz:nelts-1]);

      assign     found = lo_f || hi_f;

      assign     index = hi_f ? lo_sz + hi_idx : lo_idx;
      //
      /// Notes:
      //
      //  It's okay to use lo_idx if hi_f is false, because if
      //  lo_f is false too then lo_idx must be zero.
      //
      //  This solution is less efficient because an adder is required
      //  to compute lo_sz + hi_idx. In the preferred solution lo_sz
      //  is chosen so that it is always a power of 2, avoiding the
      //  need for an addition.

   end

endmodule


module lookup_tree
  #( int charsz = 8,
     int nelts = 15,
     int idx_sz = $clog2(nelts) )
   ( output uwire found,
     output uwire [idx_sz-1:0] index,
     input uwire [charsz-1:0] char,
     input uwire [charsz-1:0] chars[nelts] );

   /// SOLUTION -- Preferred

   if ( nelts == 1 ) begin

      assign found = char == chars[0];
      assign index = 0;

   end else begin

      // Make the size of the first lookup_tree, lo, a power of two.
      //
      localparam int lo_bits = idx_sz - 1;
      localparam int lo_sz = 1 << lo_bits;

      // Compute the size of the second lookup_tree, hi.
      //
      localparam int hi_sz = nelts - lo_sz;
      localparam int hi_bits = $clog2(hi_sz);

      uwire       lo_f, hi_f;
      uwire [lo_bits-1:0] lo_idx;
      uwire [hi_bits-1:0] hi_idx;

      lookup_tree #(charsz,lo_sz) lo( lo_f, lo_idx, char, chars[ 0:lo_sz-1 ] );
      lookup_tree #(charsz,hi_sz) hi( hi_f, hi_idx, char, chars[lo_sz:nelts-1]);

      assign     found = lo_f || hi_f;

      if ( lo_bits == 0 ) assign index = hi_f;
      else                assign index = { hi_f, hi_idx | lo_idx };
      //
      /// Notes:
      //
      //  Because char can be found in at most one location and because
      //  index is zero if the char is not found, we can compute
      //  index as:
      //
      //     index = ( hi_f ? lo_sz : 0 ) + lo_idx + hi_idx;
      //
      //  or even better:
      //
      //     index = ( hi_f ? lo_sz : 0 ) + ( lo_idx | hi_idx );
      //
      //  Because lo_sz is a power of two, and because lo_sz > hi_idx,
      //  lo_sz > lo_idx, and lo_sz >= hi_sz we can use concatenation
      //  to avoid the add:
      //
      //     index =  ( hi_f << lo_bits ) + ( lo_idx | hi_idx );      
      //     index =  { hi_f, lo_idx | hi_idx };
      //
      /// Example:
      //
      //  nelts = 14, lo_sz = 8, lo_bits = 3, hi_sz = 6, hi_bits = 3;
      //  chars = { 100, 101, ... , 113 }
      //
      //  Suppose: char = 106.  We want index = 6.
      //   lo_f = 1, lo_idx = 6 = 3'b110
      //   hi_f = 0, hi_idx = 0.
      //   idx = { 1'b0, 3'b110 + 3'b0 } = { 1'b0, 3'b110 } = 4'b0110 = 6
      //
      //  Suppose: char = 112.  We want index = 12.
      //   lo_f = 0, lo_idx = 0.
      //   hi_f = 1, hi_idx = 4 = 3'b100
      //   idx = { 1'b1, 3'b000 + 3'b100 } = { 1'b1, 3'b100 } = 4'b1100 = 12
      //
      /// Synthesized Hardware:
      //
      // 


   end

endmodule

`ifdef xxx
Module Name                             Area   Delay   Delay
                                              Actual  Target
lookup_behav_charsz8_nelts4             9152     927   10000
lookup_linear_charsz8_nelts4            9012     990   10000
lookup_tree_charsz8_nelts4              8916    1026   10000
lookup_tree_opt_charsz8_nelts4          8988     952   10000
lookup_behav_charsz8_nelts15           35444    2348   10000
lookup_linear_charsz8_nelts15          34996    2338   10000
lookup_tree_charsz8_nelts15            34280    2606   10000
lookup_tree_opt_charsz8_nelts15        33532    2238   10000
lookup_behav_charsz8_nelts32           74648    3691   10000
lookup_linear_charsz8_nelts32          74212    3257   10000
lookup_tree_charsz8_nelts32            70932    2480   10000
lookup_tree_opt_charsz8_nelts32        71084    2443   10000
lookup_behav_charsz8_nelts40           94028    3862   10000
lookup_linear_charsz8_nelts40          94288    2585   10000
lookup_tree_charsz8_nelts40            95996    3501   10000
lookup_tree_opt_charsz8_nelts40        89292    2778   10000
lookup_behav_charsz8_nelts60          143268    5913   10000
lookup_linear_charsz8_nelts60         141792    5638   10000
lookup_tree_charsz8_nelts60           142828    3963   10000
lookup_tree_opt_charsz8_nelts60       138288    3501   10000
lookup_behav_charsz8_nelts4            12304     621     100
lookup_linear_charsz8_nelts4           13344     594     100
lookup_tree_charsz8_nelts4             13280     598     100
lookup_tree_opt_charsz8_nelts4         10888     640     100
lookup_behav_charsz8_nelts15           46896    1136     100
lookup_linear_charsz8_nelts15          47528    1120     100
lookup_tree_charsz8_nelts15            45268    1151     100
lookup_tree_opt_charsz8_nelts15        41696    1003     100
lookup_behav_charsz8_nelts32          105032    1247     100
lookup_linear_charsz8_nelts32         108688    1288     100
lookup_tree_charsz8_nelts32            96980    1093     100
lookup_tree_opt_charsz8_nelts32        96408    1056     100
lookup_behav_charsz8_nelts40          120132    1523     100
lookup_linear_charsz8_nelts40         131344    1114     100
lookup_tree_charsz8_nelts40           134444    1260     100
lookup_tree_opt_charsz8_nelts40       116320    1144     100
lookup_behav_charsz8_nelts60          184892    1726     100
lookup_linear_charsz8_nelts60         210512    1461     100
lookup_tree_charsz8_nelts60           185628    1890     100
lookup_tree_opt_charsz8_nelts60       176544    1500     100

`endif

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. Of
//  course, the removal of tests which your module fails is not a
//  method of fixing a broken module. (One might modify the testbench
//  so that the first tests it performs are thoe which make it easier
//  to determine what the problem is, for example, test inputs that
//  are all 0's or all 1's.)



// cadence translate_off

function automatic int min( int a, int b );
         min = a <= b ? a : b;
endfunction

module testbench();

   localparam int nelts[] = { 4, 5, 10, 15,  16, 30, 40, 64 };
   localparam int nnelts = 8;

   logic      start_done[-1:nnelts];

   for ( genvar i = 0; i < nnelts; i++ )
     testbench_sz #(nelts[i]) aa(start_done[i],start_done[i-1]);

   initial begin

      if ( nnelts != nelts.size() ) begin
        $write("Value of nnelts, %0d, different than number of elts in nelts, %0d. (See module testbench.\n",
               nnelts, nelts.size());
         $fatal(1);
      end

      start_done[-1] = 1;

   end

endmodule

module testbench_sz
  #( int nelts = 100 )
   ( output logic done, input uwire start );

   localparam int telts = nelts * 2;
   localparam int idx_sz = $clog2(nelts);
   localparam int charsz = 8;
   localparam int charmk = ( 1 << charsz ) - 1;

   localparam int num_tests = min(nelts,500);
   localparam int stride = nelts/num_tests;
   localparam int nmuts = 3;

   logic [charsz-1:0] char, chars[telts-1:0];

   uwire               found[nmuts];
   uwire [idx_sz-1:0]  idx[nmuts];
   logic              shadow_found;
   logic [idx_sz-1:0] shadow_idx;

   lookup_behav #(charsz,nelts) l00(found[0],idx[0],char,chars[nelts-1:0]);
   lookup_linear #(charsz,nelts) l01(found[1],idx[1],char,chars[nelts-1:0]);
   lookup_tree #(charsz,nelts) l02(found[2],idx[2],char,chars[nelts-1:0]);

   string mutnames[] = { "lookup_behav", "lookup_linear", "lookup_tree" };

   int err[nmuts];

   initial begin

      automatic int tot_errors = 0;
      localparam int gap = charmk / telts;
      chars[0] = {$random} % gap;
      for ( int i=1; i<telts; i++ )
        chars[i] = chars[i-1] + 1 + {$random} % gap;
      for ( int i=0; i<telts; i++ ) begin
         automatic int idx = {$random} % telts;
         {chars[i],chars[idx]} = {chars[idx],chars[i]};
      end

      wait ( start == 1 );

      for ( int i=0; i<num_tests; i++ ) begin

         automatic int idx_try = {$random} % telts;
         char = chars[idx_try];
         shadow_found = idx_try < nelts;
         shadow_idx = idx_try;
         #1;

         for ( int mut=0; mut<3; mut++ ) begin

            automatic int cr_fnd = shadow_found === found[mut];
            automatic int cr_idx = shadow_idx === idx[mut];

            if ( cr_fnd && ( shadow_found == 0 || cr_idx ) ) continue;
            if ( err[mut] > 100 ) break;

            $write("Mod %s nelts %0d test %3d char %h: wrong %s. Found %h%s%h (correct) idx %4d %s %4d (correct)\n",
                   mutnames[mut], nelts, i, char,
                   cr_idx ? "found" : "index",
                   found[mut],
                   cr_fnd ? "==" : "!=",
                   shadow_found,
                   idx[mut], cr_fnd ? "!=" : "??", shadow_idx);

            err[mut]++;

         end

      end
      for ( int i=0; i<num_tests; i++ ) tot_errors += err[i];
      $write("For nelts %0d performed %0d tests, %0d errors found.\n",
             nelts, num_tests, tot_errors);

      done = 1;
   end

endmodule

// cadence translate_on