```////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2016 Homework 4 --- SOLUTION
//

/// Assignment  http://www.ece.lsu.edu/koppel/v/2016/hw04.pdf
/// Solution: http://www.ece.lsu.edu/koppel/v/2016/hw04_sol.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 0
//
//  Look over but don't modify this module.

module lookup_behav
#( int charsz = 8,
int nelts = 15,
int idx_sz = \$clog2(nelts) )
( output logic found,
output logic [idx_sz-1:0] index,
input uwire [charsz-1:0] char,
input uwire [charsz-1:0] chars[nelts] );

always_comb begin
found = 0;
index = 0;
for ( int i=0; i<nelts; i++ )
if ( chars[i] == char ) begin
index = i;
found = 1;
end
end

endmodule

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
/// Complete lookup_linear so that it does the lookup using instantiated lookup_elt.
//
//     [✔] The code must be synthesizable.
//     [✔] The code must synthesize to combinational logic. (No latches.)
//     [✔] Can use behavioral or implicit structural code.
//     [✔] Do not rename modules or change ports.

module lookup_elt
#( int charsz = 4 )
( output logic match,
input uwire [charsz-1:0] char_lookup,
input uwire [charsz-1:0] char_elt);
/// Don't modify this module.
always_comb match = char_lookup == char_elt;

endmodule

module lookup_linear
#( int charsz = 8,
int nelts = 15,  // Pronounced en-elts.
int idx_sz = \$clog2(nelts) )
( output logic found,
output logic [idx_sz-1:0] index,
input uwire [charsz-1:0] char,
input uwire [charsz-1:0] chars[nelts-1:0] );

/// SOLUTION -- Easy
//
//  Instantiate nelts modules, but use use behavioral code to examine
//  their found (match) outputs.

// Declare wires to connect to the found outputs of the instantiated modules.
//
uwire    [nelts-1:0] match;

for ( genvar i=0; i<nelts; i++ )
lookup_elt #(charsz) le(match[i],char,chars[i]);

always_comb begin
found = 0;
index = 0;
for ( int i=0; i<nelts; i++ )
if ( match[i] ) begin index = i; found = 1; end
end

endmodule

module lookup_linear_alt
#( int charsz = 8,
int nelts = 15,
int idx_sz = \$clog2(nelts) )
( output logic found,
output logic [idx_sz-1:0] index,
input uwire [charsz-1:0] char,
input uwire [charsz-1:0] chars[nelts-1:0] );

/// SOLUTION -- Alternative
//
//  Use generate statements to instantiate the modules and to
//  generate logic to find the index.

// Instantiate nelts lookup_elt modules and compute found.
//
uwire    [nelts-1:0] match;

for ( genvar i=0; i<nelts; i++ )
lookup_elt #(charsz) le(match[i],char,chars[i]);

assign found = | match;

// Instantiate logic to find the index of the last matching character.
//

uwire [idx_sz-1:0]   idx_i[nelts-1:-1];

assign idx_i[-1] = 0;

for ( genvar i=0; i<nelts; i++ )
// If no match pass along previous idx_i, otherwise replace it with i.
assign idx_i[i] = match[i] ? i : idx_i[i-1];

assign   index = idx_i[nelts-1];

endmodule

//////////////////////////////////////////////////////////////////////////////
///  Problem 2
//
/// Complete lookup_tree so that it does the lookup using recursive
/// instantiations of itself.
//
//     [✔] The code must be synthesizable.
//     [✔] The code must synthesize to combinational logic. (No latches.)
//     [✔] Can use behavioral or implicit structural code.
//     [✔] Do not rename modules or change ports.

module lookup_tree_simple
#( int charsz = 8,
int nelts = 15,
int idx_sz = \$clog2(nelts) )
( output uwire found,
output uwire [idx_sz-1:0] index,
input uwire [charsz-1:0] char,
input uwire [charsz-1:0] chars[nelts] );

/// SOLUTION -- Unoptimized

if ( nelts == 1 ) begin

assign found = char == chars[0];

end else begin

// Split the character array between recursive instantiations.
//
localparam int lo_sz = nelts / 2;
localparam int lo_bits = \$clog2(lo_sz);
localparam int hi_sz = nelts - lo_sz;
localparam int hi_bits = \$clog2(hi_sz);
//
// Note that we need to compute lo_bits and hi_bits correctly so
// that we can declare index connections, lo_idx and hi_idx, of
// the correct size.

uwire       lo_f, hi_f;
uwire [lo_bits-1:0] lo_idx;
uwire [hi_bits-1:0] hi_idx;

lookup_tree #(charsz,lo_sz) lo( lo_f, lo_idx, char, chars[ 0:lo_sz-1 ] );
lookup_tree #(charsz,hi_sz) hi( hi_f, hi_idx, char, chars[lo_sz:nelts-1]);

assign     found = lo_f || hi_f;

assign     index = hi_f ? lo_sz + hi_idx : lo_idx;
//
/// Notes:
//
//  It's okay to use lo_idx if hi_f is false, because if
//  lo_f is false too then lo_idx must be zero.
//
//  This solution is less efficient because an adder is required
//  to compute lo_sz + hi_idx. In the preferred solution lo_sz
//  is chosen so that it is always a power of 2, avoiding the

end

endmodule

module lookup_tree
#( int charsz = 8,
int nelts = 15,
int idx_sz = \$clog2(nelts) )
( output uwire found,
output uwire [idx_sz-1:0] index,
input uwire [charsz-1:0] char,
input uwire [charsz-1:0] chars[nelts] );

/// SOLUTION -- Preferred

if ( nelts == 1 ) begin

assign found = char == chars[0];

end else begin

// Make the size of the first lookup_tree, lo, a power of two.
//
localparam int lo_bits = idx_sz - 1;
localparam int lo_sz = 1 << lo_bits;

// Compute the size of the second lookup_tree, hi.
//
localparam int hi_sz = nelts - lo_sz;
localparam int hi_bits = \$clog2(hi_sz);

uwire       lo_f, hi_f;
uwire [lo_bits-1:0] lo_idx;
uwire [hi_bits-1:0] hi_idx;

lookup_tree #(charsz,lo_sz) lo( lo_f, lo_idx, char, chars[ 0:lo_sz-1 ] );
lookup_tree #(charsz,hi_sz) hi( hi_f, hi_idx, char, chars[lo_sz:nelts-1]);

assign     found = lo_f || hi_f;

//
/// Notes:
//
//  Because char can be found in at most one location and because
//  index is zero if the char is not found, we can compute
//  index as:
//
//     index = ( hi_f ? lo_sz : 0 ) + lo_idx + hi_idx;
//
//  or even better:
//
//     index = ( hi_f ? lo_sz : 0 ) + ( lo_idx | hi_idx );
//
//  Because lo_sz is a power of two, and because lo_sz > hi_idx,
//  lo_sz > lo_idx, and lo_sz >= hi_sz we can use concatenation
//
//     index =  ( hi_f << lo_bits ) + ( lo_idx | hi_idx );
//     index =  { hi_f, lo_idx | hi_idx };
//
/// Example:
//
//  nelts = 14, lo_sz = 8, lo_bits = 3, hi_sz = 6, hi_bits = 3;
//  chars = { 100, 101, ... , 113 }
//
//  Suppose: char = 106.  We want index = 6.
//   lo_f = 1, lo_idx = 6 = 3'b110
//   hi_f = 0, hi_idx = 0.
//   idx = { 1'b0, 3'b110 + 3'b0 } = { 1'b0, 3'b110 } = 4'b0110 = 6
//
//  Suppose: char = 112.  We want index = 12.
//   lo_f = 0, lo_idx = 0.
//   hi_f = 1, hi_idx = 4 = 3'b100
//   idx = { 1'b1, 3'b000 + 3'b100 } = { 1'b1, 3'b100 } = 4'b1100 = 12
//
/// Synthesized Hardware:
//
//

end

endmodule

`ifdef xxx
Module Name                             Area   Delay   Delay
Actual  Target
lookup_behav_charsz8_nelts4             9152     927   10000
lookup_linear_charsz8_nelts4            9012     990   10000
lookup_tree_charsz8_nelts4              8916    1026   10000
lookup_tree_opt_charsz8_nelts4          8988     952   10000
lookup_behav_charsz8_nelts15           35444    2348   10000
lookup_linear_charsz8_nelts15          34996    2338   10000
lookup_tree_charsz8_nelts15            34280    2606   10000
lookup_tree_opt_charsz8_nelts15        33532    2238   10000
lookup_behav_charsz8_nelts32           74648    3691   10000
lookup_linear_charsz8_nelts32          74212    3257   10000
lookup_tree_charsz8_nelts32            70932    2480   10000
lookup_tree_opt_charsz8_nelts32        71084    2443   10000
lookup_behav_charsz8_nelts40           94028    3862   10000
lookup_linear_charsz8_nelts40          94288    2585   10000
lookup_tree_charsz8_nelts40            95996    3501   10000
lookup_tree_opt_charsz8_nelts40        89292    2778   10000
lookup_behav_charsz8_nelts60          143268    5913   10000
lookup_linear_charsz8_nelts60         141792    5638   10000
lookup_tree_charsz8_nelts60           142828    3963   10000
lookup_tree_opt_charsz8_nelts60       138288    3501   10000
lookup_behav_charsz8_nelts4            12304     621     100
lookup_linear_charsz8_nelts4           13344     594     100
lookup_tree_charsz8_nelts4             13280     598     100
lookup_tree_opt_charsz8_nelts4         10888     640     100
lookup_behav_charsz8_nelts15           46896    1136     100
lookup_linear_charsz8_nelts15          47528    1120     100
lookup_tree_charsz8_nelts15            45268    1151     100
lookup_tree_opt_charsz8_nelts15        41696    1003     100
lookup_behav_charsz8_nelts32          105032    1247     100
lookup_linear_charsz8_nelts32         108688    1288     100
lookup_tree_charsz8_nelts32            96980    1093     100
lookup_tree_opt_charsz8_nelts32        96408    1056     100
lookup_behav_charsz8_nelts40          120132    1523     100
lookup_linear_charsz8_nelts40         131344    1114     100
lookup_tree_charsz8_nelts40           134444    1260     100
lookup_tree_opt_charsz8_nelts40       116320    1144     100
lookup_behav_charsz8_nelts60          184892    1726     100
lookup_linear_charsz8_nelts60         210512    1461     100
lookup_tree_charsz8_nelts60           185628    1890     100
lookup_tree_opt_charsz8_nelts60       176544    1500     100

`endif

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. Of
//  course, the removal of tests which your module fails is not a
//  method of fixing a broken module. (One might modify the testbench
//  so that the first tests it performs are thoe which make it easier
//  to determine what the problem is, for example, test inputs that
//  are all 0's or all 1's.)

function automatic int min( int a, int b );
min = a <= b ? a : b;
endfunction

module testbench();

localparam int nelts[] = { 4, 5, 10, 15,  16, 30, 40, 64 };
localparam int nnelts = 8;

logic      start_done[-1:nnelts];

for ( genvar i = 0; i < nnelts; i++ )
testbench_sz #(nelts[i]) aa(start_done[i],start_done[i-1]);

initial begin

if ( nnelts != nelts.size() ) begin
\$write("Value of nnelts, %0d, different than number of elts in nelts, %0d. (See module testbench.\n",
nnelts, nelts.size());
\$fatal(1);
end

start_done[-1] = 1;

end

endmodule

module testbench_sz
#( int nelts = 100 )
( output logic done, input uwire start );

localparam int telts = nelts * 2;
localparam int idx_sz = \$clog2(nelts);
localparam int charsz = 8;
localparam int charmk = ( 1 << charsz ) - 1;

localparam int num_tests = min(nelts,500);
localparam int stride = nelts/num_tests;
localparam int nmuts = 3;

logic [charsz-1:0] char, chars[telts-1:0];

uwire               found[nmuts];
uwire [idx_sz-1:0]  idx[nmuts];

lookup_behav #(charsz,nelts) l00(found[0],idx[0],char,chars[nelts-1:0]);
lookup_linear #(charsz,nelts) l01(found[1],idx[1],char,chars[nelts-1:0]);
lookup_tree #(charsz,nelts) l02(found[2],idx[2],char,chars[nelts-1:0]);

string mutnames[] = { "lookup_behav", "lookup_linear", "lookup_tree" };

int err[nmuts];

initial begin

automatic int tot_errors = 0;
localparam int gap = charmk / telts;
chars[0] = {\$random} % gap;
for ( int i=1; i<telts; i++ )
chars[i] = chars[i-1] + 1 + {\$random} % gap;
for ( int i=0; i<telts; i++ ) begin
automatic int idx = {\$random} % telts;
{chars[i],chars[idx]} = {chars[idx],chars[i]};
end

wait ( start == 1 );

for ( int i=0; i<num_tests; i++ ) begin

automatic int idx_try = {\$random} % telts;
char = chars[idx_try];
#1;

for ( int mut=0; mut<3; mut++ ) begin

automatic int cr_fnd = shadow_found === found[mut];
automatic int cr_idx = shadow_idx === idx[mut];

if ( cr_fnd && ( shadow_found == 0 || cr_idx ) ) continue;
if ( err[mut] > 100 ) break;

\$write("Mod %s nelts %0d test %3d char %h: wrong %s. Found %h%s%h (correct) idx %4d %s %4d (correct)\n",
mutnames[mut], nelts, i, char,
cr_idx ? "found" : "index",
found[mut],
cr_fnd ? "==" : "!=",
idx[mut], cr_fnd ? "!=" : "??", shadow_idx);

err[mut]++;

end

end
for ( int i=0; i<num_tests; i++ ) tot_errors += err[i];
\$write("For nelts %0d performed %0d tests, %0d errors found.\n",
nelts, num_tests, tot_errors);

done = 1;
end

endmodule