//////////////////////////////////////////////////////////////////////////////// // /// LSU EE 4755 Fall 2016 Homework 4 --- SOLUTION // /// Assignment http://www.ece.lsu.edu/koppel/v/2016/hw04.pdf /// Solution: http://www.ece.lsu.edu/koppel/v/2016/hw04_sol.pdf `default_nettype none ////////////////////////////////////////////////////////////////////////////// /// Problem 0 // // Look over but don't modify this module. module lookup_behav #( int charsz = 8, int nelts = 15, int idx_sz = $clog2(nelts) ) ( output logic found, output logic [idx_sz-1:0] index, input uwire [charsz-1:0] char, input uwire [charsz-1:0] chars[nelts] ); always_comb begin found = 0; index = 0; for ( int i=0; i<nelts; i++ ) if ( chars[i] == char ) begin index = i; found = 1; end end endmodule ////////////////////////////////////////////////////////////////////////////// /// Problem 1 // /// Complete lookup_linear so that it does the lookup using instantiated lookup_elt. // // [✔] The code must be synthesizable. // [✔] The code must synthesize to combinational logic. (No latches.) // [✔] Make sure that the testbench does not report errors. // [✔] Can use behavioral or implicit structural code. // [✔] Do not rename modules or change ports. module lookup_elt #( int charsz = 4 ) ( output logic match, input uwire [charsz-1:0] char_lookup, input uwire [charsz-1:0] char_elt); /// Don't modify this module. always_comb match = char_lookup == char_elt; endmodule module lookup_linear #( int charsz = 8, int nelts = 15, // Pronounced en-elts. int idx_sz = $clog2(nelts) ) ( output logic found, output logic [idx_sz-1:0] index, input uwire [charsz-1:0] char, input uwire [charsz-1:0] chars[nelts-1:0] ); /// SOLUTION -- Easy // // Instantiate nelts modules, but use use behavioral code to examine // their found (match) outputs. // Declare wires to connect to the found outputs of the instantiated modules. // uwire [nelts-1:0] match; for ( genvar i=0; i<nelts; i++ ) lookup_elt #(charsz) le(match[i],char,chars[i]); always_comb begin found = 0; index = 0; for ( int i=0; i<nelts; i++ ) if ( match[i] ) begin index = i; found = 1; end end endmodule module lookup_linear_alt #( int charsz = 8, int nelts = 15, int idx_sz = $clog2(nelts) ) ( output logic found, output logic [idx_sz-1:0] index, input uwire [charsz-1:0] char, input uwire [charsz-1:0] chars[nelts-1:0] ); /// SOLUTION -- Alternative // // Use generate statements to instantiate the modules and to // generate logic to find the index. // Instantiate nelts lookup_elt modules and compute found. // uwire [nelts-1:0] match; for ( genvar i=0; i<nelts; i++ ) lookup_elt #(charsz) le(match[i],char,chars[i]); assign found = | match; // Instantiate logic to find the index of the last matching character. // uwire [idx_sz-1:0] idx_i[nelts-1:-1]; assign idx_i[-1] = 0; for ( genvar i=0; i<nelts; i++ ) // If no match pass along previous idx_i, otherwise replace it with i. assign idx_i[i] = match[i] ? i : idx_i[i-1]; assign index = idx_i[nelts-1]; endmodule ////////////////////////////////////////////////////////////////////////////// /// Problem 2 // /// Complete lookup_tree so that it does the lookup using recursive /// instantiations of itself. // // [✔] The code must be synthesizable. // [✔] The code must synthesize to combinational logic. (No latches.) // [✔] Make sure that the testbench does not report errors. // [✔] Can use behavioral or implicit structural code. // [✔] Do not rename modules or change ports. module lookup_tree_simple #( int charsz = 8, int nelts = 15, int idx_sz = $clog2(nelts) ) ( output uwire found, output uwire [idx_sz-1:0] index, input uwire [charsz-1:0] char, input uwire [charsz-1:0] chars[nelts] ); /// SOLUTION -- Unoptimized if ( nelts == 1 ) begin assign found = char == chars[0]; assign index = 0; end else begin // Split the character array between recursive instantiations. // localparam int lo_sz = nelts / 2; localparam int lo_bits = $clog2(lo_sz); localparam int hi_sz = nelts - lo_sz; localparam int hi_bits = $clog2(hi_sz); // // Note that we need to compute lo_bits and hi_bits correctly so // that we can declare index connections, lo_idx and hi_idx, of // the correct size. uwire lo_f, hi_f; uwire [lo_bits-1:0] lo_idx; uwire [hi_bits-1:0] hi_idx; lookup_tree #(charsz,lo_sz) lo( lo_f, lo_idx, char, chars[ 0:lo_sz-1 ] ); lookup_tree #(charsz,hi_sz) hi( hi_f, hi_idx, char, chars[lo_sz:nelts-1]); assign found = lo_f || hi_f; assign index = hi_f ? lo_sz + hi_idx : lo_idx; // /// Notes: // // It's okay to use lo_idx if hi_f is false, because if // lo_f is false too then lo_idx must be zero. // // This solution is less efficient because an adder is required // to compute lo_sz + hi_idx. In the preferred solution lo_sz // is chosen so that it is always a power of 2, avoiding the // need for an addition. end endmodule module lookup_tree #( int charsz = 8, int nelts = 15, int idx_sz = $clog2(nelts) ) ( output uwire found, output uwire [idx_sz-1:0] index, input uwire [charsz-1:0] char, input uwire [charsz-1:0] chars[nelts] ); /// SOLUTION -- Preferred if ( nelts == 1 ) begin assign found = char == chars[0]; assign index = 0; end else begin // Make the size of the first lookup_tree, lo, a power of two. // localparam int lo_bits = idx_sz - 1; localparam int lo_sz = 1 << lo_bits; // Compute the size of the second lookup_tree, hi. // localparam int hi_sz = nelts - lo_sz; localparam int hi_bits = $clog2(hi_sz); uwire lo_f, hi_f; uwire [lo_bits-1:0] lo_idx; uwire [hi_bits-1:0] hi_idx; lookup_tree #(charsz,lo_sz) lo( lo_f, lo_idx, char, chars[ 0:lo_sz-1 ] ); lookup_tree #(charsz,hi_sz) hi( hi_f, hi_idx, char, chars[lo_sz:nelts-1]); assign found = lo_f || hi_f; if ( lo_bits == 0 ) assign index = hi_f; else assign index = { hi_f, hi_idx | lo_idx }; // /// Notes: // // Because char can be found in at most one location and because // index is zero if the char is not found, we can compute // index as: // // index = ( hi_f ? lo_sz : 0 ) + lo_idx + hi_idx; // // or even better: // // index = ( hi_f ? lo_sz : 0 ) + ( lo_idx | hi_idx ); // // Because lo_sz is a power of two, and because lo_sz > hi_idx, // lo_sz > lo_idx, and lo_sz >= hi_sz we can use concatenation // to avoid the add: // // index = ( hi_f << lo_bits ) + ( lo_idx | hi_idx ); // index = { hi_f, lo_idx | hi_idx }; // /// Example: // // nelts = 14, lo_sz = 8, lo_bits = 3, hi_sz = 6, hi_bits = 3; // chars = { 100, 101, ... , 113 } // // Suppose: char = 106. We want index = 6. // lo_f = 1, lo_idx = 6 = 3'b110 // hi_f = 0, hi_idx = 0. // idx = { 1'b0, 3'b110 + 3'b0 } = { 1'b0, 3'b110 } = 4'b0110 = 6 // // Suppose: char = 112. We want index = 12. // lo_f = 0, lo_idx = 0. // hi_f = 1, hi_idx = 4 = 3'b100 // idx = { 1'b1, 3'b000 + 3'b100 } = { 1'b1, 3'b100 } = 4'b1100 = 12 // /// Synthesized Hardware: // // end endmodule `ifdef xxx Module Name Area Delay Delay Actual Target lookup_behav_charsz8_nelts4 9152 927 10000 lookup_linear_charsz8_nelts4 9012 990 10000 lookup_tree_charsz8_nelts4 8916 1026 10000 lookup_tree_opt_charsz8_nelts4 8988 952 10000 lookup_behav_charsz8_nelts15 35444 2348 10000 lookup_linear_charsz8_nelts15 34996 2338 10000 lookup_tree_charsz8_nelts15 34280 2606 10000 lookup_tree_opt_charsz8_nelts15 33532 2238 10000 lookup_behav_charsz8_nelts32 74648 3691 10000 lookup_linear_charsz8_nelts32 74212 3257 10000 lookup_tree_charsz8_nelts32 70932 2480 10000 lookup_tree_opt_charsz8_nelts32 71084 2443 10000 lookup_behav_charsz8_nelts40 94028 3862 10000 lookup_linear_charsz8_nelts40 94288 2585 10000 lookup_tree_charsz8_nelts40 95996 3501 10000 lookup_tree_opt_charsz8_nelts40 89292 2778 10000 lookup_behav_charsz8_nelts60 143268 5913 10000 lookup_linear_charsz8_nelts60 141792 5638 10000 lookup_tree_charsz8_nelts60 142828 3963 10000 lookup_tree_opt_charsz8_nelts60 138288 3501 10000 lookup_behav_charsz8_nelts4 12304 621 100 lookup_linear_charsz8_nelts4 13344 594 100 lookup_tree_charsz8_nelts4 13280 598 100 lookup_tree_opt_charsz8_nelts4 10888 640 100 lookup_behav_charsz8_nelts15 46896 1136 100 lookup_linear_charsz8_nelts15 47528 1120 100 lookup_tree_charsz8_nelts15 45268 1151 100 lookup_tree_opt_charsz8_nelts15 41696 1003 100 lookup_behav_charsz8_nelts32 105032 1247 100 lookup_linear_charsz8_nelts32 108688 1288 100 lookup_tree_charsz8_nelts32 96980 1093 100 lookup_tree_opt_charsz8_nelts32 96408 1056 100 lookup_behav_charsz8_nelts40 120132 1523 100 lookup_linear_charsz8_nelts40 131344 1114 100 lookup_tree_charsz8_nelts40 134444 1260 100 lookup_tree_opt_charsz8_nelts40 116320 1144 100 lookup_behav_charsz8_nelts60 184892 1726 100 lookup_linear_charsz8_nelts60 210512 1461 100 lookup_tree_charsz8_nelts60 185628 1890 100 lookup_tree_opt_charsz8_nelts60 176544 1500 100 `endif ////////////////////////////////////////////////////////////////////////////// /// Testbench Code // // The code below instantiates some of the modules above, // provides test inputs, and verifies the outputs. // // The testbench may be modified to facilitate your solution. Of // course, the removal of tests which your module fails is not a // method of fixing a broken module. (One might modify the testbench // so that the first tests it performs are thoe which make it easier // to determine what the problem is, for example, test inputs that // are all 0's or all 1's.) // cadence translate_off function automatic int min( int a, int b ); min = a <= b ? a : b; endfunction module testbench(); localparam int nelts[] = { 4, 5, 10, 15, 16, 30, 40, 64 }; localparam int nnelts = 8; logic start_done[-1:nnelts]; for ( genvar i = 0; i < nnelts; i++ ) testbench_sz #(nelts[i]) aa(start_done[i],start_done[i-1]); initial begin if ( nnelts != nelts.size() ) begin $write("Value of nnelts, %0d, different than number of elts in nelts, %0d. (See module testbench.\n", nnelts, nelts.size()); $fatal(1); end start_done[-1] = 1; end endmodule module testbench_sz #( int nelts = 100 ) ( output logic done, input uwire start ); localparam int telts = nelts * 2; localparam int idx_sz = $clog2(nelts); localparam int charsz = 8; localparam int charmk = ( 1 << charsz ) - 1; localparam int num_tests = min(nelts,500); localparam int stride = nelts/num_tests; localparam int nmuts = 3; logic [charsz-1:0] char, chars[telts-1:0]; uwire found[nmuts]; uwire [idx_sz-1:0] idx[nmuts]; logic shadow_found; logic [idx_sz-1:0] shadow_idx; lookup_behav #(charsz,nelts) l00(found[0],idx[0],char,chars[nelts-1:0]); lookup_linear #(charsz,nelts) l01(found[1],idx[1],char,chars[nelts-1:0]); lookup_tree #(charsz,nelts) l02(found[2],idx[2],char,chars[nelts-1:0]); string mutnames[] = { "lookup_behav", "lookup_linear", "lookup_tree" }; int err[nmuts]; initial begin automatic int tot_errors = 0; localparam int gap = charmk / telts; chars[0] = {$random} % gap; for ( int i=1; i<telts; i++ ) chars[i] = chars[i-1] + 1 + {$random} % gap; for ( int i=0; i<telts; i++ ) begin automatic int idx = {$random} % telts; {chars[i],chars[idx]} = {chars[idx],chars[i]}; end wait ( start == 1 ); for ( int i=0; i<num_tests; i++ ) begin automatic int idx_try = {$random} % telts; char = chars[idx_try]; shadow_found = idx_try < nelts; shadow_idx = idx_try; #1; for ( int mut=0; mut<3; mut++ ) begin automatic int cr_fnd = shadow_found === found[mut]; automatic int cr_idx = shadow_idx === idx[mut]; if ( cr_fnd && ( shadow_found == 0 || cr_idx ) ) continue; if ( err[mut] > 100 ) break; $write("Mod %s nelts %0d test %3d char %h: wrong %s. Found %h%s%h (correct) idx %4d %s %4d (correct)\n", mutnames[mut], nelts, i, char, cr_idx ? "found" : "index", found[mut], cr_fnd ? "==" : "!=", shadow_found, idx[mut], cr_fnd ? "!=" : "??", shadow_idx); err[mut]++; end end for ( int i=0; i<num_tests; i++ ) tot_errors += err[i]; $write("For nelts %0d performed %0d tests, %0d errors found.\n", nelts, num_tests, tot_errors); done = 1; end endmodule // cadence translate_on