////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2019 Homework 1 -- SOLUTION
//

/// Assignment  https://www.ece.lsu.edu/koppel/v/2019/hw01.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1 -- SOLUTION
//
/// Modify mult16_tree, mult8_tree, and mult4_tree to implement a multiplier.
///
//
//     [✔] mult16_tree must use exactly two mult8_tree modules, etc.
//     [✔] Pay attention to port widths. Do not make them larger than needed.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl

module mult16_tree
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [31:0] prod,
     input uwire [15:0] a,
     input uwire [15:0] b );

   /// SOLUTION -- 16 x 16 unsigned multiplier built from two mult8_tree's.
   //
   //  The multiplier a is split into its low and high bytes. Each byte
   //  is multiplied by the full 16-bit b in its own mult8_tree, and the
   //  two partial products are then aligned and added.
   //
   //  Note: parameters wa, wb, and wp are declared but are not
   //  referenced below; the port widths are fixed at 16, 16, and 32.

   // One partial product per byte of a.
   //
   // An 8-bit unsigned value times a 16-bit unsigned value needs at
   // most 8 + 16 = 24 bits, so 24 bits is exactly enough.
   //
   uwire [23:0] pp_byte0, pp_byte1;

   // Exactly two mult8_tree instances: mlo takes a[7:0], mhi takes a[15:8].
   //
   mult8_tree mlo( .prod(pp_byte0), .a(a[7:0]),  .b(b) );
   mult8_tree mhi( .prod(pp_byte1), .a(a[15:8]), .b(b) );

   // The high-byte partial product has a weight of 2^8. Appending
   // eight zero bits applies that weight and yields a 32-bit value, so
   // the addition is carried out at the full 32-bit width of prod.
   //
   assign prod = { pp_byte1, 8'b0 } + pp_byte0;

endmodule

module mult8_tree
   ( output uwire [23:0] prod,
     input uwire [7:0] a,
     input uwire [15:0] b );

   /// SOLUTION -- 8 x 16 unsigned multiplier built from two mult4_tree's.
   //
   //  Port widths are the minimum needed: an 8-bit a times a 16-bit b
   //  fits in 8 + 16 = 24 bits.

   // One partial product per 4-bit half of a; each needs 4 + 16 = 20 bits.
   //
   uwire [19:0] pp_nib0, pp_nib1;

   // Exactly two mult4_tree instances: mlo takes a[3:0], mhi takes a[7:4].
   //
   mult4_tree mlo( .prod(pp_nib0), .a(a[3:0]), .b(b) );
   mult4_tree mhi( .prod(pp_nib1), .a(a[7:4]), .b(b) );

   // The upper partial product has a weight of 2^4. Appending four zero
   // bits applies that weight and gives a 24-bit value matching prod.
   //
   assign prod = { pp_nib1, 4'b0 } + pp_nib0;

endmodule

module mult4_tree
   ( output uwire [19:0] prod,
     input uwire [3:0] a,
     input uwire [15:0] b );

   /// SOLUTION -- 4 x 16 unsigned multiplier built from two mult2's.
   //
   //  Port widths are the minimum needed: a 4-bit a times a 16-bit b
   //  fits in 4 + 16 = 20 bits.

   // One partial product per 2-bit half of a; each needs 2 + 16 = 18 bits,
   // which is the width of the mult2 output port.
   //
   uwire [17:0] pp_pair0, pp_pair1;

   // mlo takes a[1:0], mhi takes a[3:2].
   //
   mult2 mlo( .prod(pp_pair0), .a(a[1:0]), .b(b) );
   mult2 mhi( .prod(pp_pair1), .a(a[3:2]), .b(b) );

   // The upper partial product has a weight of 2^2. Appending two zero
   // bits applies that weight and gives a 20-bit value matching prod.
   //
   assign prod = { pp_pair1, 2'b0 } + pp_pair0;

endmodule

/// Bonus Solution:
module mult_tree
  #( int wa = 16, int wb = 16, int wp = wa + wb )
   ( output uwire [wp:1] prod,
     input uwire [wa:1] a,
     input uwire [wb:1] b );

   /// BONUS SOLUTION
   //
   //  This answers a question that was almost but not quite asked:
   //    Using generate statements design a single module that can be
   //    instantiated into a module equivalent to mult16_tree,
   //    mult8_tree, mult4_tree, and mult2, and also mult32_tree, etc.
   //
   //  Parameters:
   //    wa  Width of multiplier a in bits. Must be at least 1. Any
   //        width is handled, not only powers of two.
   //    wb  Width of multiplicand b in bits.
   //    wp  Width of the product. Defaults to wa + wb, the number of
   //        bits needed to hold any unsigned wa-bit by wb-bit product.
   //
   //  Ports:
   //    prod  Unsigned product a * b.
   //    a, b  Unsigned operands.

   if ( wa == 1 ) begin

      // Terminal case: 1 bit partial product.
      //
      assign prod = a ? b : 0;
      //
      // Equivalent to: prod = a * b;

   end else begin

      // Split a into a lower and an upper part and recursively
      // instantiate a module for each part.
      //
      // When wa is odd the two parts cannot be the same size: the
      // lower part gets wa/2 bits (rounded down) and the upper part
      // gets the remaining bits. Sizing both parts as wa/2 would
      // silently drop the most-significant bit of a.

      localparam int w_lo = wa / 2;
      localparam int w_hi = wa - w_lo;

      // Each partial product is as wide as its slice of a plus wb.
      //
      uwire [wb+w_lo:1] prod_lo;
      uwire [wb+w_hi:1] prod_hi;

      mult_tree #(w_lo,wb) mlo( prod_lo, a[w_lo:1], b);
      mult_tree #(w_hi,wb) mhi( prod_hi, a[wa:w_lo+1], b);

      // Combine the partial products. The upper partial product is
      // weighted by 2^w_lo because its slice of a starts w_lo bits up.
      // The expression is evaluated at the width of prod, so with the
      // default wp no bits are lost by the shift.
      //
      assign prod = prod_lo + ( prod_hi << w_lo );

   end

endmodule

/// Do not modify the code below this point.

module mult2
( output uwire [17:0] prod, input uwire [1:0] a, input uwire [15:0] b );

// 2-bit by 16-bit unsigned multiplier.
//
// prod is 18 bits wide, which is 2 + 16, so the product of the 2-bit a
// and the 16-bit b always fits. Because the assignment target is 18
// bits, the multiplication below is evaluated at 18 bits.

/// DO NOT MODIFY THIS ROUTINE.
assign prod = a * b;

endmodule

module mult16_flat
#( int wa = 16, int wb = 16, int wp = wa + wb )
( output uwire [31:0] prod, input uwire [15:0] a, b );

// 16-bit by 16-bit unsigned multiplier built from eight mult2
// instances whose outputs are combined in a single flat sum.
//
// Note: parameters wa, wb, and wp are declared but are not referenced
// in this module; the port widths are fixed at 16, 16, and 32.

/// DO NOT MODIFY THIS ROUTINE.

// The text between `ifdef and `endif is not Verilog. It is the Emacs
// Lisp that was used to generate the declarations, instantiations, and
// assign statement that follow it. It is guarded by a macro that is
// meant never to be defined.
`ifdef NEVER_DEFINE_ME
// Emacs Lisp code to generate Verilog code for mult16_flat.
(cl-loop for i from 0 to 14 by 2
concat (if (= i 0) "   assign prod = prod00"
(format " + ( prod%02d << %d )" i i)) into prod
concat (format "%s prod%02d" (if (= i 0) "" ",") i) into decl
concat (format "   mult2 m%d( prod%02d, a[%d:%d], b);\n" i i (+ i 1) i)
into inst
finally (insert (concat "\n   uwire [17:0]" decl ";\n" inst "\n" prod ";\n")))
`endif

// One 18-bit partial product per pair of bits of a. The two digits in
// each name are the position of the lower bit of the pair.
uwire [17:0]  prod00, prod02, prod04, prod06, prod08, prod10, prod12, prod14;
mult2 m0( prod00, a[1:0], b);
mult2 m2( prod02, a[3:2], b);
mult2 m4( prod04, a[5:4], b);
mult2 m6( prod06, a[7:6], b);
mult2 m8( prod08, a[9:8], b);
mult2 m10( prod10, a[11:10], b);
mult2 m12( prod12, a[13:12], b);
mult2 m14( prod14, a[15:14], b);

// Shift each partial product left by the bit position of its pair and
// add them all. Because prod is 32 bits wide, the shifts and additions
// are evaluated at 32 bits.
assign prod = prod00 + ( prod02 << 2 ) + ( prod04 << 4 ) + ( prod06 << 6 ) + ( prod08 << 8 ) + ( prod10 << 10 ) + ( prod12 << 12 ) + ( prod14 << 14 );

endmodule

module mult_operator
#( int wa = 16, int wb = 16, int wp = wa + wb )
( output uwire [wp:1] prod, input uwire [wa:1] a, input uwire [wb:1] b );
// Unsigned multiplier described with the * operator.
//
// Parameters:
//   wa  Width of a in bits.
//   wb  Width of b in bits.
//   wp  Width of prod in bits; defaults to wa + wb.
//
/// DO NOT MODIFY THIS ROUTINE.
assign prod = a * b;
endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code

module testbench;

   // Testbench for the multiplier modules.
   //
   // Applies the same multiplier/multiplicand pair to every module
   // under test, compares each output with a product computed using
   // the * operator, and prints a per-module error summary.

   localparam int wid = 16;         // Operand width.
   localparam int num_tests = 1000; // Number of operand pairs applied.
   localparam int NUM_MULT = 4;     // Number of modules under test.
   localparam int err_limit = 7;    // Errors reported per module stop below this count.

   logic [wid-1:0] plier, cand;

   // Outputs of the modules under test, one element per module.
   //
   uwire [2*wid-1:0] prod[NUM_MULT];

   // Expected product for the current operand pair.
   //
   logic [2*wid-1:0] shadow_prod;

   mult_operator mb0(prod[0], plier, cand);
   mult16_flat mb1(prod[1], plier, cand);
   mult16_tree mb2(prod[2], plier, cand);
   mult_tree #(wid,wid) mb3(prod[3], plier, cand);

   // Display names, indexed the same way as prod.
   //
   string names[] = '{ "mult_operator", "mult16", "tree16", "treep"  };

   int err_cnt[NUM_MULT];

   // Array of multiplier/multiplicand values to try out.
   // After these values are used a random number generator will be used.
   //
   int tests[$] = {1,1, 1,2,  1,32,  32, 1};

   initial begin

      $display("Starting testbench.\n");

      for ( int i=0; i<num_tests; i++ ) begin

         // Set multiplier and multiplicand values.
         //
         plier = tests.size() ? tests.pop_front() : $random();
         cand = tests.size() ? tests.pop_front() : $random();

         // Compute the expected product. The target is 2*wid bits wide,
         // so the multiplication is evaluated at that width.
         //
         shadow_prod = plier * cand;

         // Give the modules under test time to settle.
         //
         #10;

         // Make sure each module's output is correct.
         //
         for ( int mut=0; mut<NUM_MULT; mut++ ) begin

            // Use !== so that x or z bits in an output count as errors.
            //
            if ( shadow_prod !== prod[mut] ) begin

               err_cnt[mut]++;

               if ( err_cnt[mut] < err_limit )
                 $display("Error in %s test %4d:  %x != %x (correct)\n",
                          names[mut], i, prod[mut], shadow_prod);
            end

         end

      end

      // Report the error total for each module.
      //
      for ( int mut=0; mut<NUM_MULT; mut++ ) begin

         $display("Mut %s, %d errors (%.1f%% of tests)\n",
                  names[mut], err_cnt[mut],
                  100.0 * err_cnt[mut]/real'(num_tests) );

      end

      $finish(2);

   end

endmodule