```////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2018 Homework 1 -- SOLUTION
//

/// Assignment  https://www.ece.lsu.edu/koppel/v/2018/hw01.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
/// Modify sort2 so that it implements a 2-input sorting network ..
/// .. using explicity structural code only.
//
//     [✔] Use structural code only: including mux2 and compare_le.
//     [✔] Pay attention to bit widths.
//     [✔] Module must be synthesizable.

module sort2
#( int w = 8 )
( output uwire [w-1:0] x0, x1,
input uwire [w-1:0] a0, a1 );

/// SOLUTION
//
//  The only common problem was forgetting to specify the width.

uwire c;
compare_le #(w) comp(c, a0, a1);

mux2 #(w) m0(x1,c,a0,a1);
mux2 #(w) m1(x0,c,a1,a0);

endmodule

module mux2
#( int w = 4 )
( output uwire [w-1:0] x,
input uwire select,
input uwire [w-1:0] a0, a1 );
assign x = select ? a1 : a0;
endmodule

module compare_le
#( int w = 2 )
( output uwire c,
input uwire [w-1:0] a, b );
assign c = a <= b;
endmodule

module sort2_is
#( int w = 8 )
( output uwire [w-1:0] x0, x1,
input uwire [w-1:0] a0, a1 );

assign {x0, x1} = a0 <= a1 ? { a0, a1 } : { a1, a0 };

endmodule

//////////////////////////////////////////////////////////////////////////////
///  Problem 2
//
/// Modify sort4 so that it implements a 4-input sorting network ..
/// .. using explicity structural code only.
//
//     [✔] Use structural code only: including sort2 and sort3 modules.
//     [✔] Pay attention to bit widths.
//     [✔] Module must be synthesizable.
//     [✔] The critical path length should be 4 or fewer sort2 modules.

module sort4
#( int w = 8 )
( output uwire [w-1:0] x0, x1, x2, x3,
input uwire [w-1:0] a0, a1, a2, a3 );

/// SOLUTION

uwire [w-1:0] s10, s11, s12, s13, s20;

// Connect sort2 modules into a tree, so that the longest path
// is through 2 sort2 modules.
//
sort2 #(w) s2( s10, s11,  a0, a1 );
sort2 #(w) s3( s12, s13,  a2, a3 );
sort2 #(w) s4( s20, x3,  s11, s13 );

// Note that s20 will be available later than s10 and s12 because
// the path from a0 - a3 to s20 go through two sort2 modules,
// whereas the paths from a0 - a3 to s10 and s12 only pass through
// one sort2 module. Since s20 will be available later connect it
// to sort3 port a2, which is closer to the outputs.
//
sort3 #(w) s1( x0, x1, x2,  s10, s12, s20 );
//
// Note that module would be slower if s12 and s20 connections above
// were reversed:
//  sort3 #(w) s1( x0, x1, x2,  s10, s20, s12 );

endmodule

module sort3
#( int w = 8 )
( output uwire [w-1:0] x0, x1, x2,
input uwire [w-1:0] a0, a1, a2 );

uwire [w-1:0] i10, i11, i21;

sort2 #(w) s0_01( i10, i11,   a0, a1 );
sort2 #(w) s1_12( i21, x2,  i11, a2  );
sort2 #(w) s2_01( x0, x1,  i10, i21  );

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. For
//  example, one might modify the testbench so that the first tests it
//  performs are those which make it easier to determine what the
//  problem is, for example, test inputs that are all 0's or all 1's.
//
//  Of course, the removal of tests which your module fails is not a
//  method of fixing a broken module.  The TA-bot will test your
//  code using a fresh copy of the testbench, not the one below.

module sortx
#( int max_size = 5,
int modnum = 0,
int w = 8,
int max_muts = 3)
( output uwire [w-1:0] xlong[max_muts][max_size],
input uwire [w-1:0] a[max_size] );

uwire [w-1:0] x[max_size];
assign xlong[modnum] = x;

if ( modnum == 0 ) begin:A

localparam int s_size = 2;
localparam string name = "sort2";
sort2 #(w) s(x[0],x[1],a[0],a[1]);

end else if ( modnum == 1 ) begin:A

localparam int s_size = 2;
localparam string name = "sort2_is";
sort2_is #(w) s(x[0],x[1],a[0],a[1]);

end else if ( modnum == 2 ) begin:A

localparam int s_size = 3;
localparam string name = "sort3";
sort3 #(w) s(x[0],x[1],x[2],a[0],a[1],a[2]);

end else begin:A

localparam int s_size = 4;
localparam string name = "sort4";
sort4 #(w) s(x[0],x[1],x[2],x[3],a[0],a[1],a[2],a[3]);

end

endmodule

module testbench;

localparam int w = 8;
localparam int n_tests = 100;
localparam int max_size = 4;
localparam int max_muts = 4;

logic [w-1:0] a[max_size];
uwire [w-1:0] x[max_muts][max_size];

typedef struct { int idx; string name; int s_size; } Info;
Info pi[\$];

for ( genvar i=0; i<4; i++ ) begin
sortx #(max_size,i,w,max_muts) s(x,a);
initial pi.push_back( '{ i, s.A.name, s.A.s_size} );
end

initial begin

automatic int g_elt_err_count = 0;
automatic int g_sort_err_count = 0;

\$write("Starting testbench.");

// Initialize the input to a recognizable pattern, which should
// be overwritten but if not, we can tell. If we print the value in
// hex.
for ( int e = 0; e < max_size; e++ ) a[e] = 'haaaaaaaa;

foreach ( pi[idx] ) begin

automatic Info p = pi[idx];
automatic string mut = p.name;
automatic int s_size = p.s_size;
automatic logic [w-1:0] shadow[] = new[s_size];

for ( int i = 0;  i < n_tests;  i++ ) begin

automatic int this_elt_err_count = 0;

// To make sure that the comparison is correct restrict the
// key to a subset of bits.
automatic int n_bits = {\$random} % w + 1;
automatic int mask = ( 1 << n_bits ) - 1;

for ( int i=0; i<w; i++ ) begin
automatic int b = {\$random} % w;
end

for ( int e = 0; e < s_size; e++ )

#1;

for ( int e = 0; e < s_size; e++ ) begin
automatic logic [w-1:0] elt = x[p.idx][e];
if ( shadow[e] === elt ) continue;
this_elt_err_count++;
g_elt_err_count++;
if ( g_elt_err_count > 5 ) continue;
\$write
("Mod %s, sort %2d index %2d, wrong elt %d != %d (correct)\n",
end

if ( this_elt_err_count ) g_sort_err_count++;

end

\$write("Tests for %s done, errors in %d of %d sorts.\n",
mut,  g_sort_err_count, n_tests);

end

end

endmodule