////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2018 Homework 1 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2018/hw01.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
 /// Modify sort2 so that it implements a 2-input sorting network ..
 /// .. using explicity structural code only.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Use structural code only: including mux2 and compare_le.
//     [✔] Pay attention to bit widths.
//     [✔] Module must be synthesizable.

module sort2
  #( int w = 8 )
   ( output uwire [w-1:0] x0, x1,
     input uwire [w-1:0] a0, a1 );

   /// SOLUTION
   //
   //  The only common problem was forgetting to specify the width.

   uwire c;
   compare_le #(w) comp(c, a0, a1);

   mux2 #(w) m0(x1,c,a0,a1);
   mux2 #(w) m1(x0,c,a1,a0);

endmodule

module mux2
  #( int w = 4 )
   ( output uwire [w-1:0] x,
     input uwire select,
     input uwire [w-1:0] a0, a1 );
   assign x = select ? a1 : a0;
endmodule

module compare_le
  #( int w = 2 )
   ( output uwire c,
     input uwire [w-1:0] a, b );
   assign c = a <= b;
endmodule


module sort2_is
  #( int w = 8 )
   ( output uwire [w-1:0] x0, x1,
     input uwire [w-1:0] a0, a1 );

   assign {x0, x1} = a0 <= a1 ? { a0, a1 } : { a1, a0 };

endmodule


//////////////////////////////////////////////////////////////////////////////
///  Problem 2
//
 /// Modify sort4 so that it implements a 4-input sorting network ..
 /// .. using explicity structural code only.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Use structural code only: including sort2 and sort3 modules.
//     [✔] Pay attention to bit widths.
//     [✔] Module must be synthesizable.
//     [✔] The critical path length should be 4 or fewer sort2 modules.


module sort4
  #( int w = 8 )
   ( output uwire [w-1:0] x0, x1, x2, x3,
     input uwire [w-1:0] a0, a1, a2, a3 );

   /// SOLUTION

   uwire [w-1:0] s10, s11, s12, s13, s20;

   // Connect sort2 modules into a tree, so that the longest path
   // is through 2 sort2 modules.
   //
   sort2 #(w) s2( s10, s11,  a0, a1 );
   sort2 #(w) s3( s12, s13,  a2, a3 );
   sort2 #(w) s4( s20, x3,  s11, s13 );

   // Note that s20 will be available later than s10 and s12 because
   // the path from a0 - a3 to s20 go through two sort2 modules,
   // whereas the paths from a0 - a3 to s10 and s12 only pass through
   // one sort2 module. Since s20 will be available later connect it
   // to sort3 port a2, which is closer to the outputs.
   //
   sort3 #(w) s1( x0, x1, x2,  s10, s12, s20 );
   //
   // Note that module would be slower if s12 and s20 connections above
   // were reversed:
   //  sort3 #(w) s1( x0, x1, x2,  s10, s20, s12 );

endmodule


module sort3
  #( int w = 8 )
   ( output uwire [w-1:0] x0, x1, x2,
     input uwire [w-1:0] a0, a1, a2 );

   uwire [w-1:0] i10, i11, i21;

   sort2 #(w) s0_01( i10, i11,   a0, a1 );
   sort2 #(w) s1_12( i21, x2,  i11, a2  );
   sort2 #(w) s2_01( x0, x1,  i10, i21  );

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. For
//  example, one might modify the testbench so that the first tests it
//  performs are those which make it easier to determine what the
//  problem is, for example, test inputs that are all 0's or all 1's.
//
//  Of course, the removal of tests which your module fails is not a
//  method of fixing a broken module.  The TA-bot will test your
//  code using a fresh copy of the testbench, not the one below.


// cadence translate_off

module sortx
  #( int max_size = 5,
     int modnum = 0,
     int w = 8,
     int max_muts = 3)
   ( output uwire [w-1:0] xlong[max_muts][max_size],
     input uwire [w-1:0] a[max_size] );

   uwire [w-1:0] x[max_size];
   assign xlong[modnum] = x;

   if ( modnum == 0 ) begin:A

      localparam int s_size = 2;
      localparam string name = "sort2";
      sort2 #(w) s(x[0],x[1],a[0],a[1]);

   end else if ( modnum == 1 ) begin:A

      localparam int s_size = 2;
      localparam string name = "sort2_is";
      sort2_is #(w) s(x[0],x[1],a[0],a[1]);

   end else if ( modnum == 2 ) begin:A

      localparam int s_size = 3;
      localparam string name = "sort3";
      sort3 #(w) s(x[0],x[1],x[2],a[0],a[1],a[2]);

   end else begin:A

      localparam int s_size = 4;
      localparam string name = "sort4";
      sort4 #(w) s(x[0],x[1],x[2],x[3],a[0],a[1],a[2],a[3]);

   end

endmodule


module testbench;

   localparam int w = 8;
   localparam int n_tests = 100;
   localparam int max_size = 4;
   localparam int max_muts = 4;

   logic [w-1:0] a[max_size];
   uwire [w-1:0] x[max_muts][max_size];

   typedef struct { int idx; string name; int s_size; } Info;
   Info pi[$];

   for ( genvar i=0; i<4; i++ ) begin
      sortx #(max_size,i,w,max_muts) s(x,a);
      initial pi.push_back( '{ i, s.A.name, s.A.s_size} );
   end

   initial begin

      automatic int g_elt_err_count = 0;
      automatic int g_sort_err_count = 0;

      $write("Starting testbench.");

      // Initialize the input to a recognizable pattern, which should
      // be overwritten but if not, we can tell. If we print the value in
      // hex.
      for ( int e = 0; e < max_size; e++ ) a[e] = 'haaaaaaaa;

      foreach ( pi[idx] ) begin

         automatic Info p = pi[idx];
         automatic string mut = p.name;
         automatic int s_size = p.s_size;
         automatic logic [w-1:0] shadow[] = new[s_size];

         for ( int i = 0;  i < n_tests;  i++ ) begin

            automatic int this_elt_err_count = 0;

            // To make sure that the comparison is correct restrict the
            // key to a subset of bits.
            automatic int n_bits = {$random} % w + 1;
            automatic int mask = ( 1 << n_bits ) - 1;

            for ( int i=0; i<w; i++ ) begin
               automatic int b = {$random} % w;
               {mask[b],mask[i]} = {mask[i],mask[b]};
            end

            for ( int e = 0; e < s_size; e++ )
              begin a[e] = {$random} & mask; shadow[e] = a[e]; end

            #1;

            shadow.sort();

            for ( int e = 0; e < s_size; e++ ) begin
               automatic logic [w-1:0] elt = x[p.idx][e];
               if ( shadow[e] === elt ) continue;
               this_elt_err_count++;
               g_elt_err_count++;
               if ( g_elt_err_count > 5 ) continue;
               $write
                 ("Mod %s, sort %2d index %2d, wrong elt %d != %d (correct)\n",
                  mut,i, e, elt, shadow[e]);
            end

            if ( this_elt_err_count ) g_sort_err_count++;

         end

         $write("Tests for %s done, errors in %d of %d sorts.\n",
                mut,  g_sort_err_count, n_tests);

      end

   end

endmodule

// cadence translate_on