////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2018 Homework 3 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2018/hw03.pdf

`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  All Problems
//
 /// Modify sort2 so that it implements a 2-input sorting network ..
 /// .. using explicit and implicit structural code only.
//
//     [✔] Easy:   Modify to compare keys, not data.
//     [✔] Easy:   Modify to sort pairs of signed integer keys.
//     [✔] Medium: Modify to sort pairs of floating-point keys.
//     [✔] Medium: Modify to sort one signed int and one FP key.
//     [✔] Hard:   Keep cost and critical path low.
//
//     [✔] Use implicit and explicit structural code only.
//     [✔] Use mux2 to swap the values.
//     [✔] Add ChipWare Verilog modules to includes at the end of this file.
//
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//     [✔] Make sure that the testbench does not report errors.
//
//     [✔] Use SimVision for debugging.
//         DMK: Yes, I used SimVision.
//     [✔] Modify testbench to facilitate solution ..
//         .. but code must pass original testbench.

module sort2
  #( int w = 30,
     int k = 16,
     int exp = 5,
     int sig = k - exp - 1 )
   ( output uwire [w-1:0] x0, x1,
     input uwire [w-1:0] a0, a1 );

   /// Encoding of a0 and a1
   //
   // Bits        Contents
   // ---------   ------------------------------------------
   // w-1 : k+1   Data
   // k           Key type: 0, integer;  1, floating point.
   // k-1 : 0     Key, a signed integer; if Key type = 0.
   // k-1         Sign bit, if key type = 1.
   // k-2 : sig   Exponent, if key type = 1.
   // sig-1 : 0   Significand, if key type = 1.

   /// SOLUTION OVERVIEW
   //
   //  Compare integer/integer keys using <= operator.
   //  Compare fp/fp and fp/integer keys using ChipWare comparison operator.

   //  For fp/integer keys that are reported equal use the inexact
   //  status bit.

   // Extract key portion of inputs and assign to a logic signed type.
   //
   uwire signed [k-1:0] k0 = a0[k-1:0];
   uwire signed [k-1:0] k1 = a1[k-1:0];

   // Integer / Integer Comparison
   //
   uwire ci = k0 <= k1;
   //
   // Note that ci is ignored if either input is FP.


   //
   /// Convert Keys to FP
   //

   // Floating-point version of keys. Only valid if key is an integer.
   //
   uwire [k-1:0] kif0, kif1; // Key Integer converted to Float.

   uwire [7:0] si0, si1;  // Status output of integer-to-FP modules.
   localparam logic [2:0] rnd_to_0 = 3'b1;

   // Convert using ChipWare integer two floating-point modules.
   //
   CW_fp_i2flt #( .sig_width(sig), .exp_width(exp), .isize(k), .isign(1) )
     itof0( .z(kif0), .status(si0), .a(k0), .rnd(rnd_to_0) );
   CW_fp_i2flt #( .sig_width(sig), .exp_width(exp), .isize(k), .isign(1) )
     itof1( .z(kif1), .status(si1), .a(k1), .rnd(rnd_to_0) );

   // Extract the inexact bit.
   //
   uwire inexact0 = si0[5], inexact1 = si1[5];
   //
   // If this bit is 1 the converted value is slightly less in
   // magnitude than the original integer value. Less because the
   // round-towards-zero (rnd_to_0) rounding option was selected.

   // Select the FP version of the key.
   //
   uwire [k-1:0] fk0 = a0[k] ? k0 : kif0;
   uwire [k-1:0] fk1 = a1[k] ? k1 : kif1;
   //
   // Note that k0 is selected if the key in a is already FP,
   // otherwise use the output of the conversion module.


   //
   /// Floating Point Comparison
   //
   uwire gt, lt, eq, un;
   uwire [k-1:0] z0, z1; // Unused
   uwire [7:0] s0, s1;

   CW_fp_cmp #( .sig_width(sig), .exp_width(exp), .ieee_compliance(0) )
   fp_comp(.a(fk0),.b(fk1),.agtb(gt),.altb(lt),.aeqb(eq),
           .zctr(1'b0), .unordered(un),.z0(z0),.z1(z1),
           .status0(s0), .status1(s1));

   /// Check whether kif0 <= kif1.
   //
   //  Approximate method:
   //
   uwire cf_approx = lt || eq;  // Unused, shown to explain solution.
   //
   // cf_approx can be wrong when eq is true because of rounding
   // errors when converting an integer to FP. Wire inexact0 is 1 when
   // there was a rounding error converting k0, etc. If eq is 1 then
   // a0 <= a1 iff one of the two cases below is true:
   //
   // Case 1: 
   //   k0 is negative
   //     k0 is FP (so no rounding)
   //     or k0 is an integer and no rounding error in the conversion.
   //     -- otherwise kif0 is larger than the value in a0
   //
   // Case 2: 
   //   k0 is positive
   //     k1 is FP (so no rounding)
   //     or k1 is an integer and no rounding error in the conversion.
   //     -- otherwise kif1 is smaller than the value in a1.

   // Determine whether a0 <= a1 accounting for rounding errors, as
   // described above.
   //
   uwire cf = lt || eq &&
         (   !kif0[k-1] && ( a0[k] || !inexact0 ) // Case 1
           || kif0[k-1] && ( a1[k] || !inexact1 ) // Case 2
             );

   // If at least one input is FP use FP comparison result, else int result.
   //
   uwire c = a0[k] || a1[k] ? cf : ci;

   mux2 #(w) m0(x0,c,a1,a0);
   mux2 #(w) m1(x1,c,a0,a1);

endmodule

module mux2
  #( int w = 4 )
   ( output uwire [w-1:0] x,
     input uwire select,
     input uwire [w-1:0] a0, a1 );
   assign x = select ? a1 : a0;
endmodule



//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. For
//  example, one might modify the testbench so that the first tests it
//  performs are those which make it easier to determine what the
//  problem is, for example, test inputs that are all 0's or all 1's.
//
//  Of course, the removal of tests which your module fails is not a
//  method of fixing a broken module.  The TA-bot will test your
//  code using a fresh copy of the testbench, not the one below.


// cadence translate_off

module testbench;

   var bit s[3];
   testbench_size #(32,16,6) t1(s[1],s[0]);
   testbench_size #(24,14,5) t2(s[2],s[1]);

   initial begin
      s[0] = 1;
      wait( s[2] );
      $write("\nAll done.\n");
   end

endmodule



module testbench_size
  #( int w = 30,
     int k = 16,
     int exp = 6,
     int sig_width = k - exp - 1 )
   ( output var bit done, input var bit start );

   localparam int s_pos = k - 1;
   localparam int exp_hi = s_pos - 1;
   localparam int exp_lo = s_pos - exp;
   localparam int sig_hi = exp_lo - 1;
   localparam int bias = ( 1 << exp - 1 ) - 1;
   localparam int exp_i_max = bias + k - 1;
   localparam int exp_max = ( 1 << exp ) - 2;
   localparam int exp_range_ini = exp_i_max - bias;
   localparam int exp_range_gti = exp_max - exp_i_max - 1;

   function real fp_to_val(input logic [k-1:0] a);
      fp_to_val =
        a[exp_hi:0] == 0 ? 0.0 :
          ( ( 1.0 + a[sig_hi:0] / real'( 1 << sig_width ) )
            * 2 ** ( 0.0 + a[exp_hi:exp_lo]-bias )
            * ( a[s_pos] ? -1 : 1 ) );
   endfunction

   localparam int num_tests = 3000000;
   localparam int test_ff_start = num_tests / 3;
   localparam int test_if_start = test_ff_start * 2;

   uwire [w-1:0] x0, x1;
   logic [w-1:0] a[2];
   sort2 #(w,k,exp,sig_width) s2(x0,x1, a[0], a[1]);

   initial begin

      automatic int err_count[string] = '{"ii":0, "ff":0, "if":0 };

      automatic logic [1:0][w-1:0] tests[$];
      /// Add tests below by copying output of testbench.
      //  Note: Tests only work at a particular value of exp, k. 
      //  Put in correct place.
      case ( k )
        14: begin
           tests.push_back('h2a823); tests.push_back('h77b7e);
        end
        16: begin
           tests.push_back('hdc641209); tests.push_back('ha0935641);
        end
      endcase

      wait( start );
      $write("Starting testbench for w=%0d, k=%0d, exp=%0d  sig width=%0d...\n",
             w, k, exp, exp_lo);

      for ( int i=0; i<num_tests; i++ ) begin

         automatic logic [k-1:0] i_1_mask = 1 << {$random} % k;
         automatic string test_type =
           i < test_ff_start ? "ii" :
             i < test_if_start ? "ff" : "if";

         bit fp[2];
         real val[2];
         bit swap;
         logic [w-1:0] shadow_x0, shadow_x1;

         for ( int j=0; j<2; j++ )
           fp[j] = test_type == "ff" || test_type == "if" && ( (i+j) & 1 );

         // FP Options: 0, (0,1), int range, > int range, int, int, int, int

         for ( int j=0; j<2; j++ ) begin
            automatic int fp_sz = fp[j] ? {$random} % 4 : 4;
            a[j][w-1:0] = {$random};
            a[j][k] = fp[j];

            case ( fp_sz )
              0: a[j][exp_hi:0] = 0;
              1: a[j][exp_hi:exp_lo] = 1 + {$random} % bias;
              2: a[j][exp_hi:exp_lo] = bias + {$random} % exp_range_ini;
              3: a[j][exp_hi:exp_lo] = exp_i_max + {$random} % exp_range_gti;
              default:; // For int leave random value.
            endcase
         end

         // Test cases for floating-point pairs.
         if ( a[0][k] && a[1][k] && a[1][exp_hi:0] ) begin
            // Generate fp numbers with matching exponents
            if ( {$random} & 1 ) a[0][exp_hi:exp_lo] = a[1][exp_hi:exp_lo];
            // Generate fp numbers with matching significands.
            if ( {$random} & 1 ) a[0][sig_hi:0] = a[1][sig_hi:0];
         end

         // Test cases for integer pairs.
         if ( !a[0][k] && !a[1][k] ) begin
            case ( {$random} % 6 )

              // Differ by 1
              0: a[1][k-1:0] = a[0][k-1:0] - 1;
              1: a[1][k-1:0] = a[0][k-1:0] + 1;

              // Differ by 2
              2: a[1][k-1:0] = a[0][k-1:0] - 2;
              3: a[1][k-1:0] = a[0][k-1:0] + 2;

              // Sort key in only 1 bit.
              4: begin a[0][k-1:0] &= i_1_mask; a[1][k-1:0] &= i_1_mask; end

              default:;
            endcase
         end

         // Test cases for int/fp keys.
         if ( a[0][k] != a[1][k] ) begin
            automatic int opt = {$random} % 32;
            casex ( opt )
              'h0xxx: if ( a[0][k] ) a[1][k-1:0] = fp_to_val(a[0])+opt[2:0]-4;
              'h1xxx: if ( a[1][k] ) a[0][k-1:0] = fp_to_val(a[1])+opt[2:0]-4;
            endcase

         end

         // Replace the keys found above with user-defined keys, if any.
         if ( tests.size() ) begin
            a[0] = tests.pop_front();
            a[1] = tests.pop_front();
         end

         case ( {a[0][k], a[1][k] } )
           'b00: test_type = "ii";
           'b11: test_type = "ff";
           default: test_type = "if";
         endcase

         for ( int j=0; j<2; j++ )
           val[j] = a[j][k] ? fp_to_val(a[j]) : signed'(a[j][s_pos:0]);

         swap = val[0] > val[1];
         { shadow_x0, shadow_x1 } = { a[swap], a[1-swap] };

         #1;

         if ( shadow_x0 !== x0 || shadow_x1 !== x1 ) begin

            err_count[test_type]++;
            if ( err_count[test_type] > 5 ) continue;

            $write
              ("Test %s %4d, error (x0,x1): (%h,%h) != (%h,%h) correct.\n",
               test_type, i,
               x0, x1, shadow_x0, shadow_x1);
            for ( int j=0; j<2; j++ )
              $write("           a%1d: data %h, key %12.5f = %s %s\n",
                     j, a[j][w-1:k], val[j], a[j][k] ? "FP " : "INT",
                     a[j][k] ?
                     $sformatf("s %b  exp %0d-%0d=%0d  sig 'h%h",
                               a[j][s_pos],
                               a[j][exp_hi:exp_lo], bias,
                               signed'({1'b0,a[j][exp_hi:exp_lo]}) - bias,
                               a[j][sig_hi:0])
                     : $sformatf("'h%h",a[j][k-1:0]));
            $write("           To re-run paste:  tests.push_back('h%h); tests.push_back('h%h);\n",
                   a[0],a[1]);
         end

      end

      $write("Done with %0d tests for k=%0d, exp=%0d:", num_tests,k,exp);
      foreach ( err_count[et] )
        $write("   %0d %s errs,", err_count[et], et);
      $write("\n");

      done = 1;

   end

endmodule



// cadence translate_on

`default_nettype wire

`include "/apps/linux/cadence/GENUS171/share/synth/lib/chipware/sim/verilog/CW/CW_fp_cmp.v"
`include "/apps/linux/cadence/GENUS171/share/synth/lib/chipware/sim/verilog/CW/CW_fp_i2flt.v"