////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2017 Homework 2 -- SOLUTION
//

 /// Assignment  http://www.ece.lsu.edu/koppel/v/2017/hw02.pdf

//////////////////////////////////////////////////////////////////////////////
///  Problem 1 -- SOLUTION
//
 /// Modify interp so that it performs linear interpolation. See the handout
 /// and module interp_behav.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable.
//     [✔] Module must do some FP arithmetic.
//     [✔] Modify include statements (at end) for any new ChipWare modules.

`default_nettype none

module interp
  #( int jw = 12, int amax = 255 )
   ( output uwire valid,
     output uwire [7:0] aj,
     input uwire [31:0] x1, a1, x2, a2,
     input uwire [jw-1:0] j );

   localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.

   uwire [jw:0] x1i, x2i;

   /// SOLUTION

   /// First, generate the valid signal.

   // Convert x1 and x2 to integers.
   //
   fp_ftoi #( jw+1 ) ftoi1(x1i, x1);
   fp_ftoi #( jw+1 ) ftoi2(x2i, x2);
   //
   // Note: Since the ChipWare float-to-int module can only convert to
   // a signed integer and x is unsigned need to make the integer one
   // bit wider to accommodate the sign bit that we won't need.
   // Otherwise, values >= 2^{jw-1}, for the default, 2^11 = 2048,
   // will be clamped to the maximum 12-bit signed representation,
   // 2047.

   // Check whether j is between x1 and x2.
   //
   assign valid = x1i + j <= x2i;

   //
   /// Perform the interpolation: aj = a1 + j * ( a2 - a1 ) / ( x2 - x1 )
   //

   uwire [31:0] delta_x, delta_a, dadx, jr, jdadx, ajr;
   uwire [7:0] status[2]; // Unused status connections for CW modules.

   fp_sub sdx(delta_x, x2, x1);
   fp_sub sda(delta_a, a2, a1);

   CW_fp_div div
     ( .status(status[0]), .z(dadx), .a(delta_a), .b(delta_x), .rnd(rnd_even) );

   fp_itof #(jw) itof(jr,j);
   //
   // Note: Module performs an unsigned conversion, so we don't need to
   // widen j by one bit. See ftoi3 below and ftoi1 and ftoi2 above.

   CW_fp_mult mul
     ( .status(status[1]), .z(jdadx), .a(jr), .b(dadx), .rnd(rnd_even) );

   fp_add add(ajr,a1,jdadx);

   /// Convert the interpolated value to an integer and clamp it between
   // 0 and amax.

   // Declare aji signed so that the comparison operator works correctly
   // for aji < 0.
   //
   uwire signed [8:0] aji;

   fp_ftoi #( 9 ) ftoi3( aji, ajr );

   assign     aj = aji < 0 ? 0 : aji > amax ? amax : aji[7:0];
   //
   // Note that when amax is 255 the clamp isn't necessary
   // because the float-to-int module clamps to the maximum representable
   // value, which is 255 for a 9-bit signed integer.

endmodule


module fp_itof
  #( int wid = 10, logic i_is_signed = 0 )
   ( output uwire [31:0] f, input uwire [wid-1:0] i);

   uwire [7:0] status;
   localparam logic [2:0] rnd_even = 3'b000;

   CW_fp_i2flt #( .isize(wid), .isign(i_is_signed) )
     itof ( .status(status), .a(i), .z(f), .rnd(rnd_even) );
endmodule



//////////////////////////////////////////////////////////////////////////////
/// Convenience wrappers around ChipWare modules.
///
 //  Feel free to define additional modules.
 //  See http://www.ece.lsu.edu/v/ref.html for ChipWare documentation.

module fp_add(output uwire [31:0] x, input uwire [31:0] a, b );
   uwire [7:0] status;
   localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.
   CW_fp_add add( .status(status), .z(x), .a(a), .b(b), .rnd(rnd_even) );
endmodule

module fp_sub(output uwire [31:0] x, input uwire [31:0] a, b );
   uwire [7:0] status;
   localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.
   CW_fp_sub sub( .status(status), .z(x), .a(a), .b(b), .rnd(rnd_even) );
endmodule

module fp_ftoi
  #( int wid = 10 )
   ( output uwire [wid-1:0] i, input uwire [31:0] f);

   uwire [7:0] status;
   localparam logic [2:0] rnd_even = 3'b000; // Round to closer integer.
   localparam logic [2:0] rnd_trun = 3'b001; // Round towards zero. (truncate)
   localparam logic [2:0] rnd_minf = 3'b011; // Round towards -infinity.

   CW_fp_flt2i #( .isize(wid) ) ftoi
     ( .status(status), .z(i), .a(f), .rnd(rnd_trun) );
endmodule

//////////////////////////////////////////////////////////////////////////////
/// Behavioral Interpolation Module
//
//  Module below is correct but not synthesizable.


// cadence translate_off

module interp_behav
  #( int jw = 12,
     int amax = 255 )
   ( output logic valid,
     output logic [7:0] aj,
     input uwire [31:0] x1, a1, x2, a2,
     input uwire [jw-1:0] j );

   always_comb begin

      automatic shortreal x1r = $bitstoshortreal(x1);
      automatic shortreal x2r = $bitstoshortreal(x2);
      automatic shortreal a1r = $bitstoshortreal(a1);
      automatic shortreal a2r = $bitstoshortreal(a2);

      automatic int x1i = $floor(x1r);
      automatic int x2i = $floor(x2r);
      automatic int xj = x1i + j;

      shortreal dadx, ajr;

      valid = xj <= x2i;

      dadx = ( a2r - a1r ) / ( x2r - x1r );
      ajr = a1r + j * dadx;
      aj = ajr < 0 ? 0 : ajr > amax ? amax : $floor(ajr);

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. Of
//  course, the removal of tests which your module fails is not a
//  method of fixing a broken module. (One might modify the testbench
//  so that the first tests it performs are those which make it easier
//  to determine what the problem is, for example, test inputs that
//  are all 0's or all 1's.)


module testbench();

   localparam bit trunc_x1 = 1;

   localparam int err_max_display = 20;
   localparam shortreal tolerance = 0.0001;

   localparam int num_tests = 2000;
   localparam int xmin = 0;
   localparam int xmax = 3839;
   localparam longint rand_max = longint'(1) << 32;
   localparam shortreal xscale = shortreal'(xmax) / rand_max;
   localparam shortreal short_len = 5;
   localparam shortreal short_scale = short_len / rand_max;

   localparam int amax = 255;
   localparam shortreal ascale = shortreal'(amax) / rand_max;

   localparam int jw = 12;

   typedef struct
     {
      string name;
      int err_valid = 0;
      int err_aj = 0;
      } Info;
   Info muts[int];
   task new_interp(input int idx, input string name);
      muts[idx].name = name;
   endtask

   localparam int mut_n_max = 5;

   logic [jw-1:0] mj;
   uwire        mvalid[mut_n_max];
   uwire [7:0] maj[mut_n_max];
   logic [31:0] mx1, mx2, ma1, ma2;

   interp_behav #(jw) i0(mvalid[0], maj[0], mx1, ma1, mx2, ma2, mj);
   initial new_interp(0,"interp_behav");
   interp #(jw) i1(mvalid[1], maj[1], mx1, ma1, mx2, ma2, mj);
   initial new_interp(1,"interp");

   initial begin

      for ( int i=0; i<num_tests; i++ ) begin

         automatic bit short_line = $random & 1;
         automatic shortreal x[] = { {$random} * xscale, {$random} * xscale };
         shortreal len1;
         shortreal x1, x2, a1, a2, dadx;
         int x1i, x2i;
         int npts;
         x.sort();
         len1 = x[1] - x[0];
         if ( short_line && len1 > short_len )
           x[1] = x[0] + {$random} * short_scale;

         if ( trunc_x1 ) x[0] = $floor(x[0]);
         x1 = x[0];  x2 = x[1];
         mx1 = $shortrealtobits(x1);
         mx2 = $shortrealtobits(x2);

         a1 = {$random} * ascale;
         a2 = {$random} * ascale;
         ma1 = $shortrealtobits(a1);
         ma2 = $shortrealtobits(a2);

         dadx = ( a2 - a1 ) / ( x2 - x1 );

         x1i = $floor(x1);
         x2i = $floor(x2);
         npts = x2i - x1i + 1;

         for ( int j=0; j<npts+10; j++ ) begin

            automatic shortreal aj = a1 + ( x1i + j - x1 ) * dadx;
            automatic int aji = aj < 0 ? 0 : aj > amax ? amax : $floor(aj);
            automatic shortreal ajfrac = aj - aji;
            automatic int tol =
              ajfrac < tolerance ? -1 : ajfrac > 1 - tolerance ? 1 : 0;
            automatic int ajalt = aji + tol;
            automatic logic valid = j < npts;
            mj = j;

            #1;

            foreach ( muts[m] ) begin

               if ( mvalid[m] !== valid ) begin
                  if ( muts[m].err_valid < err_max_display )
                    $write("Err in %s for %4.1f, %4.1f, j=%0d, valid %0d != %0d (correct)\n",
                           muts[m].name, x1, x2, j, mvalid[m], valid );
                  muts[m].err_valid++;
               end
               if ( valid && mvalid[m] && maj[m] !== aji && maj[m] !== ajalt )
                 begin
                    if ( muts[m].err_aj < err_max_display )
                      $write("Err in %s for %4.1f, %4.1f, j=%0d, aj=%.4f  %0d != %0d (correct)\n",
                             muts[m].name, a1, a2, j, aj, maj[m], aji );
                    muts[m].err_aj++;
                 end
            end
         end

      end

      foreach ( muts[m] )
        $write("Done with tests for %s, %0d + %0d errors.\n",
             muts[m].name,muts[m].err_valid, muts[m].err_aj);

   end

endmodule

// cadence translate_on

`default_nettype wire

`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_add.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_flt2i.v"

`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_sub.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_div.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_i2flt.v"