```////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2017 Homework 2 -- SOLUTION
//

/// Assignment  http://www.ece.lsu.edu/koppel/v/2017/hw02.pdf

//////////////////////////////////////////////////////////////////////////////
///  Problem 1 -- SOLUTION
//
/// Modify interp so that it performs linear interpolation. See the handout
/// and module interp_behav.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable.
//     [✔] Module must do some FP arithmetic.
//     [✔] Modify include statements (at end) for any new ChipWare modules.

`default_nettype none

module interp
#( int jw = 12, int amax = 255 )
( output uwire valid,
output uwire [7:0] aj,
input uwire [31:0] x1, a1, x2, a2,
input uwire [jw-1:0] j );

localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.

uwire [jw:0] x1i, x2i;

/// SOLUTION

/// First, generate the valid signal.

// Convert x1 and x2 to integers.
//
fp_ftoi #( jw+1 ) ftoi1(x1i, x1);
fp_ftoi #( jw+1 ) ftoi2(x2i, x2);
//
// Note: Since the ChipWare float-to-int module can only convert to
// a signed integer and x is unsigned need to make the integer one
// bit wider to accommodate the sign bit that we won't need.
// Otherwise, values >= 2^{jw-1}, for the default, 2^11 = 2048,
// will be clamped to the maximum 12-bit signed representation,
// 2047.

// Check whether j is between x1 and x2.
//
assign valid = x1i + j <= x2i;

//
/// Perform the interpolation: aj = a1 + j * ( a2 - a1 ) / ( x2 - x1 )
//

uwire [31:0] delta_x, delta_a, dadx, jr, jdadx, ajr;
uwire [7:0] status[2]; // Unused status connections for CW modules.

fp_sub sdx(delta_x, x2, x1);
fp_sub sda(delta_a, a2, a1);

CW_fp_div div
( .status(status[0]), .z(dadx), .a(delta_a), .b(delta_x), .rnd(rnd_even) );

fp_itof #(jw) itof(jr,j);
//
// Note: Module performs an unsigned conversion, so we don't need to
// widen j by one bit. See ftoi3 below and ftoi1 and ftoi2 above.

CW_fp_mult mul
( .status(status[1]), .z(jdadx), .a(jr), .b(dadx), .rnd(rnd_even) );

/// Convert the interpolated value to an integer and clamp it between
// 0 and amax.

// Declare aji signed so that the comparison operator works correctly
// for aji < 0.
//
uwire signed [8:0] aji;

fp_ftoi #( 9 ) ftoi3( aji, ajr );

assign     aj = aji < 0 ? 0 : aji > amax ? amax : aji[7:0];
//
// Note that when amax is 255 the clamp isn't necessary
// because the float-to-int module clamps to the maximum representable
// value, which is 255 for a 9-bit signed integer.

endmodule

module fp_itof
#( int wid = 10, logic i_is_signed = 0 )
( output uwire [31:0] f, input uwire [wid-1:0] i);

uwire [7:0] status;
localparam logic [2:0] rnd_even = 3'b000;

CW_fp_i2flt #( .isize(wid), .isign(i_is_signed) )
itof ( .status(status), .a(i), .z(f), .rnd(rnd_even) );
endmodule

//////////////////////////////////////////////////////////////////////////////
/// Convenience wrappers around ChipWare modules.
///
//  Feel free to define additional modules.
//  See http://www.ece.lsu.edu/v/ref.html for ChipWare documentation.

module fp_add(output uwire [31:0] x, input uwire [31:0] a, b );
uwire [7:0] status;
localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.
CW_fp_add add( .status(status), .z(x), .a(a), .b(b), .rnd(rnd_even) );
endmodule

module fp_sub(output uwire [31:0] x, input uwire [31:0] a, b );
uwire [7:0] status;
localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.
CW_fp_sub sub( .status(status), .z(x), .a(a), .b(b), .rnd(rnd_even) );
endmodule

module fp_ftoi
#( int wid = 10 )
( output uwire [wid-1:0] i, input uwire [31:0] f);

uwire [7:0] status;
localparam logic [2:0] rnd_even = 3'b000; // Round to closer integer.
localparam logic [2:0] rnd_trun = 3'b001; // Round towards zero. (truncate)
localparam logic [2:0] rnd_minf = 3'b011; // Round towards -infinity.

CW_fp_flt2i #( .isize(wid) ) ftoi
( .status(status), .z(i), .a(f), .rnd(rnd_trun) );
endmodule

//////////////////////////////////////////////////////////////////////////////
/// Behavioral Interpolation Module
//
//  Module below is correct but not synthesizable.

module interp_behav
#( int jw = 12,
int amax = 255 )
( output logic valid,
output logic [7:0] aj,
input uwire [31:0] x1, a1, x2, a2,
input uwire [jw-1:0] j );

always_comb begin

automatic shortreal x1r = \$bitstoshortreal(x1);
automatic shortreal x2r = \$bitstoshortreal(x2);
automatic shortreal a1r = \$bitstoshortreal(a1);
automatic shortreal a2r = \$bitstoshortreal(a2);

automatic int x1i = \$floor(x1r);
automatic int x2i = \$floor(x2r);
automatic int xj = x1i + j;

valid = xj <= x2i;

dadx = ( a2r - a1r ) / ( x2r - x1r );
ajr = a1r + j * dadx;
aj = ajr < 0 ? 0 : ajr > amax ? amax : \$floor(ajr);

end

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. Of
//  course, the removal of tests which your module fails is not a
//  method of fixing a broken module. (One might modify the testbench
//  so that the first tests it performs are those which make it easier
//  to determine what the problem is, for example, test inputs that
//  are all 0's or all 1's.)

module testbench();

localparam bit trunc_x1 = 1;

localparam int err_max_display = 20;
localparam shortreal tolerance = 0.0001;

localparam int num_tests = 2000;
localparam int xmin = 0;
localparam int xmax = 3839;
localparam longint rand_max = longint'(1) << 32;
localparam shortreal xscale = shortreal'(xmax) / rand_max;
localparam shortreal short_len = 5;
localparam shortreal short_scale = short_len / rand_max;

localparam int amax = 255;
localparam shortreal ascale = shortreal'(amax) / rand_max;

localparam int jw = 12;

typedef struct
{
string name;
int err_valid = 0;
int err_aj = 0;
} Info;
Info muts[int];
task new_interp(input int idx, input string name);
muts[idx].name = name;

localparam int mut_n_max = 5;

logic [jw-1:0] mj;
uwire        mvalid[mut_n_max];
uwire [7:0] maj[mut_n_max];
logic [31:0] mx1, mx2, ma1, ma2;

interp_behav #(jw) i0(mvalid[0], maj[0], mx1, ma1, mx2, ma2, mj);
initial new_interp(0,"interp_behav");
interp #(jw) i1(mvalid[1], maj[1], mx1, ma1, mx2, ma2, mj);
initial new_interp(1,"interp");

initial begin

for ( int i=0; i<num_tests; i++ ) begin

automatic bit short_line = \$random & 1;
automatic shortreal x[] = { {\$random} * xscale, {\$random} * xscale };
shortreal len1;
shortreal x1, x2, a1, a2, dadx;
int x1i, x2i;
int npts;
x.sort();
len1 = x[1] - x[0];
if ( short_line && len1 > short_len )
x[1] = x[0] + {\$random} * short_scale;

if ( trunc_x1 ) x[0] = \$floor(x[0]);
x1 = x[0];  x2 = x[1];
mx1 = \$shortrealtobits(x1);
mx2 = \$shortrealtobits(x2);

a1 = {\$random} * ascale;
a2 = {\$random} * ascale;
ma1 = \$shortrealtobits(a1);
ma2 = \$shortrealtobits(a2);

dadx = ( a2 - a1 ) / ( x2 - x1 );

x1i = \$floor(x1);
x2i = \$floor(x2);
npts = x2i - x1i + 1;

for ( int j=0; j<npts+10; j++ ) begin

automatic shortreal aj = a1 + ( x1i + j - x1 ) * dadx;
automatic int aji = aj < 0 ? 0 : aj > amax ? amax : \$floor(aj);
automatic shortreal ajfrac = aj - aji;
automatic int tol =
ajfrac < tolerance ? -1 : ajfrac > 1 - tolerance ? 1 : 0;
automatic int ajalt = aji + tol;
automatic logic valid = j < npts;
mj = j;

#1;

foreach ( muts[m] ) begin

if ( mvalid[m] !== valid ) begin
if ( muts[m].err_valid < err_max_display )
\$write("Err in %s for %4.1f, %4.1f, j=%0d, valid %0d != %0d (correct)\n",
muts[m].name, x1, x2, j, mvalid[m], valid );
muts[m].err_valid++;
end
if ( valid && mvalid[m] && maj[m] !== aji && maj[m] !== ajalt )
begin
if ( muts[m].err_aj < err_max_display )
\$write("Err in %s for %4.1f, %4.1f, j=%0d, aj=%.4f  %0d != %0d (correct)\n",
muts[m].name, a1, a2, j, aj, maj[m], aji );
muts[m].err_aj++;
end
end
end

end

foreach ( muts[m] )
\$write("Done with tests for %s, %0d + %0d errors.\n",
muts[m].name,muts[m].err_valid, muts[m].err_aj);

end

endmodule