////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2017 Homework 2 -- SOLUTION
//
 /// Assignment  http://www.ece.lsu.edu/koppel/v/2017/hw02.pdf

//////////////////////////////////////////////////////////////////////////////
///  Problem 1 -- SOLUTION
//
 /// Modify interp so that it performs linear interpolation. See the handout
 /// and module interp_behav.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable.
//     [✔] Module must do some FP arithmetic.
//     [✔] Modify include statements (at end) for any new ChipWare modules.

`default_nettype none

/// interp -- Synthesizable linear interpolation.
//
//  Parameters:
//    jw    Width in bits of the unsigned offset j.
//    amax  Largest value that may appear on aj.
//
//  Ports:
//    valid           1 when trunc(x1) + j <= trunc(x2).
//    aj              a1 + j * ( a2 - a1 ) / ( x2 - x1 ), truncated to an
//                    integer and clamped to the range [0, amax].
//    x1, a1, x2, a2  32-bit floating-point values (line end points).
//    j               Unsigned integer offset from x1.
//
module interp
  #( int jw = 12, int amax = 255 )
   ( output uwire valid,
     output uwire [7:0] aj,
     input uwire [31:0] x1, a1, x2, a2,
     input uwire [jw-1:0] j );

   localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.

   ///
   /// Part 1: Generate the valid signal.
   ///

   // Integer versions of x1 and x2.
   //
   // The ChipWare float-to-int module can only produce a signed
   // integer, but x is unsigned. The integers are therefore made one
   // bit wider than j so that the (unused) sign bit does not cost a
   // magnitude bit. Without the extra bit, values >= 2^{jw-1} (2048
   // for the default jw) would be clamped to the maximum jw-bit signed
   // value (2047 for the default jw).
   //
   uwire [jw:0] x_lo_int, x_hi_int;

   fp_ftoi #( .wid(jw+1) ) ftoi1( .i(x_lo_int), .f(x1) );
   fp_ftoi #( .wid(jw+1) ) ftoi2( .i(x_hi_int), .f(x2) );

   // Position of point j, computed in jw+1 bits.
   //
   uwire [jw:0] x_at_j;
   assign x_at_j = x_lo_int + j;

   // Point j is valid if it does not lie beyond x2.
   //
   assign valid = x_at_j <= x_hi_int;

   ///
   /// Part 2: Interpolate:  aj = a1 + j * ( a2 - a1 ) / ( x2 - x1 )
   ///

   uwire [31:0] run;     // x2 - x1
   uwire [31:0] rise;    // a2 - a1
   uwire [31:0] slope;   // rise / run
   uwire [31:0] j_fp;    // j converted to floating point.
   uwire [31:0] offset;  // j * slope
   uwire [31:0] aj_fp;   // a1 + offset

   uwire [7:0] status[2]; // Unused status connections for CW modules.

   fp_sub sdx( .x(run),  .a(x2), .b(x1) );
   fp_sub sda( .x(rise), .a(a2), .b(a1) );

   CW_fp_div div
     ( .a(rise), .b(run), .rnd(rnd_even),
       .z(slope), .status(status[0]) );

   // fp_itof performs an unsigned conversion by default, so there is
   // no need to widen j by one bit here (unlike the float-to-int
   // conversions, which are signed).
   //
   fp_itof #( .wid(jw) ) itof( .f(j_fp), .i(j) );

   CW_fp_mult mul
     ( .a(j_fp), .b(slope), .rnd(rnd_even),
       .z(offset), .status(status[1]) );

   fp_add add( .x(aj_fp), .a(a1), .b(offset) );

   ///
   /// Part 3: Convert to an integer and clamp to [0, amax].
   ///

   // Declared signed so that the comparisons below treat negative
   // interpolated values as negative.
   //
   uwire signed [8:0] aji;

   fp_ftoi #( .wid(9) ) ftoi3( .i(aji), .f(aj_fp) );

   uwire below_min, above_max;
   assign below_min = aji < 0;
   assign above_max = aji > amax;

   // When amax is 255 the upper clamp is redundant, because the
   // float-to-int module already clamps to the largest value
   // representable as a 9-bit signed integer, which is 255.
   //
   assign aj = below_min ? 0 : above_max ? amax : aji[7:0];

endmodule

/// fp_itof -- Integer to floating-point conversion.
//
//  Converts the wid-bit integer i to the 32-bit floating-point value f
//  using a CW_fp_i2flt instance. The integer is treated as unsigned
//  unless i_is_signed is set.
//
module fp_itof
  #( int wid = 10,
     logic i_is_signed = 0 )
   ( output uwire [31:0] f,
     input uwire [wid-1:0] i );

   localparam logic [2:0] rnd_even = 3'b000;

   uwire [7:0] status;  // Not examined.

   CW_fp_i2flt #( .isize(wid), .isign(i_is_signed) ) itof
     ( .a(i), .rnd(rnd_even), .z(f), .status(status) );

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Convenience wrappers around ChipWare modules.
///
//  Feel free to define additional modules.
//  See http://www.ece.lsu.edu/v/ref.html for ChipWare documentation.

/// fp_add -- Floating-point sum:  x = a + b
//
module fp_add
  ( output uwire [31:0] x,
    input uwire [31:0] a, b );

   localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.

   uwire [7:0] status;  // Not examined.

   CW_fp_add add
     ( .a(a), .b(b), .rnd(rnd_even), .z(x), .status(status) );

endmodule

/// fp_sub -- Floating-point difference:  x = a - b
//
module fp_sub
  ( output uwire [31:0] x,
    input uwire [31:0] a, b );

   localparam logic [2:0] rnd_even = 3'b000; // Round to closest. Default.

   uwire [7:0] status;  // Not examined.

   CW_fp_sub sub
     ( .a(a), .b(b), .rnd(rnd_even), .z(x), .status(status) );

endmodule

/// fp_ftoi -- Floating-point to integer conversion.
//
//  Converts the 32-bit floating-point value f to the wid-bit integer i
//  using a CW_fp_flt2i instance with the round-towards-zero mode.
//
module fp_ftoi
  #( int wid = 10 )
   ( output uwire [wid-1:0] i,
     input uwire [31:0] f );

   // Rounding mode encodings. Only rnd_trun is used below.
   //
   localparam logic [2:0] rnd_even = 3'b000; // Round to closer integer.
   localparam logic [2:0] rnd_trun = 3'b001; // Round towards zero. (truncate)
   localparam logic [2:0] rnd_minf = 3'b011; // Round towards -infinity.

   uwire [7:0] status;  // Not examined.

   CW_fp_flt2i #( .isize(wid) ) ftoi
     ( .a(f), .rnd(rnd_trun), .z(i), .status(status) );

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Behavioral Interpolation Module
//
//  Module below is correct but not synthesizable.

// cadence translate_off

/// interp_behav -- Reference model for interp.
//
//  Same parameters and ports as interp. The computation is performed
//  using shortreal arithmetic.
//
module interp_behav
  #( int jw = 12, int amax = 255 )
   ( output logic valid,
     output logic [7:0] aj,
     input uwire [31:0] x1, a1, x2, a2,
     input uwire [jw-1:0] j );

   always_comb begin

      automatic shortreal x_lo, x_hi, a_lo, a_hi;
      automatic shortreal slope, a_at_j;
      automatic int x_lo_int, x_hi_int, x_at_j;

      // Reinterpret the input bit vectors as floating-point values.
      //
      x_lo = $bitstoshortreal(x1);
      x_hi = $bitstoshortreal(x2);
      a_lo = $bitstoshortreal(a1);
      a_hi = $bitstoshortreal(a2);

      // Point j is valid if it does not lie beyond floor(x2).
      //
      x_lo_int = $floor(x_lo);
      x_hi_int = $floor(x_hi);
      x_at_j = x_lo_int + j;
      valid = x_at_j <= x_hi_int;

      // Interpolate.
      //
      slope = ( a_hi - a_lo ) / ( x_hi - x_lo );
      a_at_j = a_lo + j * slope;

      // Clamp to [0, amax], otherwise round down to an integer.
      //
      if ( a_at_j < 0 )
        aj = 0;
      else if ( a_at_j > amax )
        aj = amax;
      else
        aj = $floor(a_at_j);

   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
//  The code below instantiates some of the modules above,
//  provides test inputs, and verifies the outputs.
//
//  The testbench may be modified to facilitate your solution. Of
//  course, the removal of tests which your module fails is not a
//  method of fixing a broken module. (One might modify the testbench
//  so that the first tests it performs are those which make it easier
//  to determine what the problem is, for example, test inputs that
//  are all 0's or all 1's.)

module testbench();

   /// Testbench for interp and interp_behav.
   //
   //  Generates num_tests random line segments (x1,a1)-(x2,a2). For
   //  each segment every valid value of j, plus ten values beyond the
   //  end of the segment, is applied to all modules under test and the
   //  valid and aj outputs are compared against values computed here.

   // If 1, x1 is rounded down to an integer before being used.
   localparam bit trunc_x1 = 1;

   // Maximum number of error messages of each kind printed per module.
   localparam int err_max_display = 20;

   // If the expected aj is within this distance of an integer, the
   // neighboring integer is also accepted as correct.
   localparam shortreal tolerance = 0.0001;

   localparam int num_tests = 2000;
   localparam int xmin = 0;
   localparam int xmax = 3839;

   // Scale factors that map a 32-bit unsigned random number to the
   // ranges [0,xmax), [0,short_len), and [0,amax).
   localparam longint rand_max = longint'(1) << 32;
   localparam shortreal xscale = shortreal'(xmax) / rand_max;
   localparam shortreal short_len = 5;
   localparam shortreal short_scale = short_len / rand_max;
   localparam int amax = 255;
   localparam shortreal ascale = shortreal'(amax) / rand_max;

   localparam int jw = 12;

   // Name and error counts for each module under test.
   typedef struct { string name; int err_valid = 0; int err_aj = 0; } Info;
   Info muts[int];

   // Register module-under-test number idx under the given name.
   task new_interp(input int idx, input string name);
      muts[idx].name = name;
   endtask

   localparam int mut_n_max = 5;

   // Inputs shared by all modules under test, and per-module outputs.
   logic [jw-1:0] mj;
   uwire mvalid[mut_n_max];
   uwire [7:0] maj[mut_n_max];
   logic [31:0] mx1, mx2, ma1, ma2;

   // Both jw and amax are passed to the modules under test so that
   // the clamp limit used by the modules cannot drift away from the
   // one used below to compute the expected outputs.
   //
   interp_behav #( .jw(jw), .amax(amax) )
   i0(mvalid[0], maj[0], mx1, ma1, mx2, ma2, mj);
   initial new_interp(0,"interp_behav");

   interp #( .jw(jw), .amax(amax) )
   i1(mvalid[1], maj[1], mx1, ma1, mx2, ma2, mj);
   initial new_interp(1,"interp");

   initial begin

      for ( int i=0; i<num_tests; i++ ) begin

         // Randomly choose whether to limit the segment to short_len.
         automatic bit short_line = $random & 1;
         automatic shortreal x[] =
           { {$random} * xscale, {$random} * xscale };
         shortreal len1;
         shortreal x1, x2, a1, a2, dadx;
         int x1i, x2i;
         int npts;

         // Make x[0] the left end point.
         x.sort();
         len1 = x[1] - x[0];
         if ( short_line && len1 > short_len )
           x[1] = x[0] + {$random} * short_scale;
         if ( trunc_x1 ) x[0] = $floor(x[0]);

         x1 = x[0];
         x2 = x[1];
         mx1 = $shortrealtobits(x1);
         mx2 = $shortrealtobits(x2);

         a1 = {$random} * ascale;
         a2 = {$random} * ascale;
         ma1 = $shortrealtobits(a1);
         ma2 = $shortrealtobits(a2);

         dadx = ( a2 - a1 ) / ( x2 - x1 );

         // Number of integer positions from floor(x1) to floor(x2).
         x1i = $floor(x1);
         x2i = $floor(x2);
         npts = x2i - x1i + 1;

         // Try each valid j, then ten values past the end to check
         // that valid is de-asserted.
         for ( int j=0; j<npts+10; j++ ) begin

            // Expected interpolated value, clamped to [0, amax].
            automatic shortreal aj = a1 + ( x1i + j - x1 ) * dadx;
            automatic int aji = aj < 0 ? 0 : aj > amax ? amax : $floor(aj);

            // Alternative acceptable value for when aj is within
            // tolerance of an integer.
            automatic shortreal ajfrac = aj - aji;
            automatic int tol =
              ajfrac < tolerance ? -1 : ajfrac > 1 - tolerance ? 1 : 0;
            automatic int ajalt = aji + tol;

            automatic logic valid = j < npts;

            mj = j;
            #1;

            foreach ( muts[m] ) begin

               if ( mvalid[m] !== valid ) begin
                  if ( muts[m].err_valid < err_max_display )
                    $write("Err in %s for %4.1f, %4.1f, j=%0d, valid %0d != %0d (correct)\n",
                           muts[m].name, x1, x2, j, mvalid[m], valid );
                  muts[m].err_valid++;
               end

               // Only check aj when both the module and the testbench
               // agree that the point is valid.
               if ( valid && mvalid[m]
                    && maj[m] !== aji && maj[m] !== ajalt ) begin
                  if ( muts[m].err_aj < err_max_display )
                    $write("Err in %s for %4.1f, %4.1f, j=%0d, aj=%.4f %0d != %0d (correct)\n",
                           muts[m].name, a1, a2, j, aj, maj[m], aji );
                  muts[m].err_aj++;
               end

            end

         end

      end

      foreach ( muts[m] )
        $write("Done with tests for %s, %0d + %0d errors.\n",
               muts[m].name,muts[m].err_valid, muts[m].err_aj);

   end

endmodule

// cadence translate_on

`default_nettype wire

`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_add.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_flt2i.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_sub.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_mult.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_div.v"
`include "/apps/linux/cadence/RC142/share/synth/lib/chipware/sim/verilog/CW/CW_fp_i2flt.v"