```/// LSU EE 3755 -- Fall 2013 -- Computer Organization
//
/// Verilog Notes 8 -- Floating Point

// Time-stamp: <9 October 2013, 17:23:35 CDT, koppel@sky.ece.lsu.edu>

/// Contents
//
// Binary Floating-Point Representation and Arithmetic
// IEEE 754 FP Standard

/// References
//
// :PH:  Patterson & Hennessy, "Computer Organization & Design", 4th Edition

////////////////////////////////////////////////////////////////////////////////
/// Binary Floating-Point Representation and Arithmetic

// :PH: 3.5

/// Binary Floating-Point (FP) Representations
//
// The floating-point (FP) representations in this section (before
// IEEE 754) are NOT computer representations.
//
// Among other things, that means the number of bits needed to store a
// number is not specified.
//
// Computer representations for FP numbers covered in the next section,
// IEEE 754.

/// Binary Fixed Point Representation
//
//
// Each digit position has a multiplier.
//
//
// FiP Binary Number: 1  0  1  0  1. 1   0   0   1
// Digit Position:    4  3  2  1  0 -1  -2  -3  -4
// Multiplier:       16  8  4  2  1 1/2 1/4 1/8 1/16
//
// Value of number:  1*16 + 0*8 + 1*4 + 0*2 + 1*1 + 1/2 + 0/4 + 0/8 + 1/16
//                   = 21.5625
//
// Other Examples:
//
//     1.1    = 1.5
//     1.01   = 1.25
//     1.11   = 1.75
//     1.001  = 1.125
//  1111.1111 = 15.9375
//
//
// Fixed Point Decimal to Binary Conversion
//
//   To convert decimal number x,  0 < x < 1.
//
//   Method 1:
//
//     For digit position -1:
//        if x >= 1/2, digit is 1,  x = x - 1/2;
//        if x <  1/2, digit is 0,  x unchanged.
//     For digit position -2:
//        if x >= 1/4, digit is 1,  x = x - 1/4
//        if x <  1/4, digit is 0,  x unchanged.
//     Etc.
//
//   Method 2:
//
//     Let r be the number of digits past decimal point desired.
//
//     Convert x * 2^r to binary.
//
//     MSB is first digit past binary point, etc.
//
//     Example:
//       r = 4,  x = .75
//       Convert .75 * 2^4 = 12 to binary: 1100
//       x in binary is: .1100
//
// Examples to 12 digits:
//
//  1.1 = 1.000110011001...     1.1 * 2^12 = 4505 = 1000110011001
//  1.2 = 1.001100110011...
//  1.3 = 1.010011001100...
//  1.4 = 1.011001100110...
//  1.5 = 1.1
//
// Note:
//
// Common decimal numbers such as 0.2 do not have exact binary representations.

/// Binary Scientific Notation
//
// Binary Scientific Representation Similar to Decimal Scientific Notation
//
//  Decimal: SIGN SIGNIFICAND x 10^{EXPONENT}
//  Binary:  SIGN SIGNIFICAND x 2^{EXPONENT}
//
//  Decimal Examples:
//
//    1.23 x 10^{2}  = 123
//    1.23 x 10^{0}  = 1.23
//    1.23 x 10^{-1} = .123
//    Examples above are normalized, examples below are not.
//    12.3 x 10^{1}  = 123
//    .123 x 10^{1}  = 1.23
//    123 x 10^{-3}  = .123
//
//  Binary Examples
//
//    1 x 2^{0}    = 1 = 1
//    1 x 2^{1}    = 10 = 2
//    1 x 2^{2}    = 100 = 4
//    1.1 x 2^{2}  = 110 = 6
//    1.1 x 2^{1}  = 11 = 3
//    1.1 x 2^{0}  = 1.1 = 1.5
//    1.1 x 2^{-1} = .11 = .75
//    Examples above are normalized, examples below are not.
//    11 x 2^{1}   = 110 = 6
//    11 x 2^{0}   = 11 = 3
//    11 x 2^{-1}  = 1.1 = 1.5
//    11 x 2^{-2}  = .11 = .75

//
// Consider:
//
//   a_scand x 2^{a_exp}
//   b_scand x 2^{b_exp}
//
//   Assume that a > b in magnitude. (That is, |a| > |b|.)
//
//
//     Set b'_exp = a_exp.
//
//     Set b'scand = b_scand / 2^(a_exp - b_exp)
//       That is, right-shift b_scand by (a_exp - b_exp) bits.
//
//       Note that:  b_scand x 2^{b_exp} == b'_scand x 2^{b'_exp}
//
//     Set s_scand = a_scand + b'_scand
//
//     Set s_exp = a_exp
//
//     We now have the sum: s_scand x 2^{s_exp}

// :Example:
//
//  Multiply:
//    a = 1.1 x 2^4
//    b = 1.0 x 2^3
//
//    a_scand -> 1.1,  a_exp -> 4
//    b_scand -> 1.0,  b_exp -> 3
//
//  Step 1: Scale b so its exponent, 3, matches b's, 4.
//
//    b = 1.0 x 2^3 = 0.1 x 2^4
//
//    b'_scand => 0.1, b'_exp = 4
//
//
//    a_scand + b'_scand = 1.1 + 0.1 = 10.0
//
//    a+b = 10.0 x 2^4
//
//  Step 3: Normalize the number (optional)
//
//    a + b  =  10.0 x 2^4  =  1.0 x 2^5
//
//    Answer is: 1.0 x 2^5
//
// Subtraction is similar.

/// Multiplication Using Scientific Notation
//
// Consider:
//
//   a_scand x 2^{a_exp}
//   b_scand x 2^{b_exp}
//
//   To multiply these:
//
//     Set p_scand = a_scand x b_scand
//     Set p_exp = a_exp + b_exp
//     Optional: Normalize p
//
//     Product is p_scand x 2^{p_exp}

// :Example:
//
//  a = 1.1 x 2^4
//  b = 1.0 x 2^3
//
//  Step 1: Multiply significands and add exponents.
//
//    p_scand  =  a_scand * b_scand  =  1.0 * 1.1  =  1.1
//    p_exp    =  a_exp + b_exp      =  4+3        =  7
//
//    a * b = 1.1 x 2^7
//
//  Step 2: Normalize the number.
//
//    In this case result is already normal.
//
//    a * b = 1.1 x 2^7

////////////////////////////////////////////////////////////////////////////////
/// IEEE 754 FP Standard

// :PH: 3.5

/// Standard Specifies
//
// Formats of FP numbers. (There are several sizes.)
// Results of arithmetic operations, including rounding.

/// Objectives of Standard
//
// Represent range of numbers in common use. (Of course.)
// Predictable rounding behavior.
// Compare as integers.
//
// The following is NOT an objective:
//
// Keep things simple for an introductory computer class.
// Nevertheless, it's not that bad.

/// Features
//
// Can Represent:
//   Floating-point number.
//   + and - Infinity, and other special values.
//
// Special Properties
//   Positive Zero is 0.
//   Can use signed integer magnitude and equality tests.

/// Sizes
//
// Single: 32 bits.
// Double: 64 bits.
// Extended: Varies, not shown here.

/// Key Ideas
//
// Format Specifies:
//
//  Sign.
//  Exponent.
//  Significand (Fraction)
//
// Slight Complications (but for good reason):
//
//  Exponent is biased.
//  Significand usually omits the MSB (it's assumed to be 1).

/// IEEE 754 Single Format
//
// Format:   SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
// 31:    S: Sign bit: 1 negative, 0 positive.
// 30-23: E: Biased Exponent. (Exponent is E-127)
// 22-0:  F: Significand (Fraction)
//
//  Case                    Value formula.
//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}
//  0 < E < 255,  S = 1  :  - ( 1.0 + F / 2^{23} ) 2^{E-127}
//  E = 0, S = 0, F = 0  :    0
//  E = 0, S = 1, F = 0  :  - 0
//  E = 255, S = 0, F = 0:    Infinity
//  E = 255, S = 1, F = 0:  - Infinity

/// IEEE 754 Double Format
//
// Format:   SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
// 63:    S: Sign bit: 1 negative, 0 positive.
// 62-52: E: Biased Exponent. (Exponent is E-1023)
// 51-0:  F: Significand (Fraction)
//
//  Case                      Value formula.
//  0 < E < 1023,  S = 0   :    ( 1.0 + F / 2^{23} ) 2^{E-1023}
//  0 < E < 1023,  S = 1   :  - ( 1.0 + F / 2^{23} ) 2^{E-1023}
//  E = 0, S = 0, F = 0    :    0
//  E = 0, S = 1, F = 0    :  - 0
//  E = 1023, S = 0, F = 0 :    Infinity
//  E = 1023, S = 1, F = 0 :  - Infinity

/// IEEE 754 Rounding Modes
//
// Format specifies four rounding modes.
// Hardware set to use desired rounding mode.
//
// Rounding Modes:
//
//  Round to even. (Nearest LSB zero.)  Most popular.
//  Round towards zero.
//  Round towards infinity.
//  Round towards -infinity.

/// IEEE 754 Single Format Examples:   IEEE 754 to Value
//
// Single FP:   32h'3fc00000
//            = 32b'00111111110000000000000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 01111111 10000000000000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 7f = 127,  F 400000 = 4194304
//
// Based on value of S and E, the following case applies:
//
//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}
//
//  Value = ( 1.0 + 4194304 / 2^{23} ) 2^{127-127}
//        = ( 1.0 + 0.5 )
//        = 1.5

// Single FP:   32h'456ab000
//            = 32b'01000101011010101011000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 10001010 11010101011000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 8a = 138,  F 6ab000 = 6991872
//
// Based on value of S and E, the following case applies:
//
//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}
//
//  Value = ( 1.0 + 6991872 / 2^{23} ) 2^{138-127}
//        = ( 1.0 + 0.833496 ) 2048
//        = 3755

// Single FP:   32h'c0490fdb
//            = 32b'11000000010010010000111111011011
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  1 10000000 10010010000111111011011
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 1,  E = 80 = 128,  F 490fdb = 4788187
//
// Based on value of S and E, the following case applies:
//
//  0 < E < 255,  S = 1  :  - ( 1.0 + F / 2^{23} ) 2^{E-127}
//
//  Value = - ( 1.0 + 4788187 / 2^{23} ) 2^{128-127}
//        = - ( 1.0 + 0.570796 ) 2
//        = -3.14159

// Single FP:   32h'0
//            = 32b'00000000000000000000000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 00000000 00000000000000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 0,  F = 0
//
// Based on value of S and E, the following case applies:
//
//  E = 0, S = 0, F = 0  :    0
//
//  Value = 0

// Single FP:   32h'7f800000
//            = 32b'01111111100000000000000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 11111111 00000000000000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 255,  F = 0
//
// Based on value of S and E, the following case applies:
//
//  E = 255, S = 0, F = 0:    Infinity
//
//  Value = Infinity.

/// IEEE 754 Single Format Examples:   Value to IEEE 754
//
// Value (decimal):        12.75
//   Convert to binary:  1100.11
//   Convert to normalized binary scientific notation:  1.10011 x 2^3
//
//   S = 0  (its positive)
//   E = 127 + 3 = 130 = 100 0010
//   F = 10011 000000000000000000
//
//   Single: 0  1000 0010  10011 000000000000000000
//         = 0100 0001 0100 1100 0000 0000 0000 0000
//         = 0x414c0000

////////////////////////////////////////////////////////////////////////////////

// FP arithmetic hardware simple in principle, but details can be overwhelming.
// Only hardware for FP adder shown.

//
// For simplicity, rounding is not performed correctly by example hardware.
//
//
//   Combinational. (Unrealistic)
//   Sequential.    (More realistic.  [Real adders would be pipelined.])
//

//
// To add "a" and "b" (without rounding)
//
// Step 1:
//   If b's exponent is larger than a's, swap a and b.
//
// Step 2:
//   Insert 1 in significand if corresponding exponent not zero.
//
// Step 3:
//   If necessary, un-normalize b so that a and b's exponent are the same.
//
// Step 4:
//   Negate significand if corresponding sign bit is negative.
//
// Step 5:
//   Compute sum of significands.
//
// Step 6:
//   Store sign of sum.  Take absolute value of sum.
//
// Step 7:
//   Normalize sum.

// :Example:
//
// Combinational adder.  Computes the sum of two IEEE 754 doubles.
// Does not round correctly and does not handle certain special cases.

input [63:0] a_original, b_original;
output [63:0] sum;

reg           sumneg;
reg [10:0]    sumexp;
reg [54:0]    sumsig;
assign        sum[63]    = sumneg;
assign        sum[62:52] = sumexp;
assign        sum[51:0]  = sumsig;

reg [63:0]    a, b;
reg [54:0]    asig, bsig;
reg [10:0]    aexp, bexp;
reg           aneg, bneg;
reg [10:0]    diff;

always @( a_original or b_original )
begin

/// Compute IEEE 754 Double Floating-Point Sum in Seven Easy Steps
//  Note: Rounding and sub-normals not handled properly.

/// Step 1: Copy inputs to a and b so that a's exponent not smaller than b's.
//
if ( a_original[62:52] < b_original[62:52] ) begin

a = b_original;  b = a_original;

end else begin

a = a_original;  b = b_original;

end

/// Step 2: Break operand into sign (neg), exponent, and significand.
//
aneg = a[63];     bneg = b[63];
aexp = a[62:52];  bexp = b[62:52];
// Put a 0 in bits 53 and 54 (later used for sign).
// Put a 1 in bit 52 of significand if exponent is non-zero.
// Copy significand into remaining bits.
asig = { 2'b0,    aexp ? 1'b1 : 1'b0,    a[51:0] };
bsig = { 2'b0, bexp ? 1'b1 : 1'b0, b[51:0] };

/// Step 3: Un-normalize b so that aexp == bexp.
//
diff = aexp - bexp;
bsig = bsig >> diff;
//
// Note: bexp no longer used. If it were would need to set bexp = aexp;

/// Step 4: If necessary, negate significands.
//
if ( aneg ) asig = -asig;
if ( bneg ) bsig = -bsig;

/// Step 5: Compute sum.
//
sumsig = asig + bsig;

/// Step 6: Take absolute value of sum.
//
sumneg = sumsig[54];
if ( sumneg ) sumsig = -sumsig;

/// Step 7: Normalize sum. (Three cases.)
//
if ( sumsig[53] ) begin
//
// Case 1: Sum overflow.
//         Right shift significand and increment exponent.

sumexp = aexp + 1;
sumsig = sumsig >> 1;

end else if ( sumsig ) begin:A
//
// Case 2: Sum is nonzero and did not overflow.
//         Normalize. (See cases 2a and 2b.)

// Find position of first non-zero digit.
pos = 0;
for (i = 52; i >= 0; i = i - 1 ) if ( !pos && sumsig[i] ) pos = i;

// Compute amount to shift significand and reduce exponent.
if ( aexp < adj ) begin
//
// Case 2a:
//   Exponent too small, floating point underflow, set to zero.

sumexp = 0;
sumsig = 0;
sumneg = 0;

end else begin
//
// Case 2b: Adjust significand and exponent.

end

end else begin
//
// Case 3: Sum is zero.

sumexp = 0;
sumsig = 0;

end

end
endmodule

// :Example:
//
// Sequential adder.  Computes the sum of two IEEE 754 doubles.
// Does not round correctly and does not handle certain special cases.
// More realistic than combinational adder.

input [63:0] a_original, b_original;
input        start, clk;
output [63:0] sum;

reg           sumneg;
reg [10:0]    sumexp;
reg [54:0]    sumsig;
assign        sum[63]    = sumneg;
assign        sum[62:52] = sumexp;
assign        sum[51:0]  = sumsig;

reg [63:0]    a, b;
reg [54:0]    asig, bsig;
reg [10:0]    aexp, bexp;
reg           aneg, bneg;
reg [10:0]    diff;

parameter     st_idle  = 0;
parameter     st_cyc_1 = 1;
parameter     st_cyc_2 = 2;
parameter     st_cyc_3 = 3;

reg [1:0]     state;

initial state = st_idle;

assign        ready = state == st_idle;

always @( posedge clk )
case( state )
st_idle:
if ( start ) begin

/// Step 1: Copy inputs to a and b so that a's exponent >= b's.
//
if ( a_original[62:52] < b_original[62:52] ) begin

a = b_original;  b = a_original;

end else begin

a = a_original;  b = b_original;

end

state = st_cyc_1;

end

st_cyc_1:
begin

/// Step 2: Break operand into sign (neg), exponent, and significand.
//
aneg = a[63];     bneg = b[63];
aexp = a[62:52];  bexp = b[62:52];
// Put a 0 in bits 53 and 54 (later used for sign).
// Put a 1 in bit 52 of significand if exponent is non-zero.
// Copy significand into remaining bits.
asig = { 2'b0, aexp ? 1'b1 : 1'b0, a[51:0] };
bsig = { 2'b0, bexp ? 1'b1 : 1'b0, b[51:0] };

/// Step 3: Un-normalize b so that aexp == bexp.
//
diff = aexp - bexp;
bsig = bsig >> diff;
//
// Note: bexp no longer used.
//       If it were would need to set bexp = aexp;

state = st_cyc_2;

end

st_cyc_2:
begin

/// Step 4: If necessary, negate significands.
//
if ( aneg ) asig = -asig;
if ( bneg ) bsig = -bsig;

/// Step 5: Compute sum.
//
sumsig = asig + bsig;

state = st_cyc_3;

end

st_cyc_3:
begin

/// Step 6: Take absolute value of sum.
//
sumneg = sumsig[54];
if ( sumneg ) sumsig = -sumsig;

/// Step 7: Normalize sum. (Three cases.)
//
if ( sumsig[53] ) begin
//
// Case 1: Sum overflow.
//         Right shift significand and increment exponent.

sumexp = aexp + 1;
sumsig = sumsig >> 1;

end else if ( sumsig ) begin:A
//
// Case 2: Sum is nonzero and did not overflow.
//         Normalize. (See cases 2a and 2b.)

// Find position of first non-zero digit.
pos = 0;
for (i = 52; i >= 0; i = i - 1 )
if ( !pos && sumsig[i] ) pos = i;

// Compute amount to shift significand and reduce exponent.
if ( aexp < adj ) begin
//
// Case 2a:
//   Exponent too small, floating point underflow, set to zero.

sumexp = 0;
sumsig = 0;
sumneg = 0;

end else begin
//
// Case 2b: Adjust significand and exponent.

end

end else begin
//
// Case 3: Sum is zero.

sumexp = 0;
sumsig = 0;

end

state = st_idle;

end

endcase

endmodule
```