l08.v|co02/v

/// LSU EE 3755 -- Spring 2002 -- Computer Organization
//
/// Verilog Notes 8 -- Floating Point

 // Time-stamp: <18 March 2002, 14:54:41 CST, koppel@sol>

/// Contents
//
// Binary Floating-Point Representation and Arithmetic
// IEEE 754 FP Standard
// FP Addition Hardware

/// References
//
// :P:   Palnitkar, "Verilog HDL"
// :Q:   Qualis, "Verilog HDL Quick Reference Card Revision 1.0"
// :H:   Hyde, "Handbook on Verilog HDL"
// :LRM: IEEE, Verilog Language Reference Manual  (Hawaii Section Numbering)
// :PH:  Patterson & Hennessy, "Computer Organization & Design"
// :HP:  Hennessy & Patterson, "Computer Architecture: A Quantitative Approach"


////////////////////////////////////////////////////////////////////////////////
/// Binary Floating-Point Representation and Arithmetic

// :PH: 4.8

 /// Binary Floating-Point (FP) Representations
//
// The floating-point (FP) representations in this section (before
// IEEE 754) are NOT computer representations.
//
// Among other things, that means the number of bits needed to store a
// number is not specified.
//
// Computer representations for FP numbers are covered in the next section,
// IEEE 754.


 /// Binary Fixed Point Representation
//
//
// Each digit position has a multiplier.
//
//
// FiP Binary Number: 1  0  1  0  1. 1   0   0   1
// Digit Position:    4  3  2  1  0 -1  -2  -3  -4
// Multiplier:       16  8  4  2  1 1/2 1/4 1/8 1/16
//
// Value of number:  1*16 + 0*8 + 1*4 + 0*2 + 1*1 + 1/2 + 0/4 + 0/8 + 1/16
//                   = 21.5625
//
// Other Examples:
//
//     1.1    = 1.5
//     1.01   = 1.25
//     1.11   = 1.75
//     1.001  = 1.125
//  1111.1111 = 15.9375
//
//
// Fixed Point Decimal to Binary Conversion
//
//   To convert decimal number x,  0 < x < 1.
//
//   Method 1:
//
//     For digit position -1:
//        if x >= 1/2, digit is 1,  x = x - 1/2;
//        if x <  1/2, digit is 0,  x unchanged.
//     For digit position -2:
//        if x >= 1/4, digit is 1,  x = x - 1/4
//        if x <  1/4, digit is 0,  x unchanged.
//     Etc.
//
//   Method 2:
//
//     Let r be the number of digits past the binary point desired.
//
//     Convert x * 2^r to binary.
//
//     MSB is first digit past binary point, etc.
//
//     Example:
//       r = 4,  x = .75
//       Convert .75 * 2^4 = 12 to binary: 1100
//       x in binary is: .1100
//
// Examples to 12 digits:
//
//  1.1 = 1.000110011001...     1.1 * 2^12 = 4505 = 1000110011001
//  1.2 = 1.001100110011...
//  1.3 = 1.010011001100...
//  1.4 = 1.011001100110...
//  1.5 = 1.1
//
// Note:
//
// Common numbers such as 0.2 do not have exact representations.


 /// Binary Scientific Notation
//
// Binary Scientific Representation Similar to Decimal Scientific Notation
//
//  Decimal: SIGN SIGNIFICAND x 10^{EXPONENT}
//  Binary:  SIGN SIGNIFICAND x 2^{EXPONENT}
//
//  Decimal Examples:
//
//    1.23 x 10^{2}  = 123
//    1.23 x 10^{0}  = 1.23
//    1.23 x 10^{-1} = .123
//    Examples above are normalized, examples below are not.
//    12.3 x 10^{1}  = 123
//    .123 x 10^{1}  = 1.23
//    123 x 10^{-3}  = .123
//
//  Binary Examples
//
//    1 x 2^{0}    = 1 = 1
//    1 x 2^{1}    = 10 = 2
//    1 x 2^{2}    = 100 = 4
//    1.1 x 2^{2}  = 110 = 6
//    1.1 x 2^{1}  = 11 = 3
//    1.1 x 2^{0}  = 1.1 = 1.5
//    1.1 x 2^{-1} = .11 = .75
//    Examples above are normalized, examples below are not.
//    11 x 2^{1}   = 110 = 6
//    11 x 2^{0}   = 11 = 3
//    11 x 2^{-1}  = 1.1 = 1.5
//    11 x 2^{-2}  = .11 = .75


 /// Addition Using Scientific Notation
//
// Consider:
//
//   a_scand x 2^{a_exp}
//   b_scand x 2^{b_exp}
//
//   Assume a is larger magnitude number.
//
//   To add these:
//
//     Set b'_exp = a_exp.
//     Set b'_scand = b_scand / 2^(a_exp - b_exp)
//     If two steps above correct:  b_scand x 2^{b_exp} == b'_scand x 2^{b'_exp}
//     Set s_scand = a_scand + b'_scand
//     Set s_exp = a_exp
//     Optional: Normalize s.
//
//     Sum is s_scand x 2^{s_exp}
//
//  Example:
//    See text or blackboard example.
//
// Subtraction is similar.


 /// Multiplication Using Scientific Notation
//
// Consider:
//
//   a_scand x 2^{a_exp}
//   b_scand x 2^{b_exp}
//
//   To multiply these:
//
//     Set p_scand = a_scand x b_scand
//     Set p_exp = a_exp + b_exp
//     Optional: Normalize p
//
//     Product is p_scand x 2^{p_exp}
//
//  Example:
//    See text or blackboard example.
//


////////////////////////////////////////////////////////////////////////////////
/// IEEE 754 FP Standard

// :PH: 4.8

 /// Standard Specifies
//
// Formats of FP numbers. (There are several sizes.)
// Results of arithmetic operations, including rounding.

 /// Objectives of Standard
//
// Represent range of numbers in common use. (Of course.)
// Predictable rounding behavior.
// Compare as integers.
//
// The following is NOT an objective:
//
// Keep things simple for an introductory computer class.
// Nevertheless, it's not that bad.

 /// Features
//
// Can Represent:
//   Floating-point number.
//   + and - Infinity, and other special values.
//
// Special Properties
//   Positive Zero is 0.
//   Can use signed integer magnitude and equality tests.

 /// Sizes
//
// Single: 32 bits.
// Double: 64 bits.
// Extended: Varies, not shown here.

 /// Key Ideas
//
// Format Specifies:
//
//  Sign.
//  Exponent.
//  Significand (Fraction)
//
// Slight Complications (but for good reason):
//
//  Exponent is biased.
//  Significand may not include MSB (if not, it's 1).


 /// IEEE 754 Single Format
//
// Format:   SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
// 31:    S: Sign bit: 1 negative, 0 positive.
// 30-23: E: Biased Exponent. (Exponent is E-127)
// 22-0:  F: Significand (Fraction)
//
//  Case                    Value formula.
//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}
//  0 < E < 255,  S = 1  :  - ( 1.0 + F / 2^{23} ) 2^{E-127}
//  E = 0, S = 0, F = 0  :    0
//  E = 0, S = 1, F = 0  :  - 0
//  E = 255, S = 0, F = 0:    Infinity
//  E = 255, S = 1, F = 0:  - Infinity

 /// IEEE 754 Double Format
//
// Format:   SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
// 63:    S: Sign bit: 1 negative, 0 positive.
// 62-52: E: Biased Exponent. (Exponent is E-1023)
// 51-0:  F: Significand (Fraction)
//
//  Case                      Value formula.
//  0 < E < 2047,  S = 0   :    ( 1.0 + F / 2^{52} ) 2^{E-1023}
//  0 < E < 2047,  S = 1   :  - ( 1.0 + F / 2^{52} ) 2^{E-1023}
//  E = 0, S = 0, F = 0    :    0
//  E = 0, S = 1, F = 0    :  - 0
//  E = 2047, S = 0, F = 0 :    Infinity
//  E = 2047, S = 1, F = 0 :  - Infinity

 /// IEEE 754 Rounding Modes
//
// Standard specifies four rounding modes.
// Hardware set to use desired rounding mode.
//
// Rounding Modes:
//
//  Round to nearest, ties to even. (On a tie, choose result with LSB zero.)  Most popular.
//  Round towards zero.
//  Round towards +infinity.
//  Round towards -infinity.


 /// IEEE 754 Single Format Examples:   IEEE 754 to Value
//
// Single FP:   32'h3fc00000
//            = 32'b00111111110000000000000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 01111111 10000000000000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 7f = 127,  F 400000 = 4194304
//
// Based on value of S and E, the following case applies:
//
//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}
//
//  Value = ( 1.0 + 4194304 / 2^{23} ) 2^{127-127}
//        = ( 1.0 + 0.5 )
//        = 1.5

// Single FP:   32'h456ab000
//            = 32'b01000101011010101011000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 10001010 11010101011000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 8a = 138,  F 6ab000 = 6991872
//
// Based on value of S and E, the following case applies:
//
//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}
//
//  Value = ( 1.0 + 6991872 / 2^{23} ) 2^{138-127}
//        = ( 1.0 + 0.833496 ) 2048
//        = 3755

// Single FP:   32'hc0490fdb
//            = 32'b11000000010010010000111111011011
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  1 10000000 10010010000111111011011
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 1,  E = 80 = 128,  F 490fdb = 4788187
//
// Based on value of S and E, the following case applies:
//
//  0 < E < 255,  S = 1  :  - ( 1.0 + F / 2^{23} ) 2^{E-127}
//
//  Value = - ( 1.0 + 4788187 / 2^{23} ) 2^{128-127}
//        = - ( 1.0 + 0.570796 ) 2
//        = -3.14159

// Single FP:   32'h0
//            = 32'b00000000000000000000000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 00000000 00000000000000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 0,  F = 0
//
// Based on value of S and E, the following case applies:
//
//  E = 0, S = 0, F = 0  :    0
//
//  Value = 0

// Single FP:   32'h7f800000
//            = 32'b01111111100000000000000000000000
//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF
//
//                  0 11111111 00000000000000000000000
//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF
//
//                  S = 0,  E = 255,  F = 0
//
// Based on value of S and E, the following case applies:
//
//  E = 255, S = 0, F = 0:    Infinity
//
//  Value = Infinity.

 /// IEEE 754 Single Format Examples:   Value to IEEE 754
//
// Value (decimal):        12.75
//   Convert to binary:  1100.11
//   Convert to normalized binary scientific notation:  1.10011 x 2^3
//
//   S = 0  (it's positive)
//   E = 127 + 3 = 130 = 1000 0010
//   F = 10011 000000000000000000
//
//   Single: 0  1000 0010  10011 000000000000000000
//         = 0100 0001 0100 1100 0000 0000 0000 0000
//         = 0x414c0000


////////////////////////////////////////////////////////////////////////////////
/// FP Addition Hardware

// FP arithmetic hardware simple in principle, but details can be overwhelming.
// Only hardware for FP adder shown.

 /// FP Addition Hardware Examples
//
// Add IEEE 754 Doubles
// For simplicity, rounding is not performed correctly by example hardware.
//
// Two Adders
//
//   Combinational. (Unrealistic)
//   Sequential.    (More realistic.  [Real adders would be pipelined.])
//

 /// Steps For Addition
//
// To add "a" and "b" (without rounding)
//
// Step 1:
//   If b's exponent is larger than a's, swap a and b.
//
// Step 2:
//   Insert 1 in significand if corresponding exponent not zero.
//
// Step 3:
//   If necessary, un-normalize b so that a and b's exponent are the same.
//
// Step 4:
//   Negate significand if corresponding sign bit is negative.
//
// Step 5:
//   Compute sum of significands.
//
// Step 6:
//   Store sign of sum.  Take absolute value of sum.
//
// Step 7:
//   Normalize sum.


// :Example:
//
// Combinational adder.  Computes the sum of two IEEE 754 doubles.
// Does not round correctly and does not handle certain special cases.

// fp_add: Combinational adder for two IEEE 754 doubles.
//
// Ports:
//   sum         (output, 64 bits)  a_original + b_original, IEEE 754 double.
//   a_original  (input,  64 bits)  First operand,  IEEE 754 double.
//   b_original  (input,  64 bits)  Second operand, IEEE 754 double.
//
// Handled: normal and subnormal operands and results, signed zeros,
// overflow to infinity, infinity operands, and NaN operands.
//
// Limitations:
//   Bits shifted out of a significand are discarded, so inexact results
//   are truncated rather than rounded as IEEE 754 requires.
//   A NaN operand is passed through unchanged (signaling NaNs are not quieted).
//
module fp_add(sum,a_original,b_original);
   input [63:0] a_original, b_original;
   output [63:0] sum;

   reg           sumneg;
   reg [10:0]    sumexp;
   reg [54:0]    sumsig;
   assign        sum[63]    = sumneg;
   assign        sum[62:52] = sumexp;
   // Only the fraction is stored; bit 52 (the leading 1) is implied by sumexp.
   assign        sum[51:0]  = sumsig[51:0];

   reg [63:0]    a, b;
   reg [54:0]    asig, bsig;
   reg [10:0]    aexp, bexp;   // Biased exponent fields as stored.
   reg [10:0]    aeff, beff;   // Effective biased exponents (see Step 2).
   reg           aneg, bneg;
   reg [10:0]    diff;

   always @( a_original or b_original )
     begin

        /// Compute IEEE 754 Double Floating-Point Sum in Seven Easy Steps
        //  Note: Rounding not handled properly (result is truncated).

        /// Step 1: Copy inputs to a and b so that a's exponent not smaller than b's.
        //
        //  Consequence: if either operand is infinity or NaN (E = 2047)
        //  then a is infinity or NaN.
        //
        if( a_original[62:52] < b_original[62:52] ) begin

           a = b_original;  b = a_original;

        end else begin

           a = a_original;  b = b_original;

        end

        /// Step 2: Break operand into sign (neg), exponent, and significand.
        //
        aneg = a[63];     bneg = b[63];
        aexp = a[62:52];  bexp = b[62:52];
        // Put a 0 in bits 53 and 54 (later used for sign).
        // Put a 1 in bit 52 of significand if exponent is non-zero.
        // Copy significand into remaining bits.
        asig = { 2'b0, aexp ? 1'b1 : 1'b0, a[51:0] };
        bsig = { 2'b0, bexp ? 1'b1 : 1'b0, b[51:0] };
        //
        // A subnormal (E = 0) has no implied 1 but is scaled by the
        // same power of two as E = 1, so its effective exponent is 1.
        aeff = aexp ? aexp : 11'd1;
        beff = bexp ? bexp : 11'd1;

        /// Step 3: Un-normalize b so that both significands have the same scale.
        //
        diff = aeff - beff;
        bsig = bsig >> diff;
        //
        // Note: beff no longer used. If it were would need to set beff = aeff;

        /// Step 4: If necessary, negate significands.
        //
        if( aneg ) asig = -asig;
        if( bneg ) bsig = -bsig;

        /// Step 5: Compute sum.
        //
        sumsig = asig + bsig;

        /// Step 6: Take absolute value of sum.
        //
        sumneg = sumsig[54];
        if( sumneg ) sumsig = -sumsig;

        /// Step 7: Normalize sum. (Four cases.)
        //
        if( aexp == 11'h7ff ) begin
           //
           // Case 0: An operand is infinity or NaN.
           //         The value computed in steps 3-6 is meaningless
           //         here; build the result directly.

           sumexp = 11'h7ff;

           if( a[51:0] != 0 ) begin
              // a is NaN: propagate it.
              sumneg = aneg;
              sumsig = { 3'b0, a[51:0] };
           end else if( bexp == 11'h7ff && b[51:0] != 0 ) begin
              // b is NaN: propagate it.
              sumneg = bneg;
              sumsig = { 3'b0, b[51:0] };
           end else if( bexp == 11'h7ff && aneg != bneg ) begin
              // Infinity minus infinity: result is a quiet NaN.
              sumneg = 0;
              sumsig = 55'd1 << 51;
           end else begin
              // Infinity plus a finite value or a like-signed infinity.
              sumneg = aneg;
              sumsig = 0;
           end

        end else if( sumsig[53] ) begin
           //
           // Case 1: Sum overflow.
           //         Right shift significand and increment exponent.

           sumexp = aeff + 1;
           sumsig = sumsig >> 1;

           // Exponent 2047 is reserved: the sum is too large for a
           // double, so return infinity (fraction must be zero).
           if( sumexp == 11'h7ff ) sumsig = 0;

        end else if( sumsig ) begin:A
           //
           // Case 2: Sum is nonzero and did not overflow.
           //         Normalize. (See cases 2a and 2b.)

           integer pos, adj, i;

           // Find position of first non-zero digit.
           // (pos == 0 doubles as "not found yet"; if the only set bit is
           // bit 0 the final value, 0, is still correct.)
           pos = 0;
           for(i = 52; i >= 0; i = i - 1 ) if( !pos && sumsig[i] ) pos = i;

           // Compute amount to shift significand and reduce exponent.
           adj = 52 - pos;
           if( aeff > adj ) begin
              //
              // Case 2a: Result is normal.  Adjust significand and exponent.

              sumexp = aeff - adj;
              sumsig = sumsig << adj;

           end else begin
              //
              // Case 2b: Exponent too small to normalize: result is subnormal.
              //   Stored exponent is 0 (effective exponent 1), so shift
              //   only by aeff - 1.  Leading 1 lands at or below bit 51.

              sumexp = 0;
              sumsig = sumsig << ( aeff - 1 );

           end

        end else begin
           //
           // Case 3: Sum is zero.
           //         Zero is negative only if both operands are negative
           //         (that is, -0 + -0);  x + -x is +0.

           sumexp = 0;
           sumsig = 0;
           sumneg = aneg & bneg;

        end

     end
endmodule


// :Example:
//
// Sequential adder.  Computes the sum of two IEEE 754 doubles.
// Does not round correctly and does not handle certain special cases.
// More realistic than combinational adder.

// fp_add_seq: Sequential adder for two IEEE 754 doubles.
//
// Ports:
//   sum         (output, 64 bits)  a_original + b_original, IEEE 754 double.
//                                  Valid when ready is 1 after an addition.
//   ready       (output)           1 when idle (state == st_idle).
//   a_original  (input,  64 bits)  First operand,  sampled when start accepted.
//   b_original  (input,  64 bits)  Second operand, sampled when start accepted.
//   start       (input)            Sampled at positive clock edge while idle;
//                                  1 begins an addition.
//   clk         (input)            Clock; all state changes at positive edge.
//
// Timing: ready goes to 0 at the edge where start is accepted and returns
// to 1 three positive edges later, at which time sum holds the result.
//
// Handled: normal and subnormal operands and results, signed zeros,
// overflow to infinity, infinity operands, and NaN operands.
//
// Limitations:
//   Bits shifted out of a significand are discarded, so inexact results
//   are truncated rather than rounded as IEEE 754 requires.
//   A NaN operand is passed through unchanged (signaling NaNs are not quieted).
//
module fp_add_seq(sum,ready,a_original,b_original,start,clk);
   input [63:0] a_original, b_original;
   input        start, clk;
   output [63:0] sum;
   output        ready;

   reg           sumneg;
   reg [10:0]    sumexp;
   reg [54:0]    sumsig;
   assign        sum[63]    = sumneg;
   assign        sum[62:52] = sumexp;
   // Only the fraction is stored; bit 52 (the leading 1) is implied by sumexp.
   assign        sum[51:0]  = sumsig[51:0];

   reg [63:0]    a, b;
   reg [54:0]    asig, bsig;
   reg [10:0]    aexp, bexp;   // Biased exponent fields as stored.
   reg [10:0]    aeff, beff;   // Effective biased exponents (see Step 2).
   reg           aneg, bneg;
   reg [10:0]    diff;

   parameter     st_idle  = 0;
   parameter     st_cyc_1 = 1;
   parameter     st_cyc_2 = 2;
   parameter     st_cyc_3 = 3;

   reg [1:0]     state;

   initial state = st_idle;

   assign        ready = state == st_idle;

   // Note on assignments:
   //   state uses non-blocking assignment so that ready changes after the
   //   clock edge and other posedge-clk code sees a consistent value.
   //   The datapath registers use blocking assignments because later
   //   statements in the same state consume the values just computed.

   always @( posedge clk )
     case( state )
       st_idle:
         if( start ) begin

            /// Step 1: Copy inputs to a and b so that a's exponent not smaller than b's.
            //
            //  Consequence: if either operand is infinity or NaN (E = 2047)
            //  then a is infinity or NaN.
            //
            if( a_original[62:52] < b_original[62:52] ) begin

               a = b_original;  b = a_original;

            end else begin

               a = a_original;  b = b_original;

            end

            state <= st_cyc_1;

         end

       st_cyc_1:
         begin

            /// Step 2: Break operand into sign (neg), exponent, and significand.
            //
            aneg = a[63];     bneg = b[63];
            aexp = a[62:52];  bexp = b[62:52];
            // Put a 0 in bits 53 and 54 (later used for sign).
            // Put a 1 in bit 52 of significand if exponent is non-zero.
            // Copy significand into remaining bits.
            asig = { 2'b0, aexp ? 1'b1 : 1'b0, a[51:0] };
            bsig = { 2'b0, bexp ? 1'b1 : 1'b0, b[51:0] };
            //
            // A subnormal (E = 0) has no implied 1 but is scaled by the
            // same power of two as E = 1, so its effective exponent is 1.
            aeff = aexp ? aexp : 11'd1;
            beff = bexp ? bexp : 11'd1;

            /// Step 3: Un-normalize b so that both significands have the same scale.
            //
            diff = aeff - beff;
            bsig = bsig >> diff;
            //
            // Note: beff no longer used.
            //       If it were would need to set beff = aeff;

            state <= st_cyc_2;

         end

       st_cyc_2:
         begin

            /// Step 4: If necessary, negate significands.
            //
            if( aneg ) asig = -asig;
            if( bneg ) bsig = -bsig;

            /// Step 5: Compute sum.
            //
            sumsig = asig + bsig;

            state <= st_cyc_3;

         end

       st_cyc_3:
         begin

            /// Step 6: Take absolute value of sum.
            //
            sumneg = sumsig[54];
            if( sumneg ) sumsig = -sumsig;

            /// Step 7: Normalize sum. (Four cases.)
            //
            if( aexp == 11'h7ff ) begin
               //
               // Case 0: An operand is infinity or NaN.
               //         The value computed in steps 3-6 is meaningless
               //         here; build the result directly.

               sumexp = 11'h7ff;

               if( a[51:0] != 0 ) begin
                  // a is NaN: propagate it.
                  sumneg = aneg;
                  sumsig = { 3'b0, a[51:0] };
               end else if( bexp == 11'h7ff && b[51:0] != 0 ) begin
                  // b is NaN: propagate it.
                  sumneg = bneg;
                  sumsig = { 3'b0, b[51:0] };
               end else if( bexp == 11'h7ff && aneg != bneg ) begin
                  // Infinity minus infinity: result is a quiet NaN.
                  sumneg = 0;
                  sumsig = 55'd1 << 51;
               end else begin
                  // Infinity plus a finite value or a like-signed infinity.
                  sumneg = aneg;
                  sumsig = 0;
               end

            end else if( sumsig[53] ) begin
               //
               // Case 1: Sum overflow.
               //         Right shift significand and increment exponent.

               sumexp = aeff + 1;
               sumsig = sumsig >> 1;

               // Exponent 2047 is reserved: the sum is too large for a
               // double, so return infinity (fraction must be zero).
               if( sumexp == 11'h7ff ) sumsig = 0;

            end else if( sumsig ) begin:A
               //
               // Case 2: Sum is nonzero and did not overflow.
               //         Normalize. (See cases 2a and 2b.)

               integer pos, adj, i;

               // Find position of first non-zero digit.
               // (pos == 0 doubles as "not found yet"; if the only set bit
               // is bit 0 the final value, 0, is still correct.)
               pos = 0;
               for(i = 52; i >= 0; i = i - 1 ) if( !pos && sumsig[i] ) pos = i;

               // Compute amount to shift significand and reduce exponent.
               adj = 52 - pos;
               if( aeff > adj ) begin
                  //
                  // Case 2a: Result is normal.  Adjust significand and exponent.

                  sumexp = aeff - adj;
                  sumsig = sumsig << adj;

               end else begin
                  //
                  // Case 2b: Exponent too small to normalize: result is subnormal.
                  //   Stored exponent is 0 (effective exponent 1), so shift
                  //   only by aeff - 1.  Leading 1 lands at or below bit 51.

                  sumexp = 0;
                  sumsig = sumsig << ( aeff - 1 );

               end

            end else begin
               //
               // Case 3: Sum is zero.
               //         Zero is negative only if both operands are negative
               //         (that is, -0 + -0);  x + -x is +0.

               sumexp = 0;
               sumsig = 0;
               sumneg = aneg & bneg;

            end

            state <= st_idle;

         end

     endcase

endmodule