/// LSU EE 3755 Fall 2009 Computer Organization

//

/// Verilog Notes 10 -- Floating Point

 

 

/// Contents

//

// Binary Floating-Point Representation and Arithmetic

// IEEE 754 FLP Standard

// FLP Addition Hardware

 

/// References

//

// :P:   Palnitkar, "Verilog HDL"

// :Q:   Qualis, "Verilog HDL Quick Reference Card Revision 1.0"

// :PH:  Patterson & Hennessy, "Computer Organization & Design"

 

////////////////////////////////////////////////////////////////////////////////

/// Binary Floating-Point Representation and Arithmetic

 

// :PH:

 

 /// Binary Floating-Point (FLP) Representations

//

// The floating-point (FLP) representations in this section (before

// IEEE 754) are NOT computer representations.

//

// Among other things, that means the number of bits needed to store a

// number is not specified.

//

// Computer representations for FLP numbers are covered in the next section,

// IEEE 754.

 

 

/// Binary Fixed Point Representation

//

//

// Each digit position has a weight.

//

//

// FXP Binary Number: 1  0  1  0  1. 1   0   0   1

// Digit Position:    4  3  2  1  0 -1  -2  -3  -4

// Weight:           16  8  4  2  1 1/2 1/4 1/8 1/16

//

// Value of number:  1*16 + 0*8 + 1*4 + 0*2 + 1*1 + 1/2 + 0/4 + 0/8 + 1/16

//                   = 21.5625

//

// Other Examples:

//

//     1.1    = 1.5

//     1.01   = 1.25

//     1.11   = 1.75

//     1.001  = 1.125

//  1111.1111 = 15.9375

//

//

// Fixed Point Decimal to Binary Conversion

//

//   To convert decimal number x,  0 < x < 1.

//

//   Method 1:

//

//     For bit position -1:

//        if x >= 1/2, bit is 1,  x = x - 1/2;

//        if x <  1/2, bit is 0,  x unchanged.

//     For bit position -2:

//        if x >= 1/4, bit is 1,  x = x - 1/4

//        if x <  1/4, bit is 0,  x unchanged.

//     For bit position -3:

//        if x >= 1/8, bit is 1,  x = x - 1/8

//        if x <  1/8, bit is 0,  x unchanged.

//     For bit position -4:

//        if x >= 1/16, bit is 1, x = x - 1/16

//        if x <  1/16, bit is 0, x unchanged.

//     Etc.

//

//     Example:

//        x= .75

//      For bit position -1:

//      x = .75 >= 1/2, bit is 1 and updated x is (0.75) - (0.5) = 0.25;

//                                       first bit = 1(MSB of fraction).

//      For bit position -2:

//      x = .25 >= 1/4, bit is 1 and updated x is (0.25) - (0.25) = 0;

//                                       second bit = 1.

//      For bit position -3:

//      x = 0 < 1/8,    bit is 0 and x unchanged ; x= 0;              

//                                       third  bit = 0.

//      For bit position -4:

//      x = 0 < 1/16,   bit is 0 and x unchanged ; x= 0;              

//                                       fourth bit = 0.

//      so result is .1100.

 

//   Method 2:

//

//     Let r be the number of bits past binary point desired.

//

//     Convert x * 2^r to binary.

//

//     MSB is first bit past binary point, etc.

//

//     Example:

//       r = 4,  x = .75

//       Convert .75 * 2^4 = 12 to binary: 1100

//       x in binary is: .1100

//

//     This is the same as repeatedly multiplying by 2 and keeping

//     the integer part each time.

//     The first one is MSB of fraction.

//     .75 * 2 = 1.5 keep 1

//     .5  * 2 = 1.0 keep 1

//      0  * 2 =  0  keep 0

//      0  * 2 =  0  keep 0

//    so result is      .1100  for 4 bit representation.

 

// Examples to 12 digits:

//

//  1.1 = 1.000110011001...     floor(1.1 * 2^12) = 4505 = 1000110011001

//  1.2 = 1.001100110011...

//  1.3 = 1.010011001100...

//  1.4 = 1.011001100110...

//  1.5 = 1.1

//

// Note:

//

// Common numbers such as 0.2 do not have exact representations.

 

 /// Binary Scientific Notation

//

// Binary Scientific Representation Similar to

//   Decimal Scientific Notation

//

//  Decimal: SIGN SIGNIFICAND(FRACTION) x 10^{EXPONENT}  

//  Binary:  SIGN SIGNIFICAND(FRACTION) x 2^{EXPONENT}

//

//  In scientific notation the significand is not necessarily a pure

//  fraction; it may have digits before the radix point.

//

//  Decimal Examples:

//

//    1.23 x 10^{2}  = 123

//    1.23 x 10^{0}  = 1.23

//    1.23 x 10^{-1} = .123

//    Examples above are normalized

//  (only one non-zero digit before radix point).

//    Examples below are not(more than one non-zero digit or

//    zero digit before radix point).

//   

//    12.3 x 10^{1}  = 123

//    .123 x 10^{1}  = 1.23

//    123 x 10^{-3}  = .123

//

//  Binary Examples

//

//    1 x 2^{0}    = 1 = 1

//    1 x 2^{1}    = 10 = 2

//    1 x 2^{2}    = 100 = 4

//    1.1 x 2^{2}  = 110 = 6

//    1.1 x 2^{1}  = 11 = 3

//    1.1 x 2^{0}  = 1.1 = 1.5

//    1.1 x 2^{-1} = .11 = .75

//    Examples above are normalized(only one digit(1) before

//         radix point).

//    Examples below are not(more than one digit or zero digit before

//         radix point).

//    So when binary number is normalized, there is always 1 before

//        radix point.

//    IEEE 754 drops the 1(hidden 1) and saves only number(.xxxxx) after

//       radix point(saving 1 bit).

//    for IEEE 754 format the significand(fraction) is 1.xxxxxx.

//    11 x 2^{1}   = 110 = 6

//    11 x 2^{0}   = 11 = 3

//    11 x 2^{-1}  = 1.1 = 1.5

//    11 x 2^{-2}  = .11 = .75

 

 

 /// Addition Using Scientific Notation

//

// Consider:

//

//   a_scand x 2^{a_exp}

//   b_scand x 2^{b_exp}

//

//   Assume a is larger magnitude number.

// (a>b or for simplicity a_exp >= b_exp).

//

//   To add these:

//

//     Set b'_exp = a_exp. //adjustment

//     Set b'_scand = b_scand / 2^(a_exp - b_exp) //shift right

//     Set s_scand = a_scand + b'_scand //add

//     Normalize result.

//

//  

//

//

// Subtraction is similar.

 

 

 /// Multiplication Using Scientific Notation

// Not biased exponents.

//

// Consider:

//

//   a_scand x 2^{a_exp}

//   b_scand x 2^{b_exp}

//

//   To multiply these:

//

//     Set p_scand = a_scand x b_scand

//     Set p_exp = a_exp + b_exp

//     Normalize p //having only one digit before radix point

//

//     Product is p_scand x 2^{p_exp}

//

//

 

 

////////////////////////////////////////////////////////////////////////////////

/// IEEE 754 FLP Standard

 

// :PH:

 

 /// Standard Specifies

//

// Formats of FLP numbers. (There are several sizes.)

 

 /// Features

//

// Can Represent:

//   Floating-point number.

//   + and - Infinity, and other special values.

//

// Special Properties

//   Positive zero is represented by all bits being 0.

 

 /// Sizes

//

// Single: 32 bits.

// Double: 64 bits.

 

 

//

// Format Specifies:

//

//  Sign.

//  Exponent.

//  Significand (Fraction)

//

// Slight Complications :

//

//  Exponent is biased.

//  Significand may not include MSB.

//  We assume normalized fraction and normalized fraction means

//  there is always 1 at MSB part

//  So we drop the MSB(hidden 1) and when we convert the FLP to

//  decimal number we bring back the hidden 1.

//  See the examples below.

 

 /// IEEE 754 Single Format

//

// Format:   SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 31:    S: Sign bit: 1 negative, 0 positive.

// 30-23: E: Biased Exponent. (Exponent is E-127)  

// 22-0:  F: Significand (Fraction)

//

// E Biased Exponent will be in the range of 0000 0000 to 1111 1111

//   (0 to 255).

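//   so E - 127 will range from

//   0000 0000 - 127 which is -127 to 1111 1111 - 127 which is 128

//   (-127 to 128).

//   E = 0 and E = 255 are used for special values (see the cases below),

//   so normalized numbers use exponents from -126 to 127.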

//  IEEE 754 single format  considers the bias 127.

 

//  Case                    Value formula.

//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127} //this 1.0

                                                          // is hidden 1.

//  0 < E < 255,  S = 1  :  - ( 1.0 + F / 2^{23} ) 2^{E-127}

//  E = 0, S = 0, F = 0  :    0

//  E = 0, S = 1, F = 0  :  - 0

//  E = 255, S = 0, F = 0:    Infinity

//  E = 255, S = 1, F = 0:  - Infinity

 

 /// IEEE 754 Double Format

//

// Format:   SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

// 63:    S: Sign bit: 1 negative, 0 positive.

// 62-52: E: Biased Exponent. (Exponent is E-1023)

// 51-0:  F: Significand (Fraction)

//

// E Biased Exponent will be in the range of (0 to 2047).

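//   so E - 1023 will range from

// (-1023 to 1024).

//   E = 0 and E = 2047 are used for special values (see the cases below),

//   so normalized numbers use exponents from -1022 to 1023.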

//  IEEE 754 double format  considers the bias 1023.

 

//

//  Case                      Value formula.

//  0 < E < 2047,  S = 0   :    ( 1.0 + F / 2^{52} ) 2^{E-1023}

//  0 < E < 2047,  S = 1   :  - ( 1.0 + F / 2^{52} ) 2^{E-1023}

//  E = 0, S = 0, F = 0    :    0

//  E = 0, S = 1, F = 0    :  - 0

//  E = 2047, S = 0, F = 0 :    Infinity

//  E = 2047, S = 1, F = 0 :  - Infinity

 

 

 

 /// IEEE 754 Single Format Examples:   IEEE 754 to Value

//

// Single FLP:   32'h3fc00000

//            = 32'b00111111110000000000000000000000

//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//

//                  0 01111111 10000000000000000000000

//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

//

//                  S = 0,  E = 7f = 127,  F 400000 = 4194304

//

// Based on value of S and E, the following case applies:

//

//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}

//

//  Value = ( 1.0 + 4194304 / 2^{23} ) 2^{127-127}

//        = ( 1.0 + 0.5 )

//        = 1.5

 

// Single FLP:   32'h456ab000

//            = 32'b01000101011010101011000000000000

//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//

//                  0 10001010 11010101011000000000000

//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

//

//                  S = 0,  E = 8a = 138,  F 6ab000 = 6991872

//

// Based on value of S and E, the following case applies:

//

//  0 < E < 255,  S = 0  :    ( 1.0 + F / 2^{23} ) 2^{E-127}

//

//  Value = ( 1.0 + 6991872 / 2^{23} ) 2^{138-127}

//        = ( 1.0 + 0.833496 ) 2048

//        = 3755

 

// Single FLP:   32'hc0490fdb

//            = 32'b11000000010010010000111111011011

//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//

//                  1 10000000 10010010000111111011011

//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

//

//                  S = 1,  E = 80 = 128,  F 490fdb = 4788187

//

// Based on value of S and E, the following case applies:

//

//  0 < E < 255,  S = 1  :  - ( 1.0 + F / 2^{23} ) 2^{E-127}

//

//  Value = - ( 1.0 + 4788187 / 2^{23} ) 2^{128-127}

//        = - ( 1.0 + 0.570796 ) 2

//        = -3.14159

 

// Single FLP:   32'h0

//            = 32'b00000000000000000000000000000000

//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//

//                  0 00000000 00000000000000000000000

//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

//

//                  S = 0,  E = 0,  F = 0

//

// Based on value of S and E, the following case applies:

//

//  E = 0, S = 0, F = 0  :    0

//

//  Value = 0

 

// Single FLP:   32'h7f800000

//            = 32'b01111111100000000000000000000000

//                  SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//

//                  0 11111111 00000000000000000000000

//                  S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

//

//                  S = 0,  E = 255,  F = 0

//

// Based on value of S and E, the following case applies:

//

//  E = 255, S = 0, F = 0:    Infinity

//

//  Value = Infinity.

 

 /// IEEE 754 Single Format Examples:   Value to IEEE 754

//

// Value (decimal):        12.75

//   Convert to binary:  1100.11

//   Convert to normalized binary scientific notation:  1.10011 x 2^3

//  

//

//   S = 0  (it's positive)

//   E = 127 + 3 = 130 = 1000 0010

//   F = 10011 000000000000000000 (Notice we dropped the 1(hidden 1)

                                 // before the binary point).

//

//   Single: 0  1000 0010  10011 000000000000000000

//         = 0100 0001 0100 1100 0000 0000 0000 0000

//         = 32'h414c0000

 

 

////////////////////////////////////////////////////////////////////////////////

/// FLP Addition Hardware

 

// FLP arithmetic hardware is simple in principle,

//  but details can be very complicated.

// Only hardware for an FLP adder is shown.

 

 /// FLP Addition Hardware Examples

//

//  Two Adders

//

//   Combinational.

//   Sequential.   

//

//

// :Example:

// Add IEEE 754 Single

// Combinational floating point adder.  Computes the sum of two 32bit

// floating  point numbers

// that consist of one sign bit and 8 bit biased exponents and 23 bit

// unsigned normalized fractions.

// for simplicity the two numbers are considered to be positive.

// the format for the number is :

//   Format:   SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//         31:    S: Sign bit:

//         30:23  E: Exponent :

//         22:0   F: Fraction :

 

 

module fp_add(sum, exp_overflow, a_original,b_original);

   // Combinational adder for two IEEE 754 single-format operands
   // (SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF).
   //
   // Inputs:
   //   a_original, b_original : the two 32-bit operands.
   //
   // Outputs:
   //   sum          : 32-bit result.  The sign bit is copied from
   //                  a_original; the signs are not used in the arithmetic.
   //   exp_overflow : 1 when the larger exponent is already 255 and the
   //                  fraction addition carries out, so the incremented
   //                  exponent no longer fits in 8 bits; 0 otherwise.
   //
   // Simplifications visible in the code below:
   //   - A hidden 1 is always inserted, so E = 0 operands (zero) are not
   //     treated specially.
   //   - Bits shifted out during alignment or normalization are dropped;
   //     there is no rounding.

   input [31:0] a_original, b_original;
   output [31:0] sum;
   output exp_overflow;
   reg    exp_overflow;

   reg [7:0]     sumexp;
   reg [23:0]    sumfraction;   // 24 bits: hidden 1 plus 23 fraction bits.

   assign        sum[31]     = a_original[31];      // Sign taken from a_original.
   assign        sum[30:23]  = sumexp;
   assign        sum[22:0]   = sumfraction[22:0];   // Bit 23 (hidden 1) is dropped.

   reg [31:0]    a, b;
   reg [23:0]    afraction, bfraction; // 24 bits to include hidden 1.
   reg [7:0]     aexp, bexp;
   reg           c;                    // Carry out of the fraction addition.
   reg [7:0]     diff;

   always @( a_original or b_original )
     begin

        /// Compute Floating-Point Sum in Four Steps

        // Default value so that exp_overflow is assigned on every
        // evaluation.  Without it the flag would hold its old value
        // (a latch) and could never return to 0.
        exp_overflow = 0;

        /// Step 1:  Adjust and Alignment
        //          Put the number with the larger exponent in a.

        if( a_original[30:23] < b_original[30:23] ) begin
           a = b_original;  b = a_original;
        end else begin
           a = a_original;  b = b_original;
        end

        /// Break operand into exponent, and fraction.

        aexp = a[30:23];  bexp = b[30:23];

        afraction = {1'b1, a[22:0]}; // Inserting hidden 1
        bfraction = {1'b1, b[22:0]}; // Inserting hidden 1

        /// Alignment so that aexp == bexp.
        //  aexp >= bexp here, so diff cannot be negative.

        diff = aexp - bexp;
        bfraction = bfraction >> diff;

        /// Step 2: Add fractions.

        c = 0;
        {c,sumfraction} = afraction + bfraction;

        /// Step 3: Post normalize.

        if( c ) begin

           // Carry out: the sum needs 25 bits.  Shift right by one
           // (c becomes the new hidden 1) and increment the exponent.
           sumexp = aexp + 1;
           sumfraction = {c,sumfraction[23:1]};

        end
        else begin

           // No carry: sumfraction[23] is already the hidden 1.
           sumexp = aexp;

        end

        /// Step 4: Check for exponent overflow.
        //
        // aexp is checked rather than sumexp because sumexp is only
        // 8 bits wide and has already wrapped around by this point.
        //
        // NOTE(review): a result whose exponent becomes exactly 255 is not
        // flagged here, although E = 255 is a reserved value in IEEE 754 --
        // confirm whether that case should also be reported.

        if( aexp == 8'd255 && c == 1'b1 ) exp_overflow = 1;

     end

endmodule

 

// :Example:

// Add IEEE 754 Single

 

//    Floating Point Sequential adder.  Computes the sum of two 32bit

// floating  point numbers

// that consist of one sign bit and 8 bit biased exponents and 23 bit

// unsigned normalized fractions.

// for simplicity the two numbers are considered to be positive.

// the format for the number is :

// Format:   SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

//         31:    S: Sign bit:

//         30:23  E: Exponent:

//         22:0   F: Fraction:

//

 

module fp_add_seq(sum, exp_overflow, ready,a_original,b_original,start,clk);

   // Sequential adder for two IEEE 754 single-format operands
   // (SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF).  One step of the addition is
   // performed on each positive clock edge.
   //
   // Inputs:
   //   a_original, b_original : the two 32-bit operands, read on the
   //                            positive edge at which start is 1 and the
   //                            adder is idle.
   //   start                  : begins an addition when the adder is idle.
   //   clk                    : clock; all state changes on posedge.
   //
   // Outputs:
   //   sum          : 32-bit result.  The sign bit is copied continuously
   //                  from a_original; the signs are not used in the
   //                  arithmetic.
   //   exp_overflow : 1 when the larger exponent is already 255 and the
   //                  fraction addition carries out; 0 otherwise.  Updated
   //                  in the last cycle of each addition.
   //   ready        : 1 while the adder is idle.  After an addition is
   //                  started it returns to 1 after four positive edges,
   //                  at which point sum and exp_overflow hold the result.
   //
   // Simplifications visible in the code below:
   //   - A hidden 1 is always inserted, so E = 0 operands (zero) are not
   //     treated specially.
   //   - Bits shifted out during alignment or normalization are dropped;
   //     there is no rounding.

   input [31:0] a_original, b_original;
   output [31:0] sum;
   input        start, clk;
   output        ready;
   output exp_overflow;
   reg    exp_overflow;

   reg [7:0]     sumexp;
   reg [23:0]    sumfraction;   // 24 bits: hidden 1 plus 23 fraction bits.

   assign        sum[31]     = a_original[31];      // Sign taken from a_original.
   assign        sum[30:23]  = sumexp;
   assign        sum[22:0]   = sumfraction[22:0];   // Bit 23 (hidden 1) is dropped.

   reg [31:0]    a, b;
   reg [23:0]    afraction, bfraction; // 24 bits to include hidden 1.
   reg [7:0]     aexp, bexp;
   reg           c;                    // Carry out of the fraction addition.
   reg [7:0]     diff;

   parameter     st_idle  = 0;  // Waiting for start; performs step 1.
   parameter     st_cyc_1 = 1;  // Step 2: add fractions.
   parameter     st_cyc_2 = 2;  // Step 3: post normalize.
   parameter     st_cyc_3 = 3;  // Step 4: overflow check.

   reg [1:0]     state;

   initial state = st_idle;

   // Give the flag a known value before the first addition completes.
   initial exp_overflow = 0;

   assign        ready = state == st_idle;

   // Blocking assignments are used throughout so that each statement in
   // step 1 sees the values computed by the statements before it.
   always @( posedge clk )
     case( state )

       st_idle:
         if( start ) begin

            /// Step 1: Alignment and Adjust.
            //          Put the number with the larger exponent in a.

            if( a_original[30:23] < b_original[30:23] ) begin
               a = b_original;  b = a_original;
            end else begin
               a = a_original;  b = b_original;
            end

            /// Break operand into exponent, and fraction.

            aexp = a[30:23];  bexp = b[30:23];

            afraction = {1'b1, a[22:0]}; // Inserting hidden 1
            bfraction = {1'b1, b[22:0]}; // Inserting hidden 1

            /// Alignment so that aexp == bexp.
            //  aexp >= bexp here, so diff cannot be negative.

            diff = aexp - bexp;
            bfraction = bfraction >> diff;

            state = st_cyc_1;

         end

       st_cyc_1:
         begin

            /// Step 2: Add fractions.

            c = 0;
            {c,sumfraction} = afraction + bfraction;

            state = st_cyc_2;

         end

       st_cyc_2:
         begin

            /// Step 3: Post normalize.

            if( c ) begin

               // Carry out: shift the fraction right by one (c becomes
               // the new hidden 1) and increment the exponent.
               sumexp = aexp + 1;
               sumfraction = {c,sumfraction[23:1]};

            end
            else begin

               // No carry: sumfraction[23] is already the hidden 1.
               sumexp = aexp;

            end

            state = st_cyc_3;

         end

       st_cyc_3:
         begin

            /// Step 4: Check for exponent overflow.
            //
            // aexp is checked rather than sumexp because sumexp is only
            // 8 bits wide and has already wrapped around by this point.
            // The flag is written on every addition, so a result without
            // overflow clears a flag left over from an earlier one.

            exp_overflow = ( aexp == 8'd255 ) && ( c == 1'b1 );

            state = st_idle;

         end

     endcase

endmodule