/// LSU EE 3755 Computer Organization

/// Verilog Notes 10 -- Floating Point

/// Contents

// Binary Floating-Point Representation and Arithmetic

// IEEE 754 FLP Standard

// FLP Addition Hardware

/// References

// :P: Palnitkar, "Verilog HDL"

// :Q: Qualis, "Verilog HDL Quick Reference Card Revision 1.0"

// :PH: Patterson & Hennessy, "Computer Organization & Design"

////////////////////////////////////////////////////////////////////////////////

/// Binary Floating-Point Representation and Arithmetic

// :PH: 4.8

/// Binary Floating-Point (FLP) Representations

// The floating-point (FLP) representations in this section (before

// IEEE 754) are NOT computer representations.

// Among other things, that means the number of bits needed to store a

// number is not specified.

// Computer representations for FLP numbers are covered in the next section,

// IEEE 754.

/// Binary Fixed Point Representation

// Each digit position has a weight.

// FXP Binary Number: 1 0 1 0 1. 1 0 0 1

// Digit Position: 4 3 2 1 0 -1 -2 -3 -4

// Weight: 16 8 4 2 1 1/2 1/4 1/8 1/16

// Value of number: 1*16 + 0*8 + 1*4 + 0*2 + 1*1 + 1/2 + 0/4 + 0/8 + 1/16

// = 21.5625

// Other Examples:

// 1.1 = 1.5

// 1.01 = 1.25

// 1.11 = 1.75

// 1.001 = 1.125

// 1111.1111 = 15.9375

// Fixed Point Decimal to Binary Conversion

// To convert decimal number x, 0 < x < 1.

// Method 1:

// For bit position -1:

// if x >= 1/2, bit is 1, x = x - 1/2;

// if x < 1/2, bit is 0, x unchanged.

// For bit position -2:

// if x >= 1/4, bit is 1, x = x - 1/4

// if x < 1/4, bit is 0, x unchanged.

// For bit position -3:

// if x >= 1/8, bit is 1, x = x - 1/8

// if x < 1/8, bit is 0, x unchanged.

// For bit position -4:

// if x >= 1/16, bit is 1, x = x - 1/16

// if x < 1/16, bit is 0, x unchanged.

// Etc.

// Example:

// x= .75

// For bit position -1:

// x = .75 >= 1/2, bit is 1 and updated x is (0.75) - (0.5) = 0.25;

// first bit = 1(MSB of fraction).

// For bit position -2:

// x = .25 >= 1/4, bit is 1 and updated x is (0.25) - (0.25) = 0;

// second bit = 1.

// For bit position -3:

// x = 0 < 1/8, bit is 0 and x unchanged ; x= 0;

// third bit = 0.

// For bit position -4:

// x = 0 < 1/16, bit is 0 and x unchanged ; x= 0;

// fourth bit = 0.

// so result is .1100.

// Method 2:

// Let r be the number of bits past binary point desired.

// Convert x * 2^r to binary.

// MSB is first bit past binary point, etc.

// Example:

// r = 4, x = .75

// Convert .75 * 2^4 = 12 to binary: 1100

// x in binary is: .1100

// This is the same as repeatedly multiplying by 2, each time keeping

// the integer part as the next bit and continuing with the fraction part.

// The first bit kept is the MSB of the fraction.

// .75 * 2 = 1.5 keep 1

// .5 * 2 = 1.0 keep 1

// 0 * 2 = 0 keep 0

// 0 * 2 = 0 keep 0

// so result is .1100 for 4 bit representation.

// Examples to 12 digits:

// 1.1 = 1.000110011001... 1.1 * 2^12 = 4505 = 1000110011001

// 1.2 = 1.001100110011...

// 1.3 = 1.010011001100...

// 1.4 = 1.011001100110...

// 1.5 = 1.1

// Note:

// Common numbers such as 0.2 do not have exact representations.

/// Binary Scientific Notation

// Binary Scientific Representation Similar to

// Decimal Scientific Notation

// Decimal: SIGN SIGNIFICAND(FRACTION) x 10^{EXPONENT}

// Binary: SIGN SIGNIFICAND(FRACTION) x 2^{EXPONENT}

// In scientific notation the significand is not necessarily a pure fraction;

// it may have digits before the radix point.

// Decimal Examples:

// 1.23 x 10^{2} = 123

// 1.23 x 10^{0} = 1.23

// 1.23 x 10^{-1} = .123

// Examples above are normalized

// (only one non-zero digit before radix point).

// Examples below are not(more than one non-zero digit or

// zero digit before radix point).

// 12.3 x 10^{1} = 123

// .123 x 10^{1} = 1.23

// 123 x 10^{-3} = .123

// Binary Examples

// 1 x 2^{0} = 1 = 1

// 1 x 2^{1} = 10 = 2

// 1 x 2^{2} = 100 = 4

// 1.1 x 2^{2} = 110 = 6

// 1.1 x 2^{1} = 11 = 3

// 1.1 x 2^{0} = 1.1 = 1.5

// 1.1 x 2^{-1} = .11 = .75

// Examples above are normalized(only one digit(1) before

// radix point).

// Examples below are not(more than one digit or zero digit before

// radix point).

// So when binary number is normalized, there is always 1 before

// radix point.

// IEEE 754 drops the 1(hidden 1) and saves only number(.xxxxx) after

// radix point(saving 1 bit).

// For the IEEE 754 format the significand (including the hidden 1) is 1.xxxxxx.

// 11 x 2^{1} = 110 = 6

// 11 x 2^{0} = 11 = 3

// 11 x 2^{-1} = 1.1 = 1.5

// 11 x 2^{-2} = .11 = .75

/// Addition Using Scientific Notation

// Consider:

// a_scand x 2^{a_exp}

// b_scand x 2^{b_exp}

// Assume a is larger magnitude number.

// (a>b or for simplicity a_exp >= b_exp).

// To add these:

// Set b'_exp = a_exp. //adjustment

// Set b'_scand = b_scand / 2^(a_exp - b_exp) //shift right

// Set s_scand = a_scand + b'_scand //add

// Normalize result.

// Subtraction is similar.

/// Multiplication Using Scientific Notation

// Not biased exponents.

// Consider:

// a_scand x 2^{a_exp}

// b_scand x 2^{b_exp}

// To multiply these:

// Set p_scand = a_scand x b_scand

// Set p_exp = a_exp + b_exp

// Normalize p //having only one digit before radix point

// Product is p_scand x 2^{p_exp}

////////////////////////////////////////////////////////////////////////////////

/// IEEE 754 FLP Standard

// :PH: 4.8

/// Standard Specifies

// Formats of FLP numbers. (There are several sizes.)

/// Features

// Can Represent:

// Floating-point number.

// + and - Infinity, and other special values.

// Special Properties

// Positive Zero is 0.

/// Sizes

// Single: 32 bits.

// Double: 64 bits.

// Format Specifies:

// Sign.

// Exponent.

// Significand (Fraction)

// Slight Complications :

// Exponent is biased.

// Significand may not include MSB.

// We assume a normalized significand, which means

// the MSB (the digit before the binary point) is always 1.

// So we drop the MSB (hidden 1) when storing, and when we convert the FLP to

// a decimal number we bring back the hidden 1.

// See the examples below.

/// IEEE 754 Single Format

// Format: SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 31: S: Sign bit: 1 negative, 0 positive.

// 30-23: E: Biased Exponent. (Exponent is E-127)

// 22-0: F: Significand (Fraction)

// E Biased Exponent will be in the range of 0000 0000 to 1111 1111

// (0 to 255).

// so actual Exponent value will be

// 0000 0000 - 127 which is -127 and 1111 1111 - 127 which is 128

// (-127 to 128).

// IEEE 754 single format considers the bias 127.

// Case Value formula.

// 0 < E < 255, S = 0 : ( 1.0 + F / 2^{23} ) 2^{E-127} //this 1.0

// is hidden 1.

// 0 < E < 255, S = 1 : - ( 1.0 + F / 2^{23} ) 2^{E-127}

// E = 0, S = 0, F = 0 : 0

// E = 0, S = 1, F = 0 : - 0

// E = 255, S = 0, F = 0: Infinity

// E = 255, S = 1, F = 0: - Infinity

/// IEEE 754 Double Format

// Format: SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF

// 63: S: Sign bit: 1 negative, 0 positive.

// 62-52: E: Biased Exponent. (Exponent is E-1023)

// 51-0: F: Significand (Fraction)

// E Biased Exponent will be in the range of (0 to 2047).

// so actual Exponent value will be

// (-1023 to 1024).

// IEEE 754 double format considers the bias 1023.

// Case Value formula.

// 0 < E < 2047, S = 0 : ( 1.0 + F / 2^{52} ) 2^{E-1023}

// 0 < E < 2047, S = 1 : - ( 1.0 + F / 2^{52} ) 2^{E-1023}

// E = 0, S = 0, F = 0 : 0

// E = 0, S = 1, F = 0 : - 0

// E = 2047, S = 0, F = 0 : Infinity

// E = 2047, S = 1, F = 0 : - Infinity

/// IEEE 754 Single Format Examples: IEEE 754 to Value

// Single FLP: 32'h3fc00000

// = 32'b00111111110000000000000000000000

// SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 0 01111111 10000000000000000000000

// S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

// S = 0, E = 7f = 127, F 400000 = 4194304

// Based on value of S and E, the following case applies:

// 0 < E < 255, S = 0 : ( 1.0 + F / 2^{23} ) 2^{E-127}

// Value = ( 1.0 + 4194304 / 2^{23} ) 2^{127-127}

// = ( 1.0 + 0.5 )

// = 1.5

// Single FLP: 32'h456ab000

// = 32'b01000101011010101011000000000000

// SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 0 10001010 11010101011000000000000

// S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

// S = 0, E = 8a = 138, F 6ab000 = 6991872

// Based on value of S and E, the following case applies:

// 0 < E < 255, S = 0 : ( 1.0 + F / 2^{23} ) 2^{E-127}

// Value = ( 1.0 + 6991872 / 2^{23} ) 2^{138-127}

// = ( 1.0 + 0.833496 ) 2048

// = 3755

// Single FLP: 32'hc0490fdb

// = 32'b11000000010010010000111111011011

// SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 1 10000000 10010010000111111011011

// S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

// S = 1, E = 80 = 128, F 490fdb = 4788187

// Based on value of S and E, the following case applies:

// 0 < E < 255, S = 1 : - ( 1.0 + F / 2^{23} ) 2^{E-127}

// Value = - ( 1.0 + 4788187 / 2^{23} ) 2^{128-127}

// = - ( 1.0 + 0.570796 ) 2

// = -3.14159

// Single FLP: 32'h0

// = 32'b00000000000000000000000000000000

// SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 0 00000000 00000000000000000000000

// S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

// S = 0, E = 0, F = 0

// Based on value of S and E, the following case applies:

// E = 0, S = 0, F = 0 : 0

// Value = 0

// Single FLP: 32'h7f800000

// = 32'b01111111100000000000000000000000

// SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 0 11111111 00000000000000000000000

// S EEEEEEEE FFFFFFFFFFFFFFFFFFFFFFF

// S = 0, E = 255, F = 0

// Based on value of S and E, the following case applies:

// E = 255, S = 0, F = 0: Infinity

// Value = Infinity.

/// IEEE 754 Single Format Examples: Value to IEEE 754

// Value (decimal): 12.75

// Convert to binary: 1100.11

// Convert to normalized binary scientific notation: 1.10011 x 2^3

// S = 0 (its positive)

// E = 127 + 3 = 130 = 1000 0010

// F = 10011 000000000000000000 (Notice we dropped the 1(hidden 1)

// before the binary point).

// Single: 0 1000 0010 10011 000000000000000000

// = 0100 0001 0100 1100 0000 0000 0000 0000

// = 32'h414c0000

////////////////////////////////////////////////////////////////////////////////

/// FLP Addition Hardware

// FLP arithmetic hardware is simple in principle,

// but details can be very complicated.

// Only hardware for FLP adder shown.

/// FLP Addition Hardware Examples

// Two Adders

// Combinational.

// Sequential.

// :Example:

// Add IEEE 754 Single

// Combinational floating point adder. Computes the sum of two 32bit

// floating point numbers

// that consist of one sign bit and 8 bit biased exponents and 23 bit

// unsigned normalized fractions.

// for simplicity the two numbers are considered to be positive.

// the format for the number is :

// Format: SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 31: S: Sign bit:

// 30:23 E: Exponent :

// 22:0 F: Fraction :

module fp_add(sum, exp_overflow, a_original, b_original);

   /// Combinational adder for two IEEE 754 single-format operands.
   //
   // Ports:
   //   a_original, b_original: 32-bit operands, SEEEEEEEEFFF...F layout.
   //   sum:          32-bit result in the same layout.
   //   exp_overflow: 1 when the result exponent does not fit in 8 bits
   //                 (larger exponent is 255 and the fraction add carried
   //                 out); 0 otherwise.
   //
   // Limitations visible in the code below:
   //   - Operands are treated as positive; the result sign is simply
   //     copied from a_original[31].
   //   - A hidden 1 is always inserted, so operands with E = 0 (zero)
   //     or E = 255 (infinity) are handled as ordinary normalized numbers.
   //   - Bits shifted out during alignment and normalization are
   //     discarded (no rounding).

   input [31:0]  a_original, b_original;
   output [31:0] sum;
   output        exp_overflow;

   reg           exp_overflow;
   reg [7:0]     sumexp;
   reg [23:0]    sumfraction;   // Includes the hidden 1 in bit 23.

   assign sum[31]    = a_original[31];    // Sign: operands assumed positive.
   assign sum[30:23] = sumexp;
   assign sum[22:0]  = sumfraction[22:0]; // Bit 23 (hidden 1) is dropped.

   reg [31:0]    a, b;                    // a holds the larger-exponent operand.
   reg [23:0]    afraction, bfraction;    // 24 bits to include the hidden 1.
   reg [7:0]     aexp, bexp;
   reg           c;                       // Carry out of the fraction add.
   reg [7:0]     diff;                    // Exponent difference (>= 0).

   always @( a_original or b_original )
     begin

        /// Step 1: Adjust and Alignment

        // Put the number with the larger exponent in a.
        if ( a_original[30:23] < b_original[30:23] ) begin
           a = b_original;  b = a_original;
        end else begin
           a = a_original;  b = b_original;
        end

        // Break operands into exponent and fraction.
        aexp = a[30:23];
        bexp = b[30:23];
        afraction = { 1'b1, a[22:0] };    // Insert hidden 1.
        bfraction = { 1'b1, b[22:0] };    // Insert hidden 1.

        // Align b so that both fractions are scaled by 2^aexp.
        diff = aexp - bexp;
        bfraction = bfraction >> diff;

        /// Step 2: Add fractions.

        // The 25-bit left-hand side makes the addition 25 bits wide,
        // so the carry out lands in c.
        { c, sumfraction } = afraction + bfraction;

        /// Step 3: Post normalize.

        if ( c ) begin
           // Sum is 1x.xxx: shift right one place (c becomes the
           // hidden 1) and increment the exponent.
           sumexp = aexp + 1;
           sumfraction = { c, sumfraction[23:1] };
        end else begin
           // Sum is already 1.xxx: sumfraction[23] is the hidden 1.
           sumexp = aexp;
        end

        /// Step 4: Check for exponent overflow.

        // aexp is checked rather than sumexp because sumexp has already
        // wrapped around to 0 when the overflow happens.
        // The flag is assigned on every evaluation so that no latch is
        // inferred and a previous overflow does not stick.
        exp_overflow = ( aexp == 8'd255 && c == 1'b1 );

     end

endmodule

// :Example:

// Add IEEE 754 Single

// Floating Point Sequential adder. Computes the sum of two 32bit

// floating point numbers

// that consist of one sign bit and 8 bit biased exponents and 23 bit

// unsigned normalized fractions.

// for simplicity the two numbers are considered to be positive.

// the format for the number is :

// Format: SEEEEEEEEFFFFFFFFFFFFFFFFFFFFFFF

// 31: S: Sign bit:

// 30:23 E: Exponent:

// 22:0 F: Fraction:

module fp_add_seq(sum, exp_overflow, ready, a_original, b_original, start, clk);

   /// Sequential (four-cycle) adder for two IEEE 754 single-format operands.
   //
   // Ports:
   //   a_original, b_original: 32-bit operands, sampled in the idle
   //                 state on a positive clock edge when start is 1.
   //   start:        Request an addition.
   //   clk:          Clock; all state changes on the positive edge.
   //   ready:        1 while the module is idle (result of the previous
   //                 addition, if any, is available).
   //   sum:          32-bit result.
   //   exp_overflow: Written in the last cycle of each addition: 1 when
   //                 the larger exponent is 255 and the fraction add
   //                 carried out, 0 otherwise.
   //
   // Limitations visible in the code below:
   //   - Operands are treated as positive; the result sign is copied
   //     continuously from a_original[31].
   //   - A hidden 1 is always inserted, so E = 0 and E = 255 operands
   //     are handled as ordinary normalized numbers.
   //   - Bits shifted out are discarded (no rounding).
   //
   // Blocking assignments are used because the idle state reads values
   // (a, b, aexp, bexp, diff) that it assigns earlier in the same cycle.

   input [31:0]  a_original, b_original;
   output [31:0] sum;
   input         start, clk;
   output        ready;
   output        exp_overflow;

   reg           exp_overflow;
   reg [7:0]     sumexp;
   reg [23:0]    sumfraction;   // Includes the hidden 1 in bit 23.

   assign sum[31]    = a_original[31];    // Sign: operands assumed positive.
   assign sum[30:23] = sumexp;
   assign sum[22:0]  = sumfraction[22:0]; // Bit 23 (hidden 1) is dropped.

   reg [31:0]    a, b;                    // a holds the larger-exponent operand.
   reg [23:0]    afraction, bfraction;    // 24 bits to include the hidden 1.
   reg [7:0]     aexp, bexp;
   reg           c;                       // Carry out of the fraction add.
   reg [7:0]     diff;                    // Exponent difference (>= 0).

   parameter     st_idle  = 0;
   parameter     st_cyc_1 = 1;
   parameter     st_cyc_2 = 2;
   parameter     st_cyc_3 = 3;

   reg [1:0]     state;

   initial state = st_idle;

   assign ready = state == st_idle;

   always @( posedge clk )
     case ( state )

       st_idle:
         if ( start ) begin

            /// Step 1: Alignment and Adjust.

            // Put the number with the larger exponent in a.
            if ( a_original[30:23] < b_original[30:23] ) begin
               a = b_original;  b = a_original;
            end else begin
               a = a_original;  b = b_original;
            end

            // Break operands into exponent and fraction.
            aexp = a[30:23];
            bexp = b[30:23];
            afraction = { 1'b1, a[22:0] };    // Insert hidden 1.
            bfraction = { 1'b1, b[22:0] };    // Insert hidden 1.

            // Align b so that both fractions are scaled by 2^aexp.
            diff = aexp - bexp;
            bfraction = bfraction >> diff;

            state = st_cyc_1;

         end

       st_cyc_1:
         begin

            /// Step 2: Add fractions.

            // The 25-bit left-hand side captures the carry out in c.
            { c, sumfraction } = afraction + bfraction;

            state = st_cyc_2;

         end

       st_cyc_2:
         begin

            /// Step 3: Post normalize.

            if ( c ) begin
               // Sum is 1x.xxx: shift right one place (c becomes the
               // hidden 1) and increment the exponent.
               sumexp = aexp + 1;
               sumfraction = { c, sumfraction[23:1] };
            end else begin
               // Sum is already 1.xxx: sumfraction[23] is the hidden 1.
               sumexp = aexp;
            end

            state = st_cyc_3;

         end

       st_cyc_3:
         begin

            /// Step 4: Check for exponent overflow.

            // aexp is checked rather than sumexp because sumexp has
            // already wrapped to 0 when the overflow happens.  The flag
            // is written in both cases so that an overflow from an
            // earlier addition is not reported for this one.
            exp_overflow = ( aexp == 8'd255 && c == 1'b1 );

            state = st_idle;

         end

     endcase

endmodule