## LSU EE 4720 -- Fall 2002 -- Computer Architecture
#
## Assembler Notes -- RISC ISAs: MIPS, DLX


## Under Construction
# 
# Time-stamp: <30 August 2002, 10:11:38 CDT, koppel@drop>

## Contents
#
# Major ISA Families
# Summary of MIPS and DLX Instructions
# MIPS and DLX Floating-Point Instructions


################################################################################
## Major ISA Families

## ISA Families

# There are many ISAs, with many characteristics.
#
# Two ISAs can be similar (MIPS, Alpha) or different (MIPS, IA-32).
#
# There are generally accepted families of ISAs.
#   ISAs in the same family are similar.
#   ISAs in different families are very different.
#
# Three families are described below.  
#   More details covered in a different set.
#
#   RISC:  Simple Design
#   CISC:  Powerful Instructions
#   VLIW:  Faster Multiple-Issue Implementations (multiple issue is covered later).
#
#   The families above are mutually exclusive (an ISA can't be in more
#     than one).
#   There are additional families. (An ISA may not fit in to any of the three.)

## RISC
#
# Reduced Instruction Set Computing

 ## Goals
#
# Simple to write compilers for.
# Low-cost and fast implementations (based on 1980's technology).

 ## Current Status
#
# Dominant for technical workstations, servers, and other large computers.
# ISAs and implementations continue to be developed, though momentum is slowing.

 ## Characteristics
#
# All instructions are the same size (usually 32 bits).
# Moderate number of registers.
# Only "load" and "store" instructions allowed to access memory.
#   (Arithmetic instructions cannot access memory.)
# Amount of work done by instructions balanced.

 ## Examples
#
# MIPS, SPARC, Alpha, PA-RISC, PowerPC
#
# This class will frequently use MIPS and SPARC.


## CISC
#
# Complex Instruction Set Computing

 ## Goals
#
# Provide powerful (do-everything) instructions. (1970s/1980s)

 ## Characteristics
#
# Instruction sizes vary.
# Moderate number of registers.
# Arithmetic and other instructions can access memory.

 ## Examples
#
# VAX
# Arguably: IA-32 (80x86,Pentium) 

 ## Current Status
#
# Little new development, except for IA-32
# Outperformed by RISC.



## VLIW
#
# Very Long Instruction Word

 ## Goals
#
# Allow fast multiple issue implementations by handling
# instructions in bundles.

 ## Characteristics
#
# Instructions handled in groups (usually of 3) called /bundles/.
# Information about instruction relationships provided to hardware.

 ## Examples
#
# IA-64, Tera

 ## Current Status
#
# Used in special purpose applications, such as signal processing.
# Being introduced for general purpose use. (IA-64)


################################################################################
## ISAs Used in EE 4720


## MIPS
#
# Used in the Patterson & Hennessy text.
# An early and still popular RISC ISA.
# Covered in EE 3755

## DLX
#
# Used in the Hennessy & Patterson text.
# A simplified form of MIPS.

## SPARC
#
# Used in ECE Sun computers.

## Use in EE 4720

# Many ISAs will be used, some are briefly covered.
#
# Details, including implementations, given for MIPS and DLX.
#
# Emphasis this semester (Spring 2002) and later on MIPS.
# Older material uses DLX.


################################################################################
## MIPS and DLX

## Registers and Memory
#
# Both:  32 general-purpose registers (GPR),  32 floating-point registers.
#        GPR are 32 bits.
#        FP registers are 32 bits but can be used in pairs.
#        FP instructions can only access floating-point registers.
#
# MIPS:  Two 32-bit integer multiplication and division registers (hi/lo).
#
# Registers
#
#  DLX GPR:  r0 - r31.  Register r0 is always zero.
#  MIPS GPR: $0 - $31.  Register $0 is always zero.
#  MIPS GPRs also have names, e.g., $t0, $ra.
#  MIPS:     $hi, $lo.  Used for product, quotient, and remainder.
#
#  DLX FPR:  f0 - f31.
#  MIPS FPR: $f0 - $f31
#
#
# Memory
#
#  Both: 32-bit address space.
#        Aligned Access
#
#  DLX:  Big Endian.
#  MIPS: Either. (Big endian used in class.)



################################################################################
## MIPS and DLX Instruction Coding

# :PH: 3.4
# :Mv1: 4.2


 ## The Three MIPS Instruction Formats
#
# R Format:  Typically used for three-register instructions.
# I Format:  Typically used for instructions requiring immediates.
# J Format:  Used for jump instructions (not covered yet).
#
# Every MIPS instruction is in one of these formats.

 ## MIPS R Format
# _________________________________________________________________
# | opcode    | rs      | rt      | rd      | sa      | function  |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
#  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# Bits    Field Name    Unabbreviated Name     Typical Use
#
# 31:26:  opcode                               First part of opcode.
# 25:21:  rs            (Register Source)      Source register one.
# 20:16:  rt            (Register Target)      Source register two.
# 15:11:  rd            (Register Destination) Destination register.
# 10:6:   sa            (Shift Amount)         Five-bit immediate.
#  5:0    function                             Second part of opcode.
#

 ## DLX Type-R Instruction
# _________________________________________________________________
# | opcode    | rs1     | rs2     | rd      | func                |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
#  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
#
# Bits    Field Name    Typical Use
#
#  0: 5   opcode        First part of opcode.
#  6:10:  rs1           Source register one.
# 11:15:  rs2           Source register two.
# 16:20:  rd            Destination register.
# 21:31   function      Second part of opcode.
#

 ## MIPS I Format
# _________________________________________________________________
# | opcode    | rs      | rt      | immed                         |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
#  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# Bits    Field Name    Unabbreviated Name     Typical Use
#
# 31:26:  opcode                               Entire opcode (for I and J).
# 25:21:  rs            (Register Source)      Source register one.
# 20:16:  rt            (Register Target)      Destination, or source register two (stores, branches).
# 15:0:   immed         (Immediate)            Immediate value.

 ## DLX Type I
# _________________________________________________________________
# | opcode    | rs1     | rd      | immed                         |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
#  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
#
# Bits    Field Name    Typical Use
#
#  0: 5   opcode        Entire opcode.
#  6:10:  rs1           Source register one.
# 11:15:  rd            Destination register.
# 16:31   immed         Immediate

 ## MIPS J Format
# _________________________________________________________________
# | opcode    | ii                                                |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
#  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# Bits    Field Name    Unabbreviated Name     Typical Use
#
# 31:26:  opcode                               Entire opcode (for I and J).
# 25:0:   ii            (Instruction Index)    Part of jump target.


## Basic Type-R Instructions

# DLX:  add, addu, sub, subu, and, or, xor
# MIPS: add, addu, sub, subu, and, or, xor

# Despite their names, the MIPS addu and subu instructions are not unsigned
# (they differ from add and sub only in not trapping on overflow); DLX's are.

        add $1, $2, $3  # MIPS
        add r1, r2, r3  # DLX


## Basic Type-I Instructions

# DLX: addi, subi, andi, ori, xori
# MIPS: addi, andi, ori, xori

        addi $t0, $t1, 5  # MIPS
        addi r2, r3, #5   # DLX

# MIPS does not have a subi, DLX does.


## Load Upper

# Loads the upper 16 bits of a register with a constant (the lower 16 bits
# are set to zero).

# MIPS and DLX use different instruction names, but the instructions are
# otherwise the same.

# MIPS: lui
# DLX:  lhi

        lui $1, 0x1234   # MIPS
        lhi r1, #0x1234  # DLX



## Shift Instructions

# MIPS: sllv, srlv, srav, sll,  srl,  sra
# DLX:  sll,  srl,  sra,  slli, srli, srai

# MIPS constant shift instructions use special sa field, DLX use immed field.

        sllv $1, $2, $3   # MIPS
        sll  r1, r2, r3   # DLX

        sll  $1, $2, 5    # MIPS
        slli r1, r2, #5   # DLX

## Load and Store Instructions

# DLX, MIPS: lb, lbu, lh, lhu, lw, sb, sh, sw

# MIPS and DLX very similar.

        lw $1, 16($2)   # MIPS
        lw r1, 16(r2)   # DLX

        sw $1, 16($t5)   # MIPS
        sw 16(r10), r1   # DLX


## Integer Branches

# MIPS: beq, bne, bgtz, bgez, bltz, blez
# DLX:  beqz, bnez

# MIPS:  Branches have delay slots.
#        Can compare registers.
# DLX:   No delay slots.
#        Can only test if a register is zero.

        # DLX
        sub r6, r3, r4
        beqz r6, TARGET
        xor r5, r6, r7

        # MIPS
        beq $3, $4, TARGET
        nop
        xor $5, $6, $7


TARGET:
        add $1, $2, $3


## Jump

# MIPS: j, jr
# DLX:  j, jr

# MIPS: Delayed
# DLX: Not delayed.

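# MIPS: Immediate (ii) selects a target within the current 256 MiB region:
#       target = { upper 4 bits of PC, ii, 00 }.
# DLX:  Immediate is a displacement added to the PC.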

# PC= 0x12345678

# ii 0x3ffffff

# PC=   0x12345678
# 4ii   0x0ffffffc   (ii shifted left by two bits)
# targ  0x1ffffffc   (upper 4 bits from PC, lower 28 bits from 4ii)

        j TARGET
        nop

TARGET:


## Jump and Link Instructions

# MIPS and DLX:  jal, jalr

# Both: Register 31 holds return address (link) (by default)
# MIPS: jalr can specify the return address register; jal always uses register 31.

        jal TARG
        nop

TARG:

################################################################################
## Floating Point Summary

 ## Separate Floating Point Registers
#
# A feature of many RISC ISAs.
# Eases implementation.

 ## MIPS Floating Point 
#
# Supports IEEE 754 Single and Double FP Numbers
#
# Floating point handled by co-processor 1, one of 4 co-processors.
#
# MIPS floating point registers also called co-processor 1 registers.
# MIPS floating point instructions called co-processor 1 instructions.
#
# Registers named $f0-$f31.
# Load, store, and move instructions have "c1" in their names.
# Arithmetic instructions use ".s" (single), ".d" (double), or ".w" (int)
#  /completers/ to indicate operand type.
#
 ## MIPS Co-Processors (Briefly)
#
# Each co-processor has a register set and instructions.
# Co-processor x abbreviated cpx.
#
# cp0: Used for virtual memory and exceptions (covered later).
# cp1: Used for floating point in MIPS32 (used in class).
# cp2: Reserved for custom implementations.
# cp3: Used for floating point in MIPS64.


 ## DLX Floating Point 
#
# Supports IEEE 754 Single and Double FP Numbers
#
# Storage for FP registers called the FP register file.
#
# Registers named f0-f31.
# Load, store, and move instructions have "fp" in their names.
# Arithmetic instructions use "f" (single) or "d" (double) 
#  /completers/ to indicate operand type.


 ## Types of Floating-Point Instructions
#
# Briefly here, in detail later.
#
#
 ## Arithmetic Operations
#
# MIPS: add.d $f0, $f2, $f4
# DLX:  addd  f0, f2, f4
#
#
 ## Load and Store
#
# MIPS: ldc1 $f0, 8($t0)
# DLX:  ld f0, 8(r1)
#
#
 ## Move Between Register Files (E.g., integer to FP)
#
# MIPS: mtc1    $t0, $f0    (source GPR first, destination FPR second)
# DLX:  movi2fp f0, r2
#
#
 ## Format Conversion
#
# Convert from one format to another, e.g., integer to double.
#
# MIPS: cvt.d.w  $f0, $f2
# DLX:  cvti2d   f0, f2
#
#
 ## Condition Code Setting
#
# Compare and set condition code.
#
# MIPS:  c.lt.d $f0, $f2
# DLX:   ltd    f0, f2
#
#
 ## Conditional Branch
#
# Branch on floating-point condition.
#
# MIPS: BC1F TARGET
# DLX:  BFPF TARGET


 ## MIPS FP Load and Store

        # Load word in to coprocessor 1
        lwc1 $f0, 4($t4)   #  $f0 = Mem[ $t4 + 4 ]

        # Load double in to coprocessor 1
        ldc1 $f0, 0($t4)   #  $f0 = Mem[ $t4 + 0 ];  $f1 = Mem[ $t4 + 4 ]

        # Store word from coprocessor 1.
        swc1 $f0, 4($t4)   #  Mem[ $t4 + 4 ] = $f0

        # Store double from coprocessor 1.
        sdc1 $f0, 0($t4)   #  Mem[ $t4 + 0 ] = $f0;  Mem[ $t4 + 4 ] = $f1

 ## DLX FP Load and Store

        # Load float (32 bit)
        lf f0, 0(r1)
        # Load double (64 bit)
        ld f0, 0(r1)


 ## MIPS Move Instructions

        # Move to coprocessor 1 (the source GPR is the first operand).
        mtc1 $t0, $f0      # $f0 = $t0

        # Move from coprocessor 1.
        mfc1 $t0, $f0

 ## DLX Move

        # Move X to Y
        # X,Y: fp, i
        # X,Y: f,d
        movX2Y rd, rs

 ## MIPS Conversion

        # To: s, d, w;  From: s, d, w
        cvt.TO.FROM rd, rs

        cvt.d.w $f0, $f2

 ## DLX Conversion

        # X,Y: f, d, i
        cvtX2Y rd, rs

 ## MIPS Condition Setting

        # Compare:   fs COND ft
        # COND: eq, lt, le  (for gt or ge, swap the operands and use lt or le)
        # FMT: s, d
        c.COND.FMT fs, ft

        c.lt.d $f0, $f2

 ## DLX Condition Setting

        # Cond: gt, lt, eq, etc.
        # FMT: f, d
        <COND><FMT>

 ## MIPS FP Branch

        # Branch coprocessor 1 true.
        # Delayed branch.
        bc1t TARG

        bc1f TARG

 ## DLX FP Branch

        bfpt TARG
        bfpf TARG

## Integer Multiplication and Division

 # Both: Not an ordinary integer arithmetic instruction*.
 # * MIPS I

 ## MIPS Multiplication
#
# Product goes in to lo and hi registers.
#
# To multiply integers:
#
# Multiply
# Move product from lo and hi (if necessary) to integer registers.

        mult $t0, $t1  # {hi,lo} = $t0 * $t1
        mflo $t2      # $t2 = $lo
        

 ## DLX Multiplication
#
# Integer multiplication uses fp regs.

        # r3 = r1 x r2

        movi2fp f0, r1
        movi2fp f1, r2
        mul f3, f0, f1
        movfp2i r3, f3