## LSU EE 4720 -- Spring 2010 -- Computer Architecture
#
## MIPS  Overview
# Time-stamp: <22 January 2010, 10:20:50 CST, koppel@ellipse>
## Note
#
# This set based on EE 3755 Spring 2002 sets 9, 10, and 12.
## Contents
#
# MIPS Background
# Machine Language Basics
# Machine Languages Covered in ECE Courses
# Instructions, Registers, Immediates, and Memory
# Basic Three-Register Instructions
# Other Basic Instructions
# Instruction Coding
# Miscellaneous Integer Arithmetic Instructions
# Pseudo Instructions
#
# Types of Control-Transfer Instructions
# A Simple Jump Instruction
# A Branch Instruction
# More Branch Instructions
# More Jump Instructions
# Procedure Call Example
# Typical CTI Uses
#
# Load Byte (Unsigned)
# Store Byte
# Load Byte
# Load Word, Store Word
# Load and Store Half
# Array Access Examples
# Histogram Program
## References
#
# :PH:  Patterson & Hennessy, "Computer Organization & Design"
# :Mv1: MIPS Technologies, "MIPS32 Architecture for Programmers Vol I: Intro"
# :Mv2: MIPS Technologies, "MIPS32 Architecture for Programmers Vol II: Instr"
## Objectives
#
# Understand MIPS Register and Memory Organization
# Understand MIPS Instructions
#   Read programs using instructions, write program using instructions.
# Understand MIPS Instruction Coding and Formats
#   Determine binary coding from assembly, and vice versa.
# Ability to Learn New Instructions from MIPS Documentation
# Write Simple Assembly Language Programs
################################################################################
## MIPS Background
 ## MIPS Features
#
#   Example of a RISC ISA, to be defined later.
#
#   ISA is relatively elegant, as are some implementations.
#
#   Once popular for engineering workstations ...
#   ... now popular for embedded applications such as portable devices.
################################################################################
## Machine Language Review
# :PH: 3
# :Def: Machine Language
#
# The language understood by the computer processor itself.
# An ISA describes the machine language, plus other details.
#
# :Def: Machine Code
#
# All or part of a machine language program.
# :Def: High-Level Language
#
# A computer programming language for humans.
#
# Examples: C, Basic, Java, Fortran.
#
# A program in a high-level language may be converted into machine
# language and then run.
#
# A high-level language can be converted into the machine language of
# (compiled for) many machines.
# :Def: Assembly Language
#
# A language for humans used to prepare machine code.
#
# Assembly language is human readable, machine language is not.
# (At least without great effort or rare talent.)
v#
# For each machine language there is usually one assembly language.
# (In principle there could be many assembly languages for each
#  machine language.)
#
# The machine language and corresponding assembly language are sometimes
# known by the same name, for example MIPS.
#
# Almost all examples will be in assembly language.
 ## A Short MIPS Assembly Language Program
#
#
        add $s1, $s2, $s3       # s1 = s2 + s3
        sub $s1, $s4, $s1       # s1 = s4 - s1
#
# Comments start with a #.
#
# $s1-$s4 are storage locations called /registers/.
#
# Each register holds 32 bits.
# The program again, showing initial, intermediate, and final register values.
#
#                         Initial:  s1 = 10,   s2 = 20,   s3 = 30,   s4 = 40
        add $s1, $s2, $s3
#                                   s1 = 50,   s2 = 20,   s3 = 30,   s4 = 40
        sub $s1, $s4, $s1
#                         Final:    s1 =-10,   s2 = 20,   s3 = 30,   s4 = 40
# The program again, in three forms:
#
#  High-Level (C)       Assembly (MIPS)        Machine (MIPS)
#  a = b + c;           add $s1, $s2, $s3      0x02538820
#  a = d - a;           sub $s1, $s4, $s1      0x02918822
#
# Note: the relationship between high-level code and assembly code
#       is rarely one machine instruction per high-level statement.
################################################################################
## Instructions, Registers, Immediates, and Memory
# :PH: 3.3
# :Mv1: 2.8.4, 2.8.5  MIPS register descriptions
 ## Description is of MIPS-I (MIPS 32)
 ## Where MIPS Instructions Get Data
#
# Immediates:  Data stored in instruction.
# Registers:   A small number of high-speed 32-bit storage locations.
# Memory:      A large number (2^32) of low-speed 8-bit storage locations.
        addi $a0, $a0, 1  # Gets data from register ($a0) and immed. (1)
        lbu $t1, 0($a0)   # Gets data from memory (address in $a0)
 ## What MIPS Instructions can Modify
#
# Registers
# Memory
        addi $a0, $a0, 1  # Modifies register, $a0.
        sb $t1,-1($a0)    # Modifies memory, address is $a0-1
 ## Registers
#
# Most registers are 32 bits. (64 bits in MIPS-64)
#
# General-Purpose Registers (GPRs,  a.k.a., Integer Registers)
#   $0-$31
#
# Special Registers (Not part of a set.)
#   PC:  Program Counter
#   $lo, $hi: Used for multiplication and division.
#
# Co-processor 0: Processor and system control.
# Co-processor 1: MIPS-32 floating-point
#      $f0-$f31: 
# Co-processor 2: Reserved for special-purpose designs.
# Co-processor 3: 64-bit floating-point
 ## General-Purpose Register (GPR) Names
#
# There are 32 (arguably 31) general purpose registers (GPRs).
#
# Each register holds 32 bits. (MIPS-32)
#
# Each has a name and a number.
#
#   Numbers range from 0 to 31.
#
#   Names indicate suggested use.
#
# Either names or numbers can be used in assembly language.
# In machine language only numbers are used.
#
# Ways to Reference GPR Number 1 (varies by assembler, textbook author, etc.)
#
#   By number:  1, $1, r1, R1
#
#   By name:    $at
# List of General-Purpose Registers. (Will be explained later.)
# See :PH: A-23
#
# The value of register 0 ($zero) is always zero.
#
# The value of the other registers is the last value written.
#
#
 ##      Names     Numbers   Suggested Usage
#      $zero:    0       The constant zero.
#      $at:      1       Reserved for assembler.
#      $v0-$v1:  2-3     Return value
#      $a0-$a3:  4-7     Argument
#      $t0-$t7:  8-15    Temporary (Not preserved by callee.)
#      $s0-$s7: 16-23    Saved by callee.
#      $t8-$t9: 24-25    Temporary (Not preserved by callee.)
#      $k0-$k1: 26-27    Reserved for kernel (operating system).
#      $gp      28       Global Pointer
#      $sp      29       Stack Pointer
#      $fp      30       Frame Pointer
#      $ra:     31       Return address.
# :Example:
#
# The two instructions below are identical, the first uses register
# names, the second uses register numbers.
        add $s1, $t2, $sp
        add $17, $10, $29
####
 ## Memory
#
# Memory consists of 2^32 locations. [MIPS-64 has 2^64 locations.]
#
# Each location stores 8 bits, called a character or a byte.
#
# Each location has an address, an integer ranging from 0 to 4294967295
#
# Memory access is usually slower than register access.
# :Example:
#
# Examples of instructions accessing memory.  The first loads
# something from memory into a register, the second stores something
# from a register to memory.  These will be covered in detail later.
	lw	$4, 0($19)
	sw	$31, 72($sp)
####
 ## Immediates
#
# An immediate is a constant value stored in the instruction itself.
#
# In MIPS immediate are usually 16 bits and can be interpreted as
# signed or unsigned numbers (depending on the instruction).
# :Example:
#
# An instruction using an immediate, in this case 100.
        addi $1, $2, 100
################################################################################
## Basic Three-Register Instructions
# :PH:  Throughout Chapter 3.
# :Mv2: A complete list of instructions.
# The three-register instructions each read two source registers and
# write one destination register.
 ## The Plain Old Add Instruction
#
# :Syntax: ADD rd, rs, rt
#          rd <- rs + rt
#
# How to read the syntax shown above:
#
#   The mnemonic (name) of the instruction is ADD.  A lower-case
#   version of the mnemonic (add) should be used in assembly language.
#
#   The instruction has three operands, rd, rs, rt.
#   These symbols refer to general-purpose register operands.
#   In assembly language they would be replaced by a register name or number.
#   (Machine language coding will be covered later.)
#
#   The line rd <- rs + rt indicates what the instructions does.
#
# ADD instruction assembly language examples:
        add $1, $2, $3
        add $s1, $t2, $v1
# The instruction below is NOT an add instruction because $rt is not a GPR.
# A "strict" assembler would NOT be able to convert that in to machine
# code.  (A "liberal" assembler would correct the mistake by replacing
# "add" with "addi".)
        add $1, $2, 100
# There is an instruction that can take the constant 100, "addi."
 ## Other Basic Three-Operand Instructions
#
# :Syntax: SUB rd, rs, rt
#          rd <- rs - rt
#
# :Syntax: AND rd, rs, rt
#          rd <- rs & rt
#
# :Syntax: OR rd, rs, rt
#          rd <- rs | rt
#
# :Syntax: NOR rd, rs, rt
#          rd <- ~( rs | rt )
#
# :Syntax: XOR rd, rs, rt
#          rd <- rs ^ rt
#
# :Syntax: SLT rd, rs, rt           # Set less than.
#          rd <- rs < rt ? 1 : 0;
#          Comparison as signed integers.
#          There's no SGT, which makes sense if you think about it.
#
# :Syntax: SLLV rd, rs, rt          # Shift Left Logical Variable
#          rd <- rs << rt[4:0]
#          Note: Only five low-order bits of rt are used (other bits ignored).
#
# :Syntax: SRLV rd, rs, rt          # Shift Right Logical Variable
#          rd <- rs >> rt[4:0]
#          Note: "Vacated" bit positions are zero.
#          Note: Only five low-order bits of rt are used (other bits ignored).
#
# :Syntax: SRAV rd, rs, rt          # Shift Right Arithmetic Variable
#          rd <- {rs[31],rs[31],..., rs >> rt[4:0] }
#          Note: Sign is extended: bit (rs[31]) placed in vacated positions.
#          Note: Only five low-order bits of rt are used, the rest are ignored.
# :Example:
#
# Simplified assembler for the following line of C:
#
#  x = a + b - ( c + d );
#
#  Asm regs: x, $8;  a, $9;  b, $10;  c, $11;  d, $12
        add $8, $9, $10    # a + b
        add $13, $11, $12  # c + d
        sub $8, $8, $13
####
# :Example:
#
# Use of register zero.
#
# Simplified assembler for the following line of C:
#
#  x = -a;
#  y = -1;
#
#  Asm regs: x, $8;  a, $9;  y, $10
#  Reminder: register $0 is always zero.
        sub $8, $0, $9
        nor $10, $0, $0
####
# :Example:
#
# Simplified assembler for the following line of C:
#
#  x = ( ir >> off ) & mask;
#
# Asm regs: x, $8;  ir, $9;  off, $10;  mask, $11.
        srlv $8, $9, $10
        and $8, $8, $11
####
# :Example:
#
# Simplified assembler for the following line of C:
#
#  x = a + b > ( c & mask | d );
#
# Asm regs: x, $8;  a, $9;  b, $10;  mask, $11;  c, $12;  d, $13
        add $20, $9, $10    # a + b
        and $21, $12, $11   # c & mask
        or $21, $21, $13    # c & mask | d
        slt $8, $21, $20    # >
################################################################################
## Other Basic Instructions
# :PH:  Throughout Chapter 3.
# :Mv2: A complete list of instructions.
# :Def: Immediate
#
# A constant stored in the instruction itself.
#
# In MIPS most immediates are 16 bits, including the ones in this section.
 ## Assembler Metasyntactic Symbols for Immediates
#
# Metasyntactic symbols are used in syntax descriptions.
#
# immed:  16-bit immediate, for general use.
# sa:     5-bit immediate (shift amount), used in shift instructions.
 ## The Plain-Old Addi Instruction
#
# :Syntax: ADDI rt, rs, immed            # Add Immediate
#          rt <- rs + sign_extend(immed)
#
# Notes on the syntax above.
#
#   The destination register is now called rt.
#
#   Symbol immed refers to a 16 bit value which is stored in the instruction.
#
#   For the ADDI instruction immed is interpreted as a signed integer.
 ## Some More Instructions
#
# The following are NOT MIPS32 instructions:
#   SUBI, NORI
#
# :Syntax: ANDI rt, rs, immed           # And Immediate
#          rt <- rs & immed
#
# :Syntax: ORI rt, rs, immed
#          rt <- rs | immed
#
# :Syntax: XORI rt, rs, immed
#          rt <- rs ^ immed
#
# :Syntax: SLTI rt, rs, immed           # Set Less Than Immediate
#          rt <- rs < sign_extend(immed) ? 1 : 0;
#          Comparison as signed integers, immed is sign-extended.
#          There's no SGT, which makes sense if you think about it.
#
# :Syntax: SLL rd, rt, sa               # Shift Left Logical
#          rd <- rt << sa
#
# :Syntax: SRL rd, rt, sa               # Shift Right Logical
#          rd <- rt >> sa
#          Note: "Vacated" bit positions are zero.
#
# :Syntax: SRA rd, rt, sa               # Shift Right Arithmetic
#          rd <- {rt[31],rt[31],..., rs >> sa }
#          Note: Sign is extended: bit (rt[31]) placed in vacated positions.
#
# :Syntax: LUI rt, immed                # Load Upper Immediate
#          rt <- {immed,16'b0}
# :Example:
#
# Simple examples to illustrate what some instructions do.
#
        # Assuming $9 = 23
        addi $8, $9, 1
        # $8 <- 24
        # Assuming $9 = 23
        addi $8, $9, -1
        # $8 <- 22
        # Assuming $9 = 23
        addi $8, $9, 0x12345  # ASSEMBLER SHOULD REJECT THIS INSTRUCTION.
        # No result because no such instruction exists.
        # Assuming $9 = 0x12345678
        andi $8, $9, 0xff     # Called a "mask" operation.
        # $8 <- 0x78
        # Assuming $9 = 23
        slti $8, $9, 20       # if( $9 < 20 ) $8 = 1; else $8 = 0;
        # $8 <- 0
        slti $8, $9, 30
        # $8 <- 1
        # Assuming $9 = 1
        sll $8, $9, 1
        # $8 <- 2
        sll $8, $9, 3
        # $8 <- 8
        # Assuming $9 = 16
        srl $8, $9, 1
        # $8 <- 8
        srl $8, $9, 3
        # $8 <- 2
        sra $8, $9, 1
        # $8 <- 8
        sra $8, $9, 3
        # $8 <- 2
        # Assuming $9 = -1 = 0xffffffff
        srl $8, $9, 1
        # $8 <- 2147483647 = 0x7fffffff
        srl $8, $9, 3
        # $8 <- 536870911 = 0x1fffffff
        sra $8, $9, 1
        # $8 <- -1 = 0xffffffff
        sra $8, $9, 3
        # $8 <- -1 = 0xffffffff
        # Assuming $9 = -7 = 0xfffffff9
        srl $8, $9, 1
        # $8 <- 2147483644 = 0x7ffffffc
        srl $8, $9, 3
        # $8 <- 536870911 = 0x1fffffff
        sra $8, $9, 1
        # $8 <- -4 = 0xfffffffc
        lui $8, 0x1234
        # $8 <- 0x12340000
 ## Uses for Immediates
#
# Register Initialization
# Constants
# :Example:
#
# Register initialization examples.
#
# Simplified assembler for the following line of C:
#
#  a = 23;
#  b = 105;
#  c = 0x1234a678;  // Sixteen bits not enough.
#  d = 0;
#
# Asm regs: a, $8;  b, $9;  c, $10;  d, $11
        addi $8, $0, 23         # a = 23
        addi $9, $0, 105        # b = 105
        lui  $10, 0x1234        # c = 0x12340000 (so far)
        ori  $10, $10, 0xa678   # c = 0x1234a678
        add  $11, $0, $0        # d = 0 (lots of ways to do this)
#
####
# :Example:
#
# Simplified assembler for the following line of C:
#
#  rs = ( ir >> 21 ) & 0x1f;
#
# Asm regs: rs, $8;  ir, $9.
        srl $8, $9, 21
        andi $8, $8, 0x1f
####
################################################################################
## Instruction Coding
# :PH: 3.4
# :Mv1: 4.2
 ## The Three MIPS Instruction Formats
#
# R Format:  Typically used for three-register instructions.
# I Format:  Typically used for instructions requiring immediates.
# J Format:  Used for jump instructions (not covered yet).
#
# Every integer MIPS instruction is in one of these formats.
 ## R Format
# _________________________________________________________________
# | opcode    | rs      | rt      | rd      | sa      | function  |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
#  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# Bits    Field Name    Unabbreviated Name     Typical Use
#
# 31:26:  opcode                               First part of opcode.
# 25:21:  rs            (Register Source)      Source register one.
# 20:16:  rt            (Register Target)      Source register two.
# 15:11:  rd            (Register Destination) Destination register.
# 10:6:   sa            (Shift Amount)         Five-bit immediate.
#  5:0    function                             Second part of opcode.
#
 ## I Format
# _________________________________________________________________
# | opcode    | rs      | rt      | immed                         |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#  3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
#  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# Bits    Field Name    Unabbreviated Name     Typical Use
#
# 31:26:  opcode                               Entire opcode (for I and J).
# 25:21:  rs            (Register Source)      Source register one.
# 20:16:  rt            (Register Target)      Source register two.
# 15:0:   immed         (Immediate)            Immediate value.
 ## J Format
# _________________________________________________________________
# | opcode    | ii                                                |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# Bits    Field Name    Unabbreviated Name     Typical Use
#
# 31:26:  opcode                               Entire opcode (for I and J).
# 25:0:   ii            (Instruction Index)    Part of jump target.
 ## Field Values
#
# Values for fields can be found in the following places:
#    :PH:  Page A-55 on (2nd edition) Listed by instruction.
#    :Mv2: Listed by instruction. "Official" MIPS documentation.
#
# opcode, function
#
#  Determined by the instruction.
#  For R-format instructions opcode is limited to a few values: e.g., 0, 1.
#  The function field takes on a larger range of values.
#
#  Values for opcode and function can be found in the following places:
#    :PH:  Figure A.19 (page A-54)  Compact, but perhaps intimidating at first.
#    :PH:  Page A-55 on (2nd edition) Listed by instruction.
#    :Mv2: Listed by instruction. "Official" MIPS documentation.
#
#
# rs, rt, rd
#
#  Usually register numbers represented using 5-bit unsigned integers.
#  Occasionally used as part of opcode or to specify something other
#    than a register.
#
#
# sa (Shift Amount)
#
#  A 5-bit constant usually used by shift instructions.
#
#
# immed
#
#  A 16-bit quantity.
#
#
# ii (Instruction Index)
#
#  A 26-bit quantity used to form a jump target address.
 ## R Format Examples
# _________________________________________________________________
# | opcode    | rs      | rt      | rd      | sa      | function  |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# :Syntax: ADD rd, rs, rt
        add $2, $3, $4
#
# Based on the instruction above:
#   rd -> 2,  rs -> 3,  rt -> 4
#
# Using something in :PH: Appendix A or looking up the instruction in :Mv2:
#   opcode -> 0,  function -> 0x20,  sa -> 0
#
# Machine code for the instruction above:
#     0 3 4 2 0 0x20                      (By field, each field in hexadecimal)
#   = 000000 00011 00100 00010 00000 100000    (By field, binary)
#   = 0000 0000 0110 0100 0001 0000 0010 0000  (Groups of four bits)
#   = 0x00641020                               (Hexadecimal)
#
#
# :Syntax: SLL rd, rt, sa               # Shift Left Logical
        sll $2, $3, 4
# Based on the instruction above:
#   rd -> 2,  rt -> 3,  sa -> 4
#
# Using something in :PH: Appendix A or looking up the instruction in :Mv2:
#   opcode -> 0,  function -> 0,  rs -> 0
#
# Machine code for the instruction above:
#     0 0 3 2 4 0                      (By field, each field in hexadecimal)
#   = 000000 00000 00011 00010 00100 000000    (By field, binary)
#   = 0000 0000 0000 0011 0001 0001 0000 0000  (Groups of four bits)
#   = 0x00031100                               (Hexadecimal)
#
 ## I Format Examples
# _________________________________________________________________
# | opcode    | rs      | rt      | immed                         |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
# :Syntax: ORI rt, rs, immed
        ori $2, $3, 4
#
# Based on the instruction above:
#   rt -> 2,  rs -> 3,  immed -> 4
#
# Using something in :PH: Appendix A or looking up the instruction in :Mv2:
#   opcode -> 13
#
# Machine code for the instruction above:
#     0xd 3 2 4                      (By field, each field in hexadecimal)
#   = 001101 00011 00010 0000000000000100     (By field, binary)
#   = 0011 0100 0110 0010 0000 0000 0000 0100 (Groups of four bits.)
#   = 0x34620004                              (Hexadecimal)
#
################################################################################
## Pseudo Instructions
# :PH: 3.9 (Briefly under assembler heading.)
# A.K.A. Synthetic Instructions
# :Def: Pseudo Instruction
#
# An assembly language instruction that does not (necessarily)
# correspond to a machine language instruction.  The assembler will
# substitute one or two real machine instructions. Pseudo instructions
# are intended to make assembler code more readable and to support
# future ISA extensions.  (Some pseudo instructions may become real
# instructions.)
#
# When these notes are viewed in HTML or with the class Emacs package,
# pseudo instructions (those recognized by the SPIM simulator) will be
# italicized.  See examples.
 ## Some Pseudo Instructions
#
# This is not a complete list of pseudo instructions.
#
# (Note: the examples show both pseudo and real instructions.  In
#  actual assembly code you would use one or the other.)
#
# :Syntax: NOP                          # No Operation
#
#  A special NOP instruction is not needed because many instructions
#  will do nothing if register zero is used as the destination.
#  Instruction sll is the "official" NOP because its coding is all
#  zeros.
#
        nop               # Pseudo
        sll $0, $0, 0     # Real.
#
# :Syntax: MOVE rd, rs                  # Move
#          rd <- rs
#
        move $1, $2       # Pseudo
        addu $1, $0, $2   # Real
#
# :Syntax: LA rd, imm32                 # Load Address
# :Syntax: LI rd, imm32                 # Load Integer
#          rd <- imm32
#          Note: SPIM resolves la and li into the same instructions.
#          (That is, they do the same thing.  Use the one that better
#           conveys intent to the human assembly code reader.)
#
#
        li $2, 0x12345678   # Pseudo
        lui $1, 0x1234      # Real (First of two.)
        ori $2, $1, 0x5678  # Real (Second of two.)
        li $2, 0x12         # Pseudo
        ori $2, $0, 0x12    # Real (Assembler realizes lui not needed.)
################################################################################
## Types of Control-Transfer Instructions
# :PH:  Material covered in a different order.
# :Mv1: 4.1.3 (Brief overview.)
# :Def: Control-Transfer Instruction (CTI)
#
# An instruction that changes PC, conditionally or unconditionally.
# Types: branch, jump, trap, etc.; all are CTIs.
# :Def: CTI Target  Instruction (Branch Target, Jump Target, etc.)
#
# The instruction the CTI takes execution to if CTI taken.
# If a conditional branch is not taken, execution doesn't go to target.
 ## ISA Goals
#
# Efficiently implement common code sequences:
#
#  if/then/else
#  loops
#  call/return
#  switch/case
#  function pointers
#  operating system access
 ## Implementation Goals
#
# CTI performance can determine overall performance.
# Will see difficulties when we look at implementations.
 ## Types of Control Transfers in MIPS
#
# Branch (beq, etc.)
#   Instruction specifies a condition and an address, the target.
#   Used for loops, ifs, etc.  Most commonly used CTI.
#   If condition is true continue execution at target.
#
# Branch-and-Link (Not in MIPS-I)
#   Instruction specifies a condition and an address, the target.
#   Used for procedure calls.
#   If condition is true save PC in $31 and continue execution at target.
#
# Direct Jump (j)
#   Always taken, instruction specifies a fixed target.
#   Used for part of if/else, loops, etc.
# Indirect Jump (jr)
#   Always taken, but target address is in a register.
#   Used for returns, switch, function pointers, etc.
#
# Jump-and-Link (jal, jalr)
#   Always taken, instruction saves a return address in a register.
#   Target can be in instruction (jal) or register (jalr).
#   Used for procedure calls and returns and other purposes.
#
# Trap (Not covered in detail in this set.)
#   Conditional, (similar to branch). 
#   If condition true continue at specified entry in OS-prepared trap table.
#   Used for OS Code.
#
 ## Other ISAs' CTIs
#
# Similar types of instructions but names can vary.
 ## Conditions and Targets
#
# The methods used to specify conditions and targets vary by instruction.
# See descriptions in following sections
 ## Delayed CTI    <- PAY ATTENTION TO THIS.
#
# All of the CTIs covered here are delayed (except traps).
#
# The instruction after an ordinary delayed CTI IS ALWAYS EXECUTED.
#
        add $8, $0, $0
        j FORJOY
        addi $8, $8, 1
        addi $8, $8, 2
FORJOY:
        addi $8, $8, 4
        addi $8, $8, 8
################################################################################
## A Simple Jump Instruction
# :PH: 3.5
# :Mv2:
# Simple jump (j) covered here, others (jr,jal,jalr) covered in a
# later selection.
# Assembler :Syntax: J target
# Coding    :Syntax: J ii
#          After next instruction go to { PC[31:28], ii, 2'b0 },
#          where ii is bits 27:2 of target.
#
# In assembly language "target" is the line label for the target.
# In machine language "target", the ii field, is bits 27:2 of the
#   target address.
# Bits 31:28 of the target address must match bits 31:28 of the jump
#   instruction's address. (If not, use the jr instruction.)
# :Example:
#
# Code used to illustrate how target is determined.
FIRSTLINE:  # Address of add: 0x400000
        add $8, $0, $0
        j FORJOY
        addi $8, $8, 1
        addi $8, $8, 2
FORJOY:     # Address of addi: 0x400010
        addi $8, $8, 4
        addi $8, $8, 8
# In the code above:
#
#   FORJOY is a line label, part of the assembly language.
#   Suppose the assembler [really the linker] sets FORJOY = 0x400010
################################################################################
## A Branch Instruction
# :PH: 3.5
# :Mv2:
# One branch instruction covered here (bne), others (beq, bltz, etc.)
# covered in a later section.
# Assembler :Syntax: BNE rs, rt, target    # Branch Not Equal
# Coding    :Syntax: BNE rs, rt, offset
#           if rs != rt, after next instruction
#                        go to PC + 4 + 4 * sign_extend(offset)
#
# "target" is the line label to continue at.
# "offset" is the number of instructions to skip, starting at
#   the instruction after the branch.
# :Example:
#
# Code to illustrate the offset.
FIRSTLINE:
        add $9, $0, $0
        addi $10, $9, 1
        bne $9, $10, LINE
        addi $8, $8, 1      # Offset computed from this instruction.
        addi $8, $8, 2
LINE:
        addi $8, $8, 4      # The branch target, offset = 2 instructions.
        addi $8, $8, 8
# :Example:
#
# Use of a branch in a loop.
        add $8, $0, $0   # $8 -> 0
        addi $9, $0, 10  # $9 -> 10
        add $10, $0, $0  # $10 -> 0 (i)
LOOP:
        add $8, $8, $10  # $8 = $8 + i
        addi $9, $9, -1
        bne $9, $0, LOOP
        addi $10, $10, 1 # i = i + 1
################################################################################
## More Branch Instructions
# :PH: 3.5
# :Mv2:
# Assembler :Syntax: BEQ rs, rt, target    # Branch Equal
# Coding    :Syntax: BEQ rs, rt, offset
#           if rs == rt after next instruction
#                       go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BGEZ rs, target    # Branch >= Zero
# Coding    :Syntax: BGEZ rs, offset
#           if rs >= 0 after next instruction
#                      go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BGTZ rs, target    # Branch > Zero
# Coding    :Syntax: BGTZ rs, offset
#           if rs > 0 after next instruction
#                     go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BLEZ rs, target    # Branch <= Zero
# Coding    :Syntax: BLEZ rs, offset
#           if rs <= 0 after next instruction
#                      go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BLTZ rs, target    # Branch < Zero
# Coding    :Syntax: BLTZ rs, offset
#           if rs < 0 after next instruction
#                     go to PC + 4 + 4 * sign_extend(offset)
################################################################################
## More Jump Instructions
# :PH: 3.5
# :Mv2:
# Assembler :Syntax: JAL target   # Jump and link.
# Coding    :Syntax: JAL ii
#          $31 <- PC + 8
#          After next instruction go to { PC[31:28], ii, 2'b0 },
#          where ii is bits 27:2 of target.
#          Note: $31 a.k.a. $ra
#
# :Syntax: JR rs                  # Jump register.
#          After next instruction go to address in rs.
#
# :Syntax: JALR rs, rt            # Jump and link register.
#          $rt <- PC + 8
#          After next instruction go to address in rs
#          Note: $31 a.k.a. $ra
#
# :Example:
#
# Use of j instruction.
        # Jump to LINEX
        j LINEX
        nop
        nop
LINEX:
        addi $t2, $t2, 1
# :Example:
#
# Use of jal instruction.
LINEA:  # LINEA = 0x12345678
        # Instruction below is stored at (PC=) 0x12345678 
        # Set $ra -> PC + 8 = 0x12345680 and jump to LINEX
        jal LINEX
        nop
        nop
LINEX:
        addi $t2, $t2, 1
# :Example:
#
# Use of jr instruction.
        # Load the address of a line into $to.
        # Without using pseudo: la $t0, LINEX
        lui $t0, hi(LINEX)
        ori $t0, $t0, lo(LINEX)
        # Jump to the line.
        jr $t0
        nop
        nop
LINEX:
        addi $t2, $t2, 1
# :Example:
#
# Use of jalr instruction.
LINEA:  # LINEA = 0x12345678
        la $t0, LINEX   # Note: this resolves to two instructions.
        # Instruction below is stored at (PC=) 0x12345680
        # Set $ra -> PC + 8 = 0x12345688 and jump to LINEX
        jalr $t0
        nop
        nop
LINEX:
        addi $t2, $t2, 1
################################################################################
## Procedure Call Example
# :PH: 3.6
# :Example:
#
# Use of jal, jr, and jalr instructions to call and return from
# a procedure.
#
# The called procedure, times_three, puts 3 times $a0 in $v0.
#
# The procedure is called correctly three times, after which the code
# goes into an infinite loop.
#
# For clarity CTI delay slots are filled with nops.  In many examples
# and in assignments they should be filled, where possible, with
# useful instructions.
        .globl __start
__start:
        addi $a0, $0, 12     # Set value to be tripled.
        jal times_three      # Jump and save PC + 8 in $ra ($31)
        nop
        addi $a0, $0, 3755   # Returns here after jal above.
        jal times_three      # Call times_three again.
        nop
        nop                  # Returns here after jal above.
        la $s1, times_three  # Store address of times_three in $s1
        addi $a0, $0, 2002   # Value to triple.
        jalr $s1             # Call times_three again, this time using jalr.
        nop
        addi $a0, $0, 22     # 
        jr $s1               # THE WRONG WAY TO CALL A PROCEDURE!!
        nop
        addi $a0, $0, 0      # Never reached.
times_three:
        sll $v0, $a0, 1     # Start of times_three.
        add $v0, $v0, $a0
        jr $ra              # Jump using return address.
        nop
################################################################################
## Typical CTI Uses
# :PH: 3.5
# Branches (bne, beq, bltz, etc.)
#   if/else
#   Loops
#
# Regular Jump (j)
#   if/else, loops (along with branches)
#
# Jump and Link: (jal, jalr)
#   Procedure calls.
#
# Jump Register
#   Procedure returns.
#   switch statements.
 ## CTIs for if/else Statement
#
#  - Use branch and jump.
#  - Distances small.
#  - Very frequent.
# :Example:
#
# Typical if/else statement.
#
#   if ( a + b > c )           # Condition
#     {
#       d = d >> 1;   e++;     # If Part
#     } 
#   else
#     { 
#       d = d << 1;   e--;     # Else Part
#     }
#
# i++;
#
# Reg Usage: a, $8;  b, $9;  c, $10;  d, $11;  e, $12;  i, $13
        ## Condition
        #
        # Compute condition.
        #
        add $16, $8, $9     # $16 = a + b
        slt $17, $10, $16   # $17 = c < a + b
        #
        # Branch based on condition.
        #
        beq $17, $0, ELSEPART
        nop
        ## If Part
        #
        sra $11, $11, 1     # d = d >> 1
        addi $12, $12, 1    # e++;
        j LINEX
        nop
        ## Else Part
        #
ELSEPART:
        sll $11, $11, 1     # d = d << 1
        addi $12, $12, -1   # e--
        # Done with the if/else
        
LINEX:
        addi $13, $13, 1    # i++
 ## CTIs for Loop
#
# - Branch and jump.
# - Distances small to medium
# - Frequently encountered.
# :Example:
#
# Typical for loop.
#
# for( i = a - 5;       i < b + c;   i = i + d )   x = x ^ ( x >> 1 );
#      Initialization   Test         Increment     Body
#
# Reg Usage: a, $8;  b, $9;  c, $10;  d, $11;  x, $12;  i, $13
         ## Initialization
        #
        addi $13, $8, -5    # i = a - 5
        j TEST
        nop
LOOP:
        ## Body
        #
        sra $16, $12, 1     # $16 = x >> 1
        xor $12, $12, $16   # x = x ^ $16
        
        ## Increment
        #
        add $13, $13, $11   # i = i + d
        ## Test
TEST:
        add $16, $9, $10  # $16 = b + c  Note: Can be moved out of loop.
        slt $17, $13, $16 # i < $16
        bne $17, $0, LOOP
        nop
 ## CTIs for Switch
#
# - Direct and Indirect Jump.
# - Distances small to medium.
# - Occasionally encountered.
# :Example:
#
# Switch statement
#
# Indirect jump, jr used to jump to correct case.
#
# If cases are dense (consecutive or almost consecutive) compiler will
# create a dispatch table of case statement addresses.
#
# switch ( t1 ) {
#  case 1: t0++; break;
#  case 2: t0--; break;
#  case 3: t0=>1; break;
# }
# s2 = t0 & s3;
        .data
 ## Dispatch table, holding address of case statements.
DTABLE:
        .word CASE0
        .word CASE1
        .word CASE2
        .word CASE3
        .text
        la $t5, DTABLE
        sll $t6, $t1, 2
        add $t6, $t6, $t5    #  Compute dispatch table address.
        lw $t7, 0($t6)       #  Load address of case statement. 
        jr $t7               #  Use jr to jump to case statement.
        nop
CASE1:
        j ENDCASE
        addi $t0, $t0, 1
CASE2:
        j ENDCASE
        addi $t0, $t0, -1
CASE3:
        j ENDCASE
        srl $t0, $t0, 1
ENDCASE:
        and $s2, $t0, $s3
################################################################################
## Load Byte (Unsigned)
# :Syntax: LBU rt, offset(rs)                  # Load Byte Unsigned
#          rt <- { 24'b0, Mem[ rs + offset ] }
#          Note: rs and rt are registers.
#          Offset is a 16-bit immediate.
#
# :Example:
#
# Simple uses of lbu.
        # Initially: $a2 -> 0x1000
        # At Mem[ 0x1004 ] = 0x12
        #
        lbu $a0, 4($a2)
        #
        # Effective address is 4 + 0x1000 = 0x1004.
        # $a0 loaded with 0x12.
        addi $a2, $a2, 4   # $a2 -> 0x1004
        lbu $a0, 0($a2)
        #
        # Effective address is 0 + 0x1004 = 0x1004 (same as above.)
        # $a0 loaded with 0x12 (again)
####
# :Example:
#
# Procedure to determine the length of a C-style string.  Includes
# code to call the procedure.
        .data
str:
        .asciiz "I Love NO"
msg:
        .asciiz "The length of string \n   \"%/a1/s\"\nis %/v1/d.\n"
        .text
        .globl __start
__start:
        la $a0, str             # Load address of string.
        jal strlen              # Call strlen procedure.
        nop
        addi $a1, $a0, 0        # Move address of string to $a1
        addi $v1, $v0, 0        # Move length of string to $v1
        addi $v0, $0, 11        # System call code for message.
        la $a0, msg             # Address of message.
        syscall
        addi $v0, $0, 10        # System call code for exit.
        syscall
strlen:
        ## Register Usage
        #
        # $a0: Address of first character of string.
        # $v0: Return value, the length of the string.
        #
        # $t0: Character being examined.
        # $t1: Address of current character being examined.
        #
        addi $t1, $a0, 0
LOOP:
        lbu $t0, 0($t1)
        addi $t1, $t1, 1
        bne $t0, $0, LOOP
        nop
        addi $t1, $t1, -1
        jr $ra
        sub $v0, $t1, $a0
####
################################################################################
## Store Byte
# :Syntax: SB rt, offset(rs)                  # Store Byte
#          Mem[ rs + offset ] = rt[7:0]
# :Example:
#
# Simple uses of sb.
        # Initially: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0
        #
        addi $t0, $0, 0x14    # t0 -> 0x14
        sb $t0, 0($a2)
        #
        # Effective address is 0 + 0x1004 = 0x1004
        # 0x14 written to Mem[ 0x1004 ]
        addi $t0, $0, 0x1234   # t0 -> 0x1234
        sb $t0, 0($a2)
        #
        # Effective address is 0 + 0x1004 = 0x1004
        # 0x34 written to Mem[ 0x1004 ]
        # Note that only bits 7:0 of $t0 written to memory.
####
# :Example:
#
# Program to convert a C-style string to upper case.
        .data
str:
        .asciiz "I Love NO"
before:
        .asciiz "Before: %/s1/s\n"
after:
        .asciiz "After:  %/s1/s\n"
        .text
        .globl __start
__start:
        la $s1, str
        la $a0, before
        addi $v0, $0, 11
        syscall
        jal upper
        add $a0, $s1, $0
        la $a0, after
        addi $v0, $0, 11
        syscall
        li $v0, 10
        syscall
upper:
        ## Register Usage
        #
        # $a0: (Call) Address of string to convert.
        #
        # $a0: Address of character being examined.
        # $t1: Character being examined.
        # $t2: Comparison result.
LOOP:
        lbu $t1, 0($a0)
        addi $a0, $a0, 1
        beq $t1, $0, DONE
        slti $t2, $t1, 97 # < 'a'
        bne $t2, $0 LOOP
        slti $t2, $t1, 123 # 'z' + 1
        beq $t2, $0, LOOP
        addi $t1, $t1, -32
        j LOOP
        sb $t1,-1($a0)
DONE:
        jr $ra
        nop
####
################################################################################
## Load Byte
# :Syntax: LB rt, offset(rs)                  # Load Byte
#          rt <- sign_extend( Mem[ rs + offset ] );
#          Note: rs and rt are registers.
#          Offset is a 16-bit immediate.
# :Example:
#
#
        # Initially: $a2 -> 0x1000
        # At Mem[ 0x1000 ] = 0x12
        # At Mem[ 0x1001 ] = 0x7f
        # At Mem[ 0x1002 ] = 0x80
        # At Mem[ 0x1003 ] = 0xff
        #
        lb $t0, 0($a2)  # $t0 -> 0x12        18
        lb $t1, 1($a2)  # $t1 -> 0x7f        127
        lb $t2, 2($a2)  # $t2 -> 0xffffff80  (-128)
        lb $t3, 3($a2)  # $t3 -> 0xffffffff  (-1)
####
 ## Usage
#
# lbu:  Characters, unsigned integers.
# lb:   Signed integers.
################################################################################
## Load Word, Store Word
 ## Note
#
# Each memory location holds 8 bits.
# Register size: 32 bits.
#
# To load a register use lw (load word).
# Loads for consecutive bytes into a register.
# :Syntax: LW rt, offset(rs)
#          rt <- { Mem[ rs + offset + 0 ], Mem[ rs + offset + 1 ],
#                  Mem[ rs + offset + 2 ], Mem[ rs + offset + 3 ]  }
#          Load register rt with four bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 4.
# :Example:
#
        # Assume: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0x00003755
        # More precisely:  (Big Endian)
        #   Mem[ 0x1004 ] = 0x00
        #   Mem[ 0x1005 ] = 0x00
        #   Mem[ 0x1006 ] = 0x37
        #   Mem[ 0x1007 ] = 0x55
        #
        lw $a0, 0($a2)
        # 0x3755 loaded into $a0
        lw $a0, 2($a2)  # Error
        # Effective address = 0x1006, not a multiple of 4
        addi $a2, $a2, -2
        lw $a0, 2($a2)  # No problem
        # Effective address = 0x1004, is a multiple of 4
# :Example:
#
        .data
my_word:
        .word 123
        .word 456
        .text
        la $t0, my_word
        lw $t1, 0($t0)    # t1 <- 123
        lw $t2, 4($t0)    # t2 <- 456
        addi $t0, $t0, 4
        lw $t3, 0($t0)    # t3 <- 456  (Another way to load 456)
        addi $t0, $t0, 4
        lw $t4, -4($t0)   # t4 <- 456  (And another way to load 456)
# Error, instruction will never finish because address not a multiple of 4.
        lw $t9, 1($t0)
####
# :Syntax: SW rt, offset(rs)
#          Mem[ rs + offset ] = rt[31:24]
#          Mem[ rs + offset + 1 ] = rt[23:16]
#          Mem[ rs + offset + 2 ] = rt[15:8]
#          Mem[ rs + offset + 3 ] = rt[7:0]
#          Write memory starting at address offset + rs with contents of rt.
#          Effective address, rs + offset, must be a multiple of 4.
# :Example:
#
        # Assume: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0x00003755
        # More precisely:  (Big Endian)
        #   Mem[ 0x1004 ] = 0x00
        #   Mem[ 0x1005 ] = 0x00
        #   Mem[ 0x1006 ] = 0x00
        #   Mem[ 0x1007 ] = 0x00
        #
        lui $a0, 0x1234
        ori $a0, $a0, 0x5678  # $a0 -> 0x12345678
        sw $a0, 0($a2)
        #   Mem[ 0x1004 ] = 0x12
        #   Mem[ 0x1005 ] = 0x34
        #   Mem[ 0x1006 ] = 0x56
        #   Mem[ 0x1007 ] = 0x78
        lbu $t0, 0($a2)  # $t0 -> 12
        lbu $t1, 1($a2)  # $t1 -> 34
        lbu $t2, 2($a2)  # $t2 -> 56
        lbu $t3, 3($a2)  # $t3 -> 78
        lw $a0, 2($a2)  # Error
        # Effective address = 0x1006, not a multiple of 4
        addi $a2, $a2, -2
        lw $a0, 2($a2)  # No problem
        # Effective address = 0x1004, is a multiple of 4
####
################################################################################
## Load and Store Half
# :Syntax: LH rt, offset(rs)                    # Load Half
#          rt <- sign_extend( { Mem[ rs + offset ], Mem[ rs + offset + 1] } )
#          Load register rt with two bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 2.
#
# :Syntax: LHU rt, offset(rs)                   # Load Half Unsigned
#          rt <- { 16'b0, Mem[ rs + offset ], Mem[ rs + offset + 1] }
#          Load register rt with two bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 2.
#
# :Syntax: SH rt, offset(rs)                    # Store Half
#          Mem[ rs + offset + 0 ] = rt[15:8]
#          Mem[ rs + offset + 1 ] = rt[7:0]
#          Effective address, rs + offset, must be a multiple of 2.
################################################################################
## Array Access Examples
# Given the base address of an array (address of first element, say &a[0]),
# need to compute the address of an element at index i, (say, &a[i]).
# To do this compute:
#
#  English: Address of i'th element of a = address of a + i * size of element.
# :Example:
#
# Array accesses.  In most examples i is the index (the number of the
# element to load).  Note that i must be multiplied by the size of the
# element before adding it on to the address of the first element.
#
# Registers:  a, s1;  b, s5;  s, s2;  us, s3;  c, s4;  i, t0;  x, t1
        # char *c; ...      # $s4 = c;  $t0 = i
        # x = c[i];
        #
        add $t5, $s4, $t0   # $t5 -> &c[i]  (Address of c[i].)
        lb $t1, 0($t5)      # x = c[i];   $t1 -> c[i]
        # char *c; ...      # $s4 = c;  $t0 = i
        # x = c[i+1] + c[i+2];
        #
        add $t5, $s4, $t0   # $t5 -> &c[i]  (Address of c[i].)
        lb $t6, 1($t5)      # $t6 -> c[i+1]
        lb $t7, 2($t5)      # $t7 -> c[i+2]
        add $t1, $t6, $t7   # x = c[i+1] + c[i+2]
        # int *a; ...       # $s1 = a;  $t0 = i
        # x = a[i];
        sll $t5, $t0, 2     # $t5 -> i * 4;  Each element is four characters.
        add $t5, $s1, $t5   # $t5 -> &a[i]  (Address of a[i].)
        lw $t1, 0($t5)      # x = a[i];   $t1 -> a[i]
        # int *a; ...       # $s1 = a;  $t0 = i
        # x = a[i+1] + a[i+2];
        #
        sll $t5, $t0, 2     # $t5 -> i * 4;  Each element is four characters.
        add $t5, $s1, $t5   # $t5 -> &a[i]  (Address of a[i].)
        lw $t6, 4($t5)      # $t6 -> a[i+1]
        lw $t7, 8($t5)      # $t7 -> a[i+2]
        add $t1, $t6, $t7   # x = a[i+1] + a[i+2]
        # int x, j, *a, *b; # $t1 = x;  $t2 = j;  $s1 = a;  $s2 = b
        # j = 3;
        # b = a + j;
        # x = *b;      // x = a[3];
        addi $t2, $0, 3     # j = 3;
        sll $t5, $t2, 2     # $t5 -> j * 4
        add $s5, $s1, $t5   # b = a + j;
        lw $t1, 0($s5)      # x = *b = a[j]
        # short *s; ...     # $s2 = s;  $t0 = i
        # x = s[i];
        #
        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s2, $t5   # $t5 -> &s[i]  (Address of s[i].)
        lh $t1, 0($t5)      # x = s[i];   $t1 -> s[i]
        #                     $s3 = us;  $t0 = i
        # unsigned short *us;
        # x = us[i];
        #
        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s3, $t5   # $t5 -> &us[i]  (Address of us[i].)
        lhu $t1, 0($t5)      # x = us[i];   $t1 -> us[i]
####
################################################################################
## Miscellaneous Integer Arithmetic Instructions
# :PH:  Throughout Chapter 3.
# :Mv2: A complete list of instructions.
 ## Miscellaneous Integer Arithmetic Instructions
#
# "Unsigned" Instructions: The name is misleading.
# Multiplication
# Division
 ## "Unsigned" Instructions
#
# Instructions SUBU, ADDU and ADDIU are NOT UNSIGNED.
#              The "U" means ignore overflow for these instructions.
#
# Instructions SLTU and SLTIU are unsigned.
#
# :Syntax: ADDU rd, rs, rt                # Add, overflow ignored.
#          rd <- rs + rt
#          Note: Identical to ADD except an overflow is ignored.
#
# :Syntax: SUBU rd, rs, rt                # Sub, overflow ignored.
#          rd <- rs - rt
#          Note: Identical to SUB except an overflow is ignored.
#
# :Syntax: ADDIU rt, rs, immed            # Add Immediate, overflow ignored.
#          rt <- rs + sign_extend(immed)
#          Note: Identical to ADDI except an overflow is ignored.
#          Yes, the immediate IS sign extended.
#
# :Syntax: SLTU rd, rs, rt                # Set Less Than Unsigned
#          rd <- rs < rt ? 1 : 0;
#          Comparison as unsigned integers.
#
# :Syntax: SLTIU rt, rs, immed            # Set Less Than Immediate Unsigned
#          rt <- rs < immed ? 1 : 0;
#          Comparison is for unsigned integers, immed IS sign-extended.
 ## Integer Overflow
#
# Because C expects integer overflow to be ignored, C compilers emit
# ADDU, SUBU, ADDIU, SUBIU for integer arithmetic.
 ## Difference between add and addu, etc.
#
# With add, sub, and addi overflow causes an error.
# With addu, subu, addui overflow is ignored.
# :Example:
#
# Note: In C integer overflow is silently ignored.
# C Code:
#
#  int x, a, b;
#
#  a = 0x7fffffff;  // Largest 32-bit signed integer.
#  b = 1;
#  x = a + b;
#
#  Asm regs: a, $8;  b, $9;  x, $10
        lui $8, 0x7fff;
        ori $8, $8, 0xffff;
        addi $9, $0, 1
        #  $8 = 0x7fffffff = 2147483647
        #  $9 = 1
        addu $10, $8, $9
        #  $10 = 0x80000000 = -2147483648  (Addition overflowed.)
# :Example:
#
# Language in a C-like language in which integer overflow is an error.
#
#  int x, a, b;
#
#  a = 0x7fffffff;  // Largest 32-bit signed integer.
#  b = 1;
#  x = 3755;
#  x = a + b;
#
#  Asm regs: a, $8;  b, $9;  x, $10
        lui $8, 0x7fff;
        ori $8, $8, 0xffff;
        addi $9, $0, 1
        addi $10, $0, 3755
        #  $8 = 0x7fffffff = 2147483647
        #  $9 = 1
        #  $10 = 3755;
        add $10, $8, $9
        #  Instruction above causes an error (exception, $10 not changed.)
        #  $10 = 3755
# :Example:
#
# Sign extension of immediate.
# addi, addui, and ori
# Remember that immediate is 16 bits.
        addi $8, $0, 1
        # $8 = 1
        addi $8, $0, -1
        # $8 = 0xffffffff = -1  (-1 sign extended)
        addiu $8, $0, 1
        # $8 = 1
        addiu $8, $0, -1
        # $8 = 0xffffffff = -1  (Yes, -1 sign extended.)
        ori $8, $0, 1
        # $8 = 1;
        ori $8, $0, -1
        # $8 = 0xffff  (-1 not sign extended.)
 ## Multiplication and Division
#
# Integer multiplication instructions write product to special
# hi and lo registers: lo gets bits 31:0, hi gets bits 63:32.
#
# :Syntax: MULT rs, rt
#          {hi,lo} <- rs * rt
#          Operands signed.
#          Overflow does not raise an exception.
#
# :Syntax: MULTU rs, rt
#          {hi,lo} <- rs * rt
#          Operands unsigned.
#          Overflow does not raise an exception.
#
# :Syntax: DIV rs, rt
#          lo <- rs / rt;  hi <- rs % rt
#          Operands signed.
#          Never raises an exception, even for division by zero.
#
# :Syntax: DIVU rs, rt
#          lo <- rs / rt;  hi <- rs % rt
#          Operands unsigned
#          Never raises an exception, even for division by zero.
#
# :Syntax: MTLO rs                      # Move to lo register.
#          lo <- rs
#
# :Syntax: MTHI rs                      # Move to hi register.
#          hi <- rs
#
# :Syntax: MFLO rd                      # Move from lo register.
#          rd <- lo
#
# :Syntax: MFHI rd                      # Move from hi register.
#          rd <- hi
# :Example:
#
# Simple multiplication and division examples.
#
# integer x, y, z, a, b;
# ...
# x = a * b;
# y = a / b;
# z = a % b;
#
#  Asm regs: x, $8;  y, $9;  z, $10;  a, $11;  b, $12
        mult $11, $12
        mflo $8
        div $11, $12
        mflo $9
        mfhi $10
#
####
 ## Multiply Pseudo Instruction
#
# :Syntax: MUL rd, rs, rt
#          rd <- rs * rt;
#          Note: SPIM treats this as a pseudo instruction, but it is
#          a real instruction in MIPS32.
#
        mul $2, $3, $4      # Pseudo
        mult $3, $4         # Real (First of two.)
        mflo $2             # Real (Second of two.)
################################################################################
## Histogram Program
 ## Histogram Program.
#
# Computes how many times each letter appears in a string.
#
# For example, for the following string:
# 
#   Do you know what it means to miss New Orleans
#   And miss it each night and day
#   I know I'm not wrong... this feeling's gettin' stronger
#   The longer, I stay away
#   Miss them moss covered vines...the tall sugar pines
#   Where mockin' birds used to sing
#   And I'd like to see that lazy Mississippi...hurryin' into spring
#   
#   The moonlight on the bayou.......a creole tune.... that fills the air
#   I dream... about magnolias in bloom......and I'm wishin I was there
#   
#   Do you know what it means to miss new orleans
#   When that's where you left your heart
#   And there's one thing more...i miss the one I care for
#   More than I miss New Orleans
#   
#   -- Louis Armstrong
#
# The program would generate the counts shown below:
#
#   Letter A  count:  35 (  5.66) *****************
#   Letter B  count:   4 (  0.65) **
#   Letter C  count:   5 (  0.81) **
#   Letter D  count:  13 (  2.10) ******
#   Letter E  count:  48 (  7.77) ************************
#   Letter F  count:   4 (  0.65) **
#   Letter G  count:  13 (  2.10) ******
#   Letter H  count:  26 (  4.21) *************
#   Letter I  count:  45 (  7.28) **********************
#   Letter K  count:   5 (  0.81) **
#   Letter L  count:  17 (  2.75) ********
#   Letter M  count:  21 (  3.40) **********
#   Letter N  count:  42 (  6.80) *********************
#   Letter O  count:  40 (  6.47) ********************
#   Letter P  count:   4 (  0.65) **
#   Letter R  count:  27 (  4.37) *************
#   Letter S  count:  42 (  6.80) *********************
#   Letter T  count:  41 (  6.63) ********************
#   Letter U  count:  11 (  1.78) *****
#   Letter V  count:   2 (  0.32) *
#   Letter W  count:  14 (  2.27) *******
#   Letter X  count:   1 (  0.16) 
#   Letter Y  count:  10 (  1.62) *****
#   Letter Z  count:   1 (  0.16) 
#
#
# Here is such a histogram procedure written in C:
#
#
# Unoptimized:
#
# void
# histo(char *str, int *table)
# {
#   upper(str);
#
#   for(; *str; str++)
#     {
#       char c    = *str;
#       int index = c - 'A';
#
#       if( index >= 0 && index < 26 )
#         table[ index ]++;
#     }
# }
#
#
# Optimized:
#
# void
# histo2(unsigned char *str, int *table)
# {
#   upper(str);
#
#   for(; *str; str++)
#     {
#       unsigned int index = *str - 'A';
#
#       if( index < 26 )
#         table[ index ]++;
#
#     }
# }
#
#
# The assembler version of the procedure is to be called with register
# $a0 set to the address of the first character of the string and $a1
# set to the address of the first element of the histogram table.
#
# The code below is just the histogram procedure.  The program
# used to generate the table above can be found at:
# http://www.ece.lsu.edu/ee4720/2005f/histo.html
histo:
        ## Register Usage
        #
        # Call: $a0  String to analyze.
        #       $a1  Address of table.  Each element is an integer.
        addi $s0, $ra, 0    # Make a copy of the return address.
        jal upper           # Convert to upper case.
        addi $t0, $a0, 0    # Make a copy of string start address.
        addi $ra, $s0, 0    # Restore return address.
LOOP:
        lbu $t1, 0($t0)     # Load a character.
        addi $t0, $t0, 1    # Increment address.
        beq $t1, $0, DONE   # Check for null termination.
        addi $t1, $t1, -65  # Set $t1 to table index. ( A->0, B->1, etc.)
        sltiu $t2, $t1, 26  # If $t1 is >= 26 then it's not a letter.
        beq $t2, $0, LOOP   # Note that comparison above is unsigned.
        sll $t1, $t1, 2     # Scale index.
        add $t3, $a1, $t1   # Add index on to address of first element
        lw $t4, 0($t3)      # Load histogram entry.
        addi $t4, $t4, 1
        j LOOP
        sw $t4, 0($t3)      # Store the incremented value.
DONE:
        jr $ra
        nop
####
# Assembly language generated by gcc for unoptimized version:
#
# Comment text in square brackets describe activities not yet covered.
#
$Lscope0:
	.align	2
	.globl	histo
	.text
$LM6:
# hist.c:11: {
	.ent	histo
histo:
	.frame	$sp,32,$ra		# vars= 0, regs= 3/0, args= 16, extra= 0
	.mask	0x80030000,-8
	.fmask	0x00000000,0
$LBB2:
	subu	$sp,$sp,32      # [Increment stack pointer.]
	sw	$s0,16($sp)     # [Save register s0, restored before return.]
	move	$s0,$a0
	sw	$s1,20($sp)     # [Save register s1, restored before return.]
	sw	$ra,24($sp)     # [Save register ra, restored before return.]
$LM7:
# hist.c:12:   upper(str);
	.set	noreorder
	.set	nomacro
	jal	upper
	move	$s1,$a1         # Make a copy of $a1.
	.set	macro
	.set	reorder
$LM8:
# hist.c:14:   for(; *str; str++)
	lb	$v0,0($s0)      # Load first character of string, sign extended.
	lbu	$a0,0($s0)      # Load first character of string again.
	.set	noreorder
	.set	nomacro
	beq	$v0,$zero,$L26
	sll	$a3,$a0,24      # First of two instructions to sign-extend
	.set	macro           # loaded character. (I don't know why
	.set	reorder         # compiler didn't just use $v0.)
$LBB3:
$LM9:
# hist.c:16:       char c    = *str;
$L27:
	sra	$v1,$a3,24      # Finish sign extension.
$LM10:
# hist.c:17:       int index = c - 'A';
	addu	$a1,$v1,-65
	sll	$a2,$a1,2
$LM11:
# hist.c:19:       if( index >= 0 && index <= 26 )
	sltu	$a0,$a1,27      # Note: unsigned comparison, so no need
                                # to check for index >= 0.
$LM12:
# hist.c:16:       char c    = *str;
	addu	$s0,$s0,1       # This is really : str++
$LM13:
# hist.c:19:       if( index >= 0 && index <= 26 )
	.set	noreorder
	.set	nomacro
	beq	$a0,$zero,$L11
	addu	$v1,$a2,$s1     # $v1 -> &table[ index ];
	.set	macro
	.set	reorder
$LM14:
# hist.c:20:         table[ index ]++;
	lw	$t1,0($v1)
	#nop
	addu	$t0,$t1,1
	sw	$t0,0($v1)
$LBE3:
$LM15:
# hist.c:14:   for(; *str; str++)
$L11:
	lb	$t2,0($s0)      # Load character of string, sign extended.
	lbu	$a0,0($s0)      # Load character of string again.
	.set	noreorder
	.set	nomacro
	bne	$t2,$zero,$L27
	sll	$a3,$a0,24      # First of two instructions to sign-extend
	.set	macro           # loaded character. (I don't know why
	.set	reorder         # compiler didn't just use $t2.)
$L26:
	lw	$ra,24($sp)     # [Restore saved $ra]
	lw	$s1,20($sp)     # [Restore saved $s1]
	lw	$s0,16($sp)     # [Restore saved $s0]
	#nop
	.set	noreorder
	.set	nomacro
	j	$ra
	addu	$sp,$sp,32      # [Restore stack pointer.]
	.set	macro
	.set	reorder
$LBE2:
	.end	histo