## LSU EE 3755 -- Spring 2002 -- Computer Organization
#
## Note Set 12 -- Load and Store Instructions
#
# Time-stamp: <15 April 2002, 11:44:24 CDT, koppel@sol>

## Contents
#
# Load Byte (Unsigned)
# Store Byte
# Load Byte
# Load Word, Store Word
# Load and Store Half
# Array Access Examples
# Histogram Program


## References
#
# :PH:  Patterson & Hennessy, "Computer Organization & Design"
# :Mv1: MIPS Technologies, "MIPS32 Architecture for Programmers Vol I: Intro"
# :Mv2: MIPS Technologies, "MIPS32 Architecture for Programmers Vol II: Instr"


################################################################################
## Load Byte (Unsigned)


# :Syntax: LBU rt, offset(rs)                  # Load Byte Unsigned
#          rt <- { 24'b0, Mem[ rs + offset ] }
#          Note: rs and rt are registers.
#          Offset is a 16-bit immediate.
#


# :Example:
#
# Simple uses of lbu.

        # Initially: $a2 -> 0x1000
        # At Mem[ 0x1004 ] = 0x12
        #
        lbu $a0, 4($a2)
        #
        # Effective address is offset + rs = 4 + 0x1000 = 0x1004.
        # $a0 loaded with 0x00000012 (the byte 0x12, upper 24 bits zero).

        addi $a2, $a2, 4   # $a2 -> 0x1004
        lbu $a0, 0($a2)
        #
        # Effective address is 0 + 0x1004 = 0x1004 (same as above).
        # $a0 loaded with 0x12 (again).
####


# :Example:
#
# Procedure to determine the length of a C-style string.  Includes
# code to call the procedure.

        .data
        # Null-terminated string whose length is to be determined.
str:
        .asciiz "The quick brown fox, fox 0, jumps over the lazy dog."
        # Format string for the message system call.  It refers to
        # registers $a1 and $v1, which are set before the system call below.
msg:
        .asciiz "The length of string \n   \"%/a1/s\"\nis %/v1/d.\n"

        .text
        .globl __start
__start:
        la $a0, str             # Load address of string.
        jal strlen              # Call strlen procedure.
        nop                     # Nothing useful to do right after the jal.
        #
        # On return $v0 holds the length; strlen does not modify $a0.
        #
        addi $a1, $a0, 0        # Move address of string to $a1
        addi $v1, $v0, 0        # Move length of string to $v1
        addi $v0, $0, 11        # System call code for message.
        la $a0, msg             # Address of message.
        syscall
        addi $v0, $0, 10        # System call code for exit.
        syscall

strlen:
        ## Purpose
        #
        # Return the number of characters in a null-terminated string,
        # not counting the terminating null.
        #
        ## Register Usage
        #
        # $a0: Address of first character of string.  (Not modified.)
        # $v0: Return value, the length of the string.
        #
        # $t0: Character being examined.
        # $t1: Address of current character being examined.
        #
        # Note: The instruction following a branch or jump is executed
        # before control transfers, which is why the return value can be
        # computed by the instruction after the jr.
        #
        addi $t1, $a0, 0        # Start at the first character.
LOOP:
        lbu $t0, 0($t1)         # Load the current character.
        addi $t1, $t1, 1        # Advance to the next character.
        bne $t0, $0, LOOP       # Continue until the null is loaded.
        nop

        # Here $t1 is one past the null, so back up to the null itself.
        addi $t1, $t1, -1
        jr $ra
        sub $v0, $t1, $a0       # Length = address of null - address of start.

####


################################################################################
## Store Byte

# :Syntax: SB rt, offset(rs)                  # Store Byte
#          Mem[ rs + offset ] = rt[7:0]


# :Example:
#
# Simple uses of sb.

        # Initially: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0
        #
        addi $t0, $0, 0x14    # $t0 -> 0x14
        sb $t0, 0($a2)
        #
        # Effective address is 0 + 0x1004 = 0x1004
        # 0x14 written to Mem[ 0x1004 ]

        addi $t0, $0, 0x1234   # $t0 -> 0x1234
        sb $t0, 0($a2)
        #
        # Effective address is 0 + 0x1004 = 0x1004
        # 0x34 written to Mem[ 0x1004 ]
        # Note that only bits 7:0 of $t0 are written to memory;
        # the 0x12 in bits 15:8 is not stored anywhere.
####


# :Example:
#
# Program to convert a C-style string to upper case.

        .data
        # String to be converted to upper case (modified in place by upper).
str:
        .asciiz "Hello, world!"
        # Format strings for the message system call.  Both refer to
        # register $s1, which holds the address of str.
before:
        .asciiz "Before: %/s1/s\n"
after:
        .asciiz "After:  %/s1/s\n"

        .text
        .globl __start
__start:
        la $s1, str             # $s1 -> address of string.
        la $a0, before          # Address of "Before" message.
        addi $v0, $0, 11        # System call code for message.
        syscall
        jal upper               # Convert string to upper case.
        add $a0, $s1, $0        # Argument for upper, set just after the jal.
        la $a0, after           # Address of "After" message.
        addi $v0, $0, 11        # System call code for message.
        syscall
        li $v0, 10              # System call code for exit.
        syscall


upper:
        ## Purpose
        #
        # Convert a null-terminated string to upper case, in place.
        # Characters outside 'a' through 'z' are left unchanged.
        #
        ## Register Usage
        #
        # $a0: (Call) Address of string to convert.
        #
        # $a0: Address of character being examined.  (Modified.)
        # $t1: Character being examined.
        # $t2: Comparison result.
        #
        # Note: The instruction following each branch or jump is executed
        # whether or not the branch is taken.
LOOP:
        lbu $t1, 0($a0)         # Load a character.
        addi $a0, $a0, 1        # Advance; character is now at -1($a0).
        beq $t1, $0, DONE       # Check for null termination.
        slti $t2, $t1, 97 # < 'a'
        bne $t2, $0, LOOP       # Below 'a': not lower case, skip it.
        slti $t2, $t1, 123 # 'z' + 1
        beq $t2, $0, LOOP       # Above 'z': not lower case, skip it.
        addi $t1, $t1, -32      # Convert: 'a' - 'A' = 32.  Harmless if the
                                # branch is taken, $t1 is reloaded at LOOP.
        j LOOP
        sb $t1,-1($a0)          # Write the converted character back.

DONE:
        jr $ra
        nop
####


################################################################################
## Load Byte

# :Syntax: LB rt, offset(rs)                  # Load Byte
#          rt <- sign_extend( Mem[ rs + offset ] );
#          Note: rs and rt are registers.
#          Offset is a 16-bit immediate.


# :Example:
#
#

        # Sign extension performed by lb:  bit 7 of the loaded byte is
        # copied into bits 31:8 of the destination register.
        #
        # Initially: $a2 -> 0x1000
        # At Mem[ 0x1000 ] = 0x12
        # At Mem[ 0x1001 ] = 0x7f
        # At Mem[ 0x1002 ] = 0x80
        # At Mem[ 0x1003 ] = 0xff
        #
        lb $t0, 0($a2)  # $t0 -> 0x12        (18)
        lb $t1, 1($a2)  # $t1 -> 0x7f        (127)
        lb $t2, 2($a2)  # $t2 -> 0xffffff80  (-128)
        lb $t3, 3($a2)  # $t3 -> 0xffffffff  (-1)
####


 ## Usage
#
# lbu:  Characters, unsigned integers.
# lb:   Signed integers.


################################################################################
## Load Word, Store Word

 ## Note
#
# Each memory location holds 8 bits.
# Register size: 32 bits.
#
# To load a register use lw (load word).
# Loads four consecutive bytes into a register.

# :Syntax: LW rt, offset(rs)
#          rt <- { Mem[ rs + offset + 0 ], Mem[ rs + offset + 1 ],
#                  Mem[ rs + offset + 2 ], Mem[ rs + offset + 3 ]  }
#          Load register rt with four bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 4.


# :Example:
#
        # Assume: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0x00003755
        # More precisely:  (Big Endian)
        #   Mem[ 0x1004 ] = 0x00
        #   Mem[ 0x1005 ] = 0x00
        #   Mem[ 0x1006 ] = 0x37
        #   Mem[ 0x1007 ] = 0x55
        #
        lw $a0, 0($a2)
        # Effective address = 0x1004, a multiple of 4.
        # 0x00003755 loaded into $a0

        lw $a0, 2($a2)  # Error
        # Effective address = 0x1006, not a multiple of 4

        addi $a2, $a2, -2   # $a2 -> 0x1002
        lw $a0, 2($a2)  # No problem
        # Effective address = 2 + 0x1002 = 0x1004, is a multiple of 4.
        # Only the effective address must be aligned, not $a2 itself.



# :Example:
#

        # Two consecutive words in memory, loaded in several equivalent ways.
        .data
my_word:
        .word 123         # At address my_word.
        .word 456         # At address my_word + 4.

        .text
        la $t0, my_word   # t0 <- address of my_word
        lw $t1, 0($t0)    # t1 <- 123
        lw $t2, 4($t0)    # t2 <- 456
        addi $t0, $t0, 4  # t0 <- my_word + 4
        lw $t3, 0($t0)    # t3 <- 456  (Another way to load 456)
        addi $t0, $t0, 4  # t0 <- my_word + 8
        lw $t4, -4($t0)   # t4 <- 456  (And another way to load 456)
# Error: the effective address, my_word + 9, is not a multiple of 4, so
# the instruction will never finish (it raises an address error instead).
        lw $t9, 1($t0)
####


# :Syntax: SW rt, offset(rs)
#          Mem[ rs + offset ] = rt[31:24]
#          Mem[ rs + offset + 1 ] = rt[23:16]
#          Mem[ rs + offset + 2 ] = rt[15:8]
#          Mem[ rs + offset + 3 ] = rt[7:0]
#          Write memory starting at address offset + rs with contents of rt.
#          Effective address, rs + offset, must be a multiple of 4.


# :Example:
#
        # Assume: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0x00000000
        # More precisely:  (Big Endian)
        #   Mem[ 0x1004 ] = 0x00
        #   Mem[ 0x1005 ] = 0x00
        #   Mem[ 0x1006 ] = 0x00
        #   Mem[ 0x1007 ] = 0x00
        #
        lui $a0, 0x1234
        ori $a0, $a0, 0x5678  # $a0 -> 0x12345678
        sw $a0, 0($a2)
        # After the store (most-significant byte at the lowest address):
        #   Mem[ 0x1004 ] = 0x12
        #   Mem[ 0x1005 ] = 0x34
        #   Mem[ 0x1006 ] = 0x56
        #   Mem[ 0x1007 ] = 0x78
        lbu $t0, 0($a2)  # $t0 -> 0x12
        lbu $t1, 1($a2)  # $t1 -> 0x34
        lbu $t2, 2($a2)  # $t2 -> 0x56
        lbu $t3, 3($a2)  # $t3 -> 0x78


        lw $a0, 2($a2)  # Error
        # Effective address = 0x1006, not a multiple of 4

        addi $a2, $a2, -2   # $a2 -> 0x1002
        lw $a0, 2($a2)  # No problem
        # Effective address = 2 + 0x1002 = 0x1004, is a multiple of 4
####


################################################################################
## Load and Store Half


# :Syntax: LH rt, offset(rs)                    # Load Half
#          rt <- sign_extend( { Mem[ rs + offset ], Mem[ rs + offset + 1] } )
#          Load register rt with two bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 2.
#
# :Syntax: LHU rt, offset(rs)                   # Load Half Unsigned
#          rt <- { 16'b0, Mem[ rs + offset ], Mem[ rs + offset + 1] }
#          Load register rt with two bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 2.
#
# :Syntax: SH rt, offset(rs)                    # Store Half
#          Mem[ rs + offset + 0 ] = rt[15:8]
#          Mem[ rs + offset + 1 ] = rt[7:0]
#          Effective address, rs + offset, must be a multiple of 2.


################################################################################
## Array Access Examples

# :Example:
#
# Array accesses.  In most examples i is the index (the number of the
# element to load).  Note that i must be multiplied by the size of the
# element before adding it on to the address of the first element.
#
# Registers:  a, s1;  b, s5;  s, s2;  us, s3;  c, s4;  i, t0;  x, t1

        # char *c; ...      # $s4 = c;  $t0 = i
        # x = c[i];
        #
        # Each element is one character, so i is used without scaling.
        #
        add $t5, $s4, $t0   # $t5 -> &c[i]  (Address of c[i].)
        lb $t1, 0($t5)      # x = c[i];   $t1 -> c[i]

        # char *c; ...      # $s4 = c;  $t0 = i
        # x = c[i+1] + c[i+2];
        #
        add $t5, $s4, $t0   # $t5 -> &c[i]  (Address of c[i].)
        lb $t6, 1($t5)      # $t6 -> c[i+1]  (Offset 1 selects next element.)
        lb $t7, 2($t5)      # $t7 -> c[i+2]
        add $t1, $t6, $t7   # x = c[i+1] + c[i+2]

        # int *a; ...       # $s1 = a;  $t0 = i
        # x = a[i];
        #
        sll $t5, $t0, 2     # $t5 -> i * 4;  Each element is four characters.
        add $t5, $s1, $t5   # $t5 -> &a[i]  (Address of a[i].)
        lw $t1, 0($t5)      # x = a[i];   $t1 -> a[i]

        # int *a; ...       # $s1 = a;  $t0 = i
        # x = a[i+1] + a[i+2];
        #
        sll $t5, $t0, 2     # $t5 -> i * 4;  Each element is four characters.
        add $t5, $s1, $t5   # $t5 -> &a[i]  (Address of a[i].)
        lw $t6, 4($t5)      # $t6 -> a[i+1]  (Offset is 1 * 4.)
        lw $t7, 8($t5)      # $t7 -> a[i+2]  (Offset is 2 * 4.)
        add $t1, $t6, $t7   # x = a[i+1] + a[i+2]

        # int x, j, *a, *b; # $t1 = x;  $t2 = j;  $s1 = a;  $s5 = b
        # j = 3;
        # b = a + j;
        # x = *b;      // x = a[3];
        #
        # Note that in C, a + j advances the pointer by j elements, so
        # j must be multiplied by the element size here too.
        #
        addi $t2, $0, 3     # j = 3;
        sll $t5, $t2, 2     # $t5 -> j * 4
        add $s5, $s1, $t5   # b = a + j;
        lw $t1, 0($s5)      # x = *b = a[j]

        # short *s; ...     # $s2 = s;  $t0 = i
        # x = s[i];
        #
        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s2, $t5   # $t5 -> &s[i]  (Address of s[i].)
        lh $t1, 0($t5)      # x = s[i];   $t1 -> s[i]  (Sign extended.)

        # unsigned short *us; ...   # $s3 = us;  $t0 = i
        # x = us[i];
        #
        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s3, $t5   # $t5 -> &us[i]  (Address of us[i].)
        lhu $t1, 0($t5)      # x = us[i];   $t1 -> us[i]  (Zero extended.)
####


################################################################################
## Histogram Program

 ## Histogram Program.
#
# Computes how many times each letter appears in a string.
#
# For example, for the following string:
#
#  We hold these truths to be self-evident that all men are created
#  equal, that they are endowed by their Creator with certain unalienable
#  Rights, that among these are Life, Liberty, and the pursuit of
#  Happiness. That to secure these rights, Governments are instituted
#  among Men, deriving their just powers from the consent of the
#  governed.  That whenever any Form of Government becomes destructive of
#  these ends, it is the Right of the People to alter or to abolish it,
#  and to institute new Government, laying its foundation on such
#  principles and organizing its powers in such form, as to them shall
#  seem most likely to effect their Safety and Happiness
#
# The program would generate the counts shown below:
#
#  Letter A  count:  33 (  5.05) ****************
#  Letter B  count:   6 (  0.92) ***
#  Letter C  count:  11 (  1.68) *****
#  Letter D  count:  15 (  2.30) *******
#  Letter E  count:  77 ( 11.79) **************************************
#  Letter F  count:  14 (  2.14) *******
#  Letter G  count:  13 (  1.99) ******
#  Letter H  count:  32 (  4.90) ****************
#  Letter I  count:  36 (  5.51) ******************
#  Letter J  count:   1 (  0.15)
#  Letter K  count:   1 (  0.15)
#  Letter L  count:  18 (  2.76) *********
#  Letter M  count:  14 (  2.14) *******
#  Letter N  count:  39 (  5.97) *******************
#  Letter O  count:  36 (  5.51) ******************
#  Letter P  count:  11 (  1.68) *****
#  Letter Q  count:   1 (  0.15)
#  Letter R  count:  34 (  5.21) *****************
#  Letter S  count:  36 (  5.51) ******************
#  Letter T  count:  65 (  9.95) ********************************
#  Letter U  count:  13 (  1.99) ******
#  Letter V  count:   8 (  1.23) ****
#  Letter W  count:   7 (  1.07) ***
#  Letter Y  count:   7 (  1.07) ***
#  Letter Z  count:   1 (  0.15)
#
#
# Here is such a histogram procedure written in C:
#
#
# Unoptimized:
#
# void
# histo(char *str, int *table)
# {
#   upper(str);
#
#   for(; *str; str++)
#     {
#       char c    = *str;
#       int index = c - 'A';
#
#       if( index >= 0 && index <= 26 )  // Note: should be index < 26;
#         table[ index ]++;              // index 26 is '[', one past 'Z'.
#     }
# }
#
#
# Optimized:
#
# void
# histo2(unsigned char *str, int *table)
# {
#   upper(str);
#
#   for(; *str; str++)
#     {
#       unsigned int index = *str - 'A';
#
#       if( index < 26 )
#         table[ index ]++;
#
#     }
# }
#
#
# The assembler version of the procedure is to be called with register
# $a0 set to the address of the first character of the string and $a1
# set to the address of the first element of the histogram table.
#
# The code below is just the histogram procedure.  The program
# used to generate the table above can be found at:
#http://www.ece.lsu.edu/ee3755/2001f/histo.html

histo:
        ## Purpose
        #
        # Convert the string to upper case, then for each letter 'A' to
        # 'Z' in the string increment the corresponding table entry.
        #
        ## Register Usage
        #
        # Call: $a0  String to analyze.
        #       $a1  Address of table.  Each element is an integer.
        #
        #       $s0  Copy of return address while upper is called.
        #       $t0  Address of next character to examine.
        #       $t1  Character; then table index; then index * 4.
        #       $t2  Comparison result.
        #       $t3  Address of table entry.
        #       $t4  Value of table entry.
        #
        # Note: $s0 is overwritten and not restored.  This code relies on
        # upper leaving $t0 and $a1 unchanged.
        #
        # Note: The instruction following each branch or jump is executed
        # whether or not the branch is taken.
        #
        # Labels are prefixed with HISTO_ so that they do not collide with
        # the LOOP and DONE labels in upper, which must be assembled with
        # this procedure.

        addi $s0, $ra, 0    # Make a copy of the return address.
        jal upper           # Convert to upper case.
        addi $t0, $a0, 0    # Make a copy of string start address.
        addi $ra, $s0, 0    # Restore return address.

HISTO_LOOP:
        lbu $t1, 0($t0)     # Load a character.
        addi $t0, $t0, 1    # Increment address.
        beq $t1, $0, HISTO_DONE   # Check for null termination.
        addi $t1, $t1, -65  # Set $t1 to table index. ( A->0, B->1, etc.)
        sltiu $t2, $t1, 26  # If $t1 is >= 26 then it's not a letter.
        beq $t2, $0, HISTO_LOOP   # Note that comparison above is unsigned.
        sll $t1, $t1, 2     # Scale index.
        add $t3, $a1, $t1   # Add index on to address of first element
        lw $t4, 0($t3)      # Load histogram entry.
        addi $t4, $t4, 1
        j HISTO_LOOP
        sw $t4, 0($t3)      # Store the incremented value.

HISTO_DONE:
        jr $ra
        nop
####


# Assembly language generated by gcc for unoptimized version:
#
# Comment text in square brackets describes activities not covered in 3755.
#
$Lscope0:
	.align	2
	.globl	histo
	.text
$LM6:
# hist.c:11: {
	.ent	histo
histo:
	.frame	$sp,32,$ra		# vars= 0, regs= 3/0, args= 16, extra= 0
	.mask	0x80030000,-8
	.fmask	0x00000000,0
$LBB2:
	subu	$sp,$sp,32      # [Decrement stack pointer by 32.]
	sw	$s0,16($sp)     # [Save register s0, restored before return.]
	move	$s0,$a0         # Make a copy of $a0 (str).
	sw	$s1,20($sp)     # [Save register s1, restored before return.]
	sw	$ra,24($sp)     # [Save register ra, restored before return.]
$LM7:
# hist.c:12:   upper(str);
	.set	noreorder
	.set	nomacro
	jal	upper
	move	$s1,$a1         # Make a copy of $a1.
	.set	macro
	.set	reorder

$LM8:
# hist.c:14:   for(; *str; str++)
	lb	$v0,0($s0)      # Load first character of string, sign extended.
	lbu	$a0,0($s0)      # Load first character of string again.
	.set	noreorder
	.set	nomacro
	beq	$v0,$zero,$L26
	sll	$a3,$a0,24      # First of two instructions to sign-extend
	.set	macro           # loaded character. (I don't know why
	.set	reorder         # compiler didn't just use $v0.)

$LBB3:
$LM9:
# hist.c:16:       char c    = *str;
$L27:
	sra	$v1,$a3,24      # Finish sign extension.
$LM10:
# hist.c:17:       int index = c - 'A';
	addu	$a1,$v1,-65     # $a1 -> index
	sll	$a2,$a1,2       # $a2 -> index * 4
$LM11:
# hist.c:19:       if( index >= 0 && index <= 26 )
	sltu	$a0,$a1,27      # Note: unsigned comparison, so no need
                                # to check for index >= 0.
$LM12:
# hist.c:16:       char c    = *str;
	addu	$s0,$s0,1       # This is really : str++
$LM13:
# hist.c:19:       if( index >= 0 && index <= 26 )
	.set	noreorder
	.set	nomacro
	beq	$a0,$zero,$L11
	addu	$v1,$a2,$s1     # $v1 -> &table[ index ];
	.set	macro
	.set	reorder

$LM14:
# hist.c:20:         table[ index ]++;
	lw	$t1,0($v1)      # Load table entry.
	#nop
	addu	$t0,$t1,1       # Increment it.
	sw	$t0,0($v1)      # Store incremented value.
$LBE3:
$LM15:
# hist.c:14:   for(; *str; str++)
$L11:
	lb	$t2,0($s0)      # Load character of string, sign extended.
	lbu	$a0,0($s0)      # Load character of string again.
	.set	noreorder
	.set	nomacro
	bne	$t2,$zero,$L27
	sll	$a3,$a0,24      # First of two instructions to sign-extend
	.set	macro           # loaded character. (I don't know why
	.set	reorder         # compiler didn't just use $t2.)

$L26:
	lw	$ra,24($sp)     # [Restore saved $ra]
	lw	$s1,20($sp)     # [Restore saved $s1]
	lw	$s0,16($sp)     # [Restore saved $s0]
	#nop
	.set	noreorder
	.set	nomacro
	j	$ra
	addu	$sp,$sp,32      # [Restore stack pointer.]
	.set	macro
	.set	reorder

$LBE2:
	.end	histo