## LSU EE 4720 -- Spring 2024 -- Computer Architecture
#
## Array Access Examples

# Time-stamp: <5 February 2024, 10:16:02 CST, koppel@dmk-laptop-23>

 ## Contents
#
# MIPS Load and Store Review
# Array Access Examples


################################################################################
## MIPS Load and Store Review

 ## In MIPS-I
#
# Each memory location holds 8 bits (one byte, character, or octet).
#   This is true of most ISAs today, including RISC, CISC, and VLIW.
#
# Register size: 32 bits.
#   This is true of older ISAs and those meant for embedded applications.
#
# Notice that four memory locations (8 bits each) can fit in one
# MIPS-I register. A detail that needs to be specified is where in the
# register to put the byte(s) loaded from memory. For example, a lw
# loads four bytes (from four consecutive memory locations). The
# arrangement of bytes used by an ISA (or instruction) is called
# the *byte ordering*.
#
# :Def: Byte Ordering
#      The rule used to determine how bytes loaded from memory
#      are arranged in a register. The two common orderings are
#      big-endian and little-endian.
#
# :Def: Big-Endian Byte Ordering
#      A byte ordering in which the byte corresponding to the memory
#      address is placed in the most-significant 8 bits of the
#      register. (Put another way, the byte is put in the big end of
#      the register.) See the MIPS lw instruction below.
#
# :Def: Little-Endian Byte Ordering
#      A byte ordering in which the byte corresponding to the memory
#      address is placed in the least-significant 8 bits of the
#      register. (Put another way, the byte is put in the little end of
#      the register.)
#
# MIPS is big-endian. Most 21st century and ISAs are bi-endian,
# meaning they can operate with either a big- or little-endian byte
# order. HOWEVER, byte ordering is similar to the side of the road to
# drive on. Though it really doesn't make a difference which side you
# drive on it's important that everyone follows the rule. For that
# reason, an OS and ABI will use just one byte ordering, rather than
# allowing code to switch even if the ISA allows it.

 ## MIPS Load and Store Instructions

 ## MIPS Load Byte, Load Byte Unsigned
#
# :Syntax: LB rt, offset(rs)                  # Load Byte
#          rt <- sign_extend( Mem[ rs + offset ] );
#
#          Writes byte into least-significant 8 bits (bits 7:0) ..
#          .. and writes bits 31:8 with the sign bit of Mem[rs+offset].

# :Syntax: LBU rt, offset(rs)                 # Load Byte Unsigned
#          rt <- zero_pad( Mem[ rs + offset ] );
#
#          Writes byte into least-significant 8 bits (bits 7:0) ..
#          .. and writes bits 31:8 with 0.
#
#          Writing zeros this way is called /zero-padding/.
#
#          Note: rs and rt are registers.
#          Offset is a 16-bit immediate.

# :Example:
#
        # Initially: $a2 -> 0x1000
        # At Mem[ 0x1000 ] = 0x12
        # At Mem[ 0x1001 ] = 0x7f
        # At Mem[ 0x1002 ] = 0x80
        # At Mem[ 0x1003 ] = 0xff
        #
        lb $t0, 0($a2)  # $t0 -> 0x12        18
        lb $t1, 1($a2)  # $t1 -> 0x7f        127
        lb $t2, 2($a2)  # $t2 -> 0xffffff80  (-128)  # Sign-extend  MSB of 0x80
        lb $t3, 3($a2)  # $t3 -> 0xffffffff  (-1)

        # Here bits 31:8 of the destination are always padded with zeros.
        lbu $t4, 0($a2)  # $t4 -> 0x12        18
        lbu $t5, 1($a2)  # $t5 -> 0x7f        127
        lbu $t6, 2($a2)  # $t6 -> 0x80        128
        lbu $t7, 3($a2)  # $t7 -> 0xff        255

 ## MIPS Load Word
#
# To load a register use lw (load word).
# Loads for consecutive bytes into a register.

# :Syntax: LW rt, offset(rs)
#          rt <- { Mem[ rs + offset + 0 ], Mem[ rs + offset + 1 ],
#                  Mem[ rs + offset + 2 ], Mem[ rs + offset + 3 ]  }
#          Load register rt with four bytes of memory starting at address
#          rs + offset.
#          Address rs + offset must be a multiple of 4.

# :Example:
#
        # Assume: $a2 -> 0x1004
        # At Mem[ 0x1004 ] = 0x11223344
        # More precisely:
        #   Mem[ 0x1004 ] = 0x11
        #   Mem[ 0x1005 ] = 0x22
        #   Mem[ 0x1006 ] = 0x33
        #   Mem[ 0x1007 ] = 0x44
        #
        lw $a0, 0($a2)
        # 0x11223344 loaded into $a0

        lw $a0, 2($a2)  # Error
        # Effective address = 0x1006, not a multiple of 4

        addi $a2, $a2, -2
        lw $a0, 2($a2)  # No problem
        # Effective address = 0x1004, is a multiple of 4

 ## Other MIPS Load and Store Instructions

        lh  $a0, 0(a1)   # Load half
        lhu $a0, 0(a1)   # Load half unsigned.
        sh  $a0, 0(a1)   # Store half



 ## Aligned Access
#
# :Def: Alignment Restriction on a memory address used by a load or store insn
#      A restriction on the addresses acceptable for use in the load,
#      store, or other memory access instruction. The address must be
#      a multiple of the size of the item loaded. For example, a MIPS
#      lw instruction (load word) loads a four-byte value from memory.
#      Therefore the address must be a multiple of 4. The same holds
#      for sw (store word).

# MIPS Alignment Restriction
#      A memory address must be a multiple of the data size.
#      lw: Multiple of 4. Good: 0x1000, 0x1004.  Bad: 0x1001, 0x1002.
#      lh: Multiple of 2. Good: 0x1000, 0x1002.  Bad: 0x1001, 0x1003.
#      lb: Multiple of 1.

# :Example:
#
# Three of the four instructions below makes an unaligned access.
# As a result the unaligned access the instruction will raise an
# exception, forcing the OS to take over and take appropriate action.
# Appropriate action might mean terminating the program.
#
        lw $t1, 0($t2)
        lw $t3, 1($t2)
        lw $t3, 2($t2)
        lw $t3, 3($t2)




################################################################################
## Array Access Examples

 ## Array Access
#
#   Consider the C code:
#
#      int a[10];
#      ...
#      x = a[0];
#
#   How do we convert it to assembly language?
#
#   Key Pieces of Information:
#
#     The C variable "a" holds the address of the start of the array.
#     The size of an int in this compiler is 4 bytes.
#
#   Let's assume that the compiler or assembler assigns "a" to
#   register $t1.
#
#   Let us also assume that the compiler assigned C variable x
#   to register $t2.
#
#   Then the C "x = a[0];" statement in assembly language would be:
#
        lw $t2, 0($t1)
#
#
#   This was easy because the array index is zero.
#
# Given the base address of an array (address of first element, say &a[0]),
# need to compute the address of an element at index i, (say, & a[i] ).
# To do this compute:
#
#     i * sizeof( a[0] )
#
#  In English:
#     Address of i'th element of a is the address of a + i * size of element.
#
 ## Element Sizes
#
#   Some languages specify exact sizes for types, ..
#   .. such as 4 bytes for an integer.
#
#   C IS NOT one of those languages.
#   Nor is C++, C++11, C++20.
#
#   An informal terminology indicates the data model (sizes of C types):
#
#     ILP32:
#     int, long, void* (Pointers) are 32 bits (four characters).
#     short, 16 bits.
#
#     LP64
#     long,  void* (Pointers) are 64 bits (eight characters).
#     int, 32 bits,
#
#     ILP64
#     int, long, void* (Pointers) are 64 bits (eight characters).
#
#   For MIPS code, assume an ILP32 data model.
#   Otherwise, assume LP64.




# :Example:
#
# Appearing below are fragments of C code along with the equivalent
# MIPS assembler. In most fragments i is the index (the number of the
# element to load). Note that i must be multiplied by the size of the
# element before adding it on to the address of the first element.
#

       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # char *c; ...      # $s4 = c;  $t0 = i
        # x = c[i];
        #

        add $t5, $s4, $t0   # $t5 -> &c[i]  (Address of c[i].)
        lb $t1, 0($t5)      # x = c[i];   $t1 -> c[i]

       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # char *c; ...      # $s4 = c;  $t0 = i
        # x = c[i+1] + c[i+2];
        #

        ## WARNING: naïve:
        addi $t1, $t0, 1
        add $t5, $s4, $t1   # $t5 -> &c[i]  (Address of c[i].)
        lb $t6, 0($t5)      # $t6 -> c[i+1]
        addi $t1, $t0, 2
        add $t5, $s4, $t1   # $t5 -> &c[i]  (Address of c[i].)
        lb $t7, 0($t5)      # $t7 -> c[i+2]
        add $t9, $t6, $t7

        # The is the way it should be done.
        add $t5, $s4, $t0   # $t5 -> &c[i]  (Address of c[i].)
        lb $t6, 1($t5)      # $t6 -> c[i+1]
        lb $t7, 2($t5)      # $t7 -> c[i+2]

        add $t1, $t6, $t7   # x = c[i+1] + c[i+2]


       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # int *a; ...       # $s1 = a;  $t0 = i    sizeof(int) = 4 chars.
        # x = a[i];


        sll $t5, $t0, 2     # $t5 -> i * 4;  Each element is four characters.
        add $t5, $s1, $t5   # $t5 -> &a[i]  (Address of a[i].)
        lw $t1, 0($t5)      # x = a[i];   $t1 -> a[i]


       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # int *a; ...       # $s1 = a;  $t0 = i
        # x = a[i+1] + a[i+2];
        #

        sll $t5, $t0, 2     # $t5 -> i * 4;  Each element is four characters.
        add $t5, $s1, $t5   # $t5 -> &a[i]  (Address of a[i].)
        lw $t6, 4($t5)      # $t6 -> a[i+1]
        lw $t7, 8($t5)      # $t7 -> a[i+2]
        add $t1, $t6, $t7   # x = a[i+1] + a[i+2]


       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # int x, j, *a, *b; # $t1 = x;  $t2 = j;  $s1 = a;  $s2 = b
        # // Assume: a = 0x1000
        # j = 3;
        # b = a + j;   // b <= 0x100c   b = &a[3] 
        # x = *b;      // x = a[3];

        addi $t2, $0, 3     # j = 3;
        sll $t5, $t2, 2     # $t5 -> j * 4
        add $s5, $s1, $t5   # b = a + j;
        lw $t1, 0($s5)      # x = *b = a[j]

        # Optimized
        lw $t1, 12($s1)      # x = a[3]


       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # short *s; ...     # $s2 = s;  $t0 = i;  sizeof(short) = 2 chars
        # x = s[i];
        #

        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s2, $t5   # $t5 -> &s[i]  (Address of s[i].)
        lh $t1, 0($t5)      # x = s[i];   $t1 -> s[i]


       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        # short *s; ...     # $s2 = s;  $t0 = i
        # x = s[ s[i] ];
        #

        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s2, $t5   # $t5 -> &s[i]  (Address of s[i].)
        lh $t8, 0($t5)

        sll $t5, $t8, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s2, $t5   # $t5 -> &s[i]  (Address of s[i].)
        lh $t1, 0($t5)



       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #
        #                     $s3 = us;  $t0 = i
        # unsigned short *us;
        # x = us[i];
        #

        sll $t5, $t0, 1     # $t5 -> i * 2;  Each element is two characters.
        add $t5, $s3, $t5   # $t5 -> &us[i]  (Address of us[i].)
        lhu $t1, 0($t5)      # x = us[i];   $t1 -> us[i]



       ## C CODE                     #  ASM REGISTER = C VARIABLE NAME
        #              0      4       5          8
        # struct str { int i; char c; char p[3]; int j };
        #              <-4->  <-1->   <- 3 --->  8
        # str a;
        # ..
        # x = a.i + a.c + a.j + a.p[1]; // $t1 -> x;    $a1 -> &a (address of a)

        # $a1 is address of a;
        lw $t1, 0($a1)  # $t1-> a.i;
        lb $t2, 4($a1)  # $t2-> a.c;
        lw $t3, 8($a1)  # $t3-> a.j
        lb $t4, 6($a1)  # $t4-> a.p[1]
        add $t1, $t1, $t2
        add $t1, $t1, $t3
        add $t1, $t1, $t4

####