################################################################################
##
## LSU EE 4720 Fall 2005 Homework 3 -- Solution
##
##

# Original Assignment: http://www.ece.lsu.edu/ee4720/2005f/hw3.html

.data
name:
        .asciiz "David Koppelman"  # Put your name between the quotes.

################################################################################
## Problem 1 -- Unpack Routine -- Solution

# The unpack routine below unpacks every string into "Not yet
# implemented", which of course can't be right. Modify the routine so
# that it unpacks correctly. Follow the guidelines below next to the
# check boxes ([ ]).


        ## Register Usage
        #
        # $a0: Procedure call argument. Address of packed string.
        # $a1: Procedure call argument. Address of memory for unpacked string.
        # There are no return values.

        # [x] Make sure that the code at least runs to completion.
        #     Substantial credit will be deducted for syntax
        #     and run-time errors.
        # [x] Make sure the code passes the test in the demo routine
        #     and works for other strings too. The unpack routine may be
        #     graded using different strings.
        #
        # [x] Registers $t0-$t9 and $a0-$a3 can be used.
        # [x] Fill as many delay slots as possible.
        # [x] The only pseudo instructions that can be used are "nop" and "la".
        # [x] The code should be reasonable efficient,
        #     don't use two instructions where one would do.

        .text

unpack:
        la $t0, KEEP

ULOOP:
        lhu $t1, 0($a0)       # Load set of 3 encoded chars from packed string.
        andi $t2, $t1, 0x1f   # Extract the first encoded character.
        add $t3, $t0, $t2     # Compute address of decoded character ...
        lbu $t4, 0($t3)       # ...load from KEEP string ...
        sb $t4, 0($a1)        # ...and store in unpacked string.

        srl $t1, $t1, 5       # Shift second character to lower bits of $t1
        andi $t2, $t1, 0x1f   # Extract second encoded character.
        add $t3, $t0, $t2     # Compute address of decoded character ...
        lbu $t4, 0($t3)       # ...load from KEEP string ...
        sb $t4, 1($a1)        # ...and store unpacked string.

        srl $t1, $t1, 5       # Shift third character to lower bits of $t1
        andi $t2, $t1, 0x1f   # Extract the third encoded character.
        add $t3, $t0, $t2     # Compute address of decoded character...
        lbu $t4, 0($t3)       # ...load from KEEP string ...
        sb $t4, 2($a1)        # ...and store in unpacked string.

        srl $t1, $t1, 5       # Shift end-of-string bit to LSB of $t1.
        addi $a0, $a0, 2      # Compute address of next set of 3 encoded chars.
        beq $t1, $0  ULOOP    # If end-of-string bit not present, loop.
        addi $a1, $a1, 3

        jr $ra
        sb $0, 0($a1)         # Terminate the unpacked string.




################################################################################
## Demonstration Routine

# This routine calls the pack and unpack routines on a sample
# string below (DEMO_STRING), displays the original and unpacked strings
# side by side, and verifies whether the unpacked string is correct.


        .data         # Indicate to the assembler that the stuff below is data.

#
# String to demonstrate pack and unpack routines.
#
DEMO_STRING: # Address of stuff immediately following.
        # Place string in memory here, end string with NULL (0).
        .asciiz ".,!Time flies like an arrow, fruit flies like a banana. azAZ09"
        .byte 0, 0, 0 # Put three zeros in memory here.

#
# Memory space for packed and unpacked strings.
#
        .align 4      # Make sure the address below is a multiple of 4.
BUFFER_PACK:
        .space 256    # Allocate 256 characters for use by the pack routine.
        .align 4
BUFFER_UNPACK:
        .space 256    # Allocate 256 characters for use by the unpack routine.

#
# Message printed by the demonstration routine.
#
MSG:
        .asciiz "Original:\n\"%/a2/s\"\nUnpacked: \n\"%/a1/s\"\n"
MSGSPACES:
        .asciiz "                                                                                                                                                                                                                                                                "
MSGERROR:
        .asciiz "%/s4/s |<---- First error here."
MSGCORRECT:
        .asciiz "Unpack appears correct."

#
# Message displayed if name omitted.
#
who_are_you:
        .asciiz "Please put your name at the top of the file where indicated."


        .text         # Indicate to the assembler that the stuff below is code.
        .globl __start
__start:

        la $s0, name
        lb $s0, ($s0)
        bne $s0, $0,  DOTESTS
        la $a0, who_are_you
        addi $v0, $0, 11
        syscall
        addi $v0, $0, 10
        syscall


DOTESTS:
        # Load address of string and address of memory for packed
        # string and call the pack routine.
        #
        la $a0, DEMO_STRING
        la $a1, BUFFER_PACK
        jal pack
        nop

        # Load address of packed string and address of memory for unpacked
        # string and call the unpack routine.
        #
        la $a0, BUFFER_PACK
        la $a1, BUFFER_UNPACK
        jal unpack
        nop

        # Call a system (simulator) routine to display results.
        #
        la $a0, MSG
        la $a1, BUFFER_UNPACK
        la $a2, DEMO_STRING
        addi $v0, $0, 11
        syscall

        la $s0, DEMO_STRING
        la $s1, LUT
        la $s2, BUFFER_UNPACK

DVLOOP:
        lb $t0, 0($s0)
        beq $t0, $0 DVDONEOKAY
        add $t1, $s1, $t0
        lb $t1, 0($t1)
        lb $t2, 0($s2)
        bne $t1, $0 DCHAR
        addi $s0, $s0, 1
        addi $t0, $0, 46 # '.'
DCHAR:
        beq $t0, $t2, DVLOOP
        addi $s2, $s2, 1
        la $s3, DEMO_STRING
        sub $s3, $s0, $s3
        la $s4, MSGERROR
        sub $s4, $s4, $s3
        la $a0, MSGERROR
        j DVDONE
        nop
DVDONEOKAY:
        la $a0, MSGCORRECT
DVDONE:
        addi $v0, $0, 11
        syscall


        addi $v0, $0, 10
        syscall
        nop



################################################################################
## Pack Routine

        ## Register Usage
        #
        # $a0: Call argument. Address of string to pack. (Original string.)
        # $a1: Call argument. Address of memory for packed string.
        # There are no return values.

        .data
        .align 4
LUT:
        .space 256
KEEP:
        .asciiz ".,!?;abcdefghijklmnopqrstuvwxyz "
        .byte 0, 0, 0, 0
        .text
pack:

        #
        ## Packed Data Format
        #
        # Groups of three characters from the original string are packed
        # into two characters in the packed string arranged as follows:
        #
        #
        # Start with three characters from original string:
        # _________________  _________________  _________________
        # |    char0      |  |    char1      |  |    char2      |
        # ^^^^^^^^^^^^^^^^^  ^^^^^^^^^^^^^^^^^  ^^^^^^^^^^^^^^^^^
        #  7 6 5 4 3 2 1 0    7 6 5 4 3 2 1 0    7 6 5 4 3 2 1 0
        #
        #
        # Convert 8-bit characters to 5-bit encodings:
        #
        # char0 -> enc0   E.g., 'b' -> 6
        # char1 -> enc1   E.g., ',' -> 1
        # char2 -> enc2   E.g., '#' -> 0  (# cannot be represented)
        #
        #
        # Arrange the encodings into a 16-bit quantity with a
        # termination bit (t, position 15) as shown. The termination
        # is 1 for the last group of three characters. Note that the
        # exact length of the original string is not remembered.
        #
        # ___________________________________
        # | t |  enc2   |  enc1   |  enc0   |
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        #   1  1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
        #   5  4 3 2 1 0 9 8 7 6 5 4 3 2 1 0

        #
        ## Encoding
        #
        # Each character in the original string is encoded as a 5-bit
        # quantity, allowing for 32 different characters. The string
        # KEEP specifies a character's encoding by its position. For
        # example, 'a' is at position 5, so it is encoded in the
        # packed string as a 5.  If a character is not in KEEP it
        # is encoded as a 0 (a period).

        #
        ## Prepare Lookup Table (LUT)
        #
        # To speed the encoding of large strings a lookup table (LUT)
        # is prepared based on the KEEP string. The i'th element of
        # LUT gives the encoding for character i. For example, element
        # 96 ('a') of LUT is a 5, meaning in the packed string an 'a'
        # appears as a 5.

        # Set lookup table to zeros.
        #
        la $t0, LUT
        addi $t2, $t0, 252
PLOOP1:
        sw $0, 0($t0)
        bne $t0, $t2, PLOOP1
        addi $t0, $t0, 4

        # Write entries of lookup table based on KEEP string.
        #
        la $t0, LUT
        la $t1, KEEP
        addi $t3, $0, 0        # Initialize encoding to zero.
PLOOP2:
        lbu $t2, 0($t1)        # Load character of KEEP string.
        add $t4, $t0, $t2      # Find corresponding position in LUT.
        sb $t3, 0($t4)         # Write its encoding into LUT.
        addi $t1, $t1, 1       # Increment KEEP string address.
        bne $t2, $0  PLOOP2
        addi $t3, $t3, 1       # Increment encoding, for next character.

        # Initialize first element of LUT (for the NULL character).
        #
        addi $t3, $0, -1
        sb $t3, 0($t0)

        #
        ## Pack String
        #

        # Each iteration of the loop packs three characters.

PLOOP:
        lbu $t4, 0($a0)     # Load char from original string.
        add $t1, $t0, $t4   # Compute LUT address.

        lb $t2, 0($t1)      # Load character's encoding.
        add $t5, $t2, $0    # Keep copy of encoding to check for end of string.

        lbu $t4, 1($a0)     # Load next character from original string.
        add $t1, $t0, $t4   # Compute LUT address.

        lb $t3, 0($t1)      # Load character's encoding.
        or $t5, $t5, $t3    # OR in encoding to remember if at end of string.

        sll $t3, $t3, 5     # Shift second encoding over...
        or $t2, $t2, $t3    # ...and OR into packed value. (Two chars so far.)

        lbu $t4, 2($a0)     # Load next character from original string.
        add $t1, $t0, $t4   # Compute LUT address.

        lb $t3, 0($t1)      # Load character's encoding.
        or $t5, $t5, $t3    # OR in encoding to remember if at end of string.
        sll $t3, $t3, 10    # Shift third encoding over...
        or $t2, $t2, $t3    # and OR into packed value. All three chars packed.

        sh $t2, 0($a1)      # Store packed characters...
        addi $a1, $a1, 2    # ...and increment address.

        bgez $t5  PLOOP     # If none of the 3 chars NULL continue with loop.
        addi $a0, $a0, 3    # Move to next group of three characters.

        #
        ## Terminate String and Return
        #

        ori $t2, $t2, 0x8000 # Add termination bit to last packed value.

        jr $ra
        sh $t2, -2($a1)      # Re-write last packed value with termination.