################################################################################
##
## LSU EE 4720 Fall 2005 Homework 2 Solution
##
##

# Assignment http://www.ece.lsu.edu/ee4720/2005f/hw2.html


################################################################################
## Pack Routine

        ## Register Usage
        #
        # $a0: Call argument. Address of string to pack. (Original string.)
        # $a1: Call argument. Address of memory for packed string.
        # There are no return values.

        # [x] Add comments to this routine.
        #     Comments should explain what routine does.
        # [x] Add comments explaining what sections do.
        # [x] Show format of packed data.
        # [x] Show how end of string indicated in packed data.
        # [x] Modify so that upper-case translated to lower case.

        .data
        .align 4
LUT:
        .space 256
KEEP:
        .asciiz ".,!?;abcdefghijklmnopqrstuvwxyz "

        ## Part of Problem 3 Solution Below
        #
        # The string below is part of the solution to problem 3.
        #
        .asciiz ".,!?;ABCDEFGHIJKLMNOPQRSTUVWXYZ "
        #
        ## Part of Problem 3 Solution Above

        .byte 0, 0, 0, 0
        .text
pack:

        #
        ## Packed Data Format
        #
        # Groups of three characters from the original string are packed
        # into two characters in the packed string arranged as follows:
        #
        #
        # Start with three characters from original string:
        # _________________  _________________  _________________
        # |    char0      |  |    char1      |  |    char2      |
        # ^^^^^^^^^^^^^^^^^  ^^^^^^^^^^^^^^^^^  ^^^^^^^^^^^^^^^^^
        #  7 6 5 4 3 2 1 0    7 6 5 4 3 2 1 0    7 6 5 4 3 2 1 0
        #
        #
        # Convert 8-bit characters to 5-bit encodings:
        #
        # char0 -> enc0   E.g., 'b' -> 6
        # char1 -> enc1   E.g., ',' -> 1
        # char2 -> enc2   E.g., '#' -> 0  (# cannot be represented)
        #
        #
        # Arrange the encodings into a 16-bit quantity with a
        # termination bit (t, position 15) as shown. The termination
        # is 1 for the last group of three characters. Note that the
        # exact length of the original string is not remembered.
        #
        # ___________________________________
        # | t |  enc2   |  enc1   |  enc0   |
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        #   1  1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
        #   5  4 3 2 1 0 9 8 7 6 5 4 3 2 1 0

        #
        ## Encoding
        #
        # Each character in the original string is encoded as a 5-bit
        # quantity, allowing for 32 different characters. The string
        # KEEP specifies a character's encoding by its position. For
        # example, 'a' is at position 5, so it is encoded in the
        # packed string as a 5.  If a character is not in KEEP it
        # is encoded as a 0 (a period).

        #
        ## Prepare Lookup Table (LUT)
        #
        # To speed the encoding of large strings a lookup table (LUT)
        # is prepared based on the KEEP string. The i'th element of
        # LUT gives the encoding for character i. For example, element
        # 96 ('a') of LUT is a 5, meaning in the packed string an 'a'
        # appears as a 5.

        # Set lookup table to zeros.
        #
        la $t0, LUT
        addi $t2, $t0, 252
PLOOP1:
        sw $0, 0($t0)
        bne $t0, $t2, PLOOP1
        addi $t0, $t0, 4

        # Write entries of lookup table based on KEEP string.
        #
        la $t0, LUT
        la $t1, KEEP
        addi $t3, $0, 0        # Initialize encoding to zero.
PLOOP2:
        lbu $t2, 0($t1)        # Load character of KEEP string.
        add $t4, $t0, $t2      # Find corresponding position in LUT.
        sb $t3, 0($t4)         # Write its encoding into LUT.
        addi $t1, $t1, 1       # Increment KEEP string address.
        bne $t2, $0  PLOOP2
        addi $t3, $t3, 1       # Increment encoding, for next character.

        ## Part of Problem 3 Solution Below
        #
        # If there is another KEEP string process that.
        #
        lbu $t2, 0($t1)
        bne $t2, $0 PLOOP2
        addi $t3, $0, 0
        #
        ## Part of Problem 3 Solution Above

        # Initialize first element of LUT (for the NULL character).
        #
        addi $t3, $0, -1
        sb $t3, 0($t0)

        #
        ## Pack String
        #

        # Each iteration of the loop packs three characters.

PLOOP:
        lbu $t4, 0($a0)     # Load char from original string.
        add $t1, $t0, $t4   # Compute LUT address.

        lb $t2, 0($t1)      # Load character's encoding.
        add $t5, $t2, $0    # Keep copy of encoding to check for end of string.

        lbu $t4, 1($a0)     # Load next character from original string.
        add $t1, $t0, $t4   # Compute LUT address.

        lb $t3, 0($t1)      # Load character's encoding.
        or $t5, $t5, $t3    # OR in encoding to remember if at end of string.

        sll $t3, $t3, 5     # Shift second encoding over...
        or $t2, $t2, $t3    # ...and OR into packed value. (Two chars so far.)

        lbu $t4, 2($a0)     # Load next character from original string.
        add $t1, $t0, $t4   # Compute LUT address.

        lb $t3, 0($t1)      # Load character's encoding.
        or $t5, $t5, $t3    # OR in encoding to remember if at end of string.
        sll $t3, $t3, 10    # Shift third encoding over...
        or $t2, $t2, $t3    # and OR into packed value. All three chars packed.

        sh $t2, 0($a1)      # Store packed characters...
        addi $a1, $a1, 2    # ...and increment address.

        bgez $t5  PLOOP     # If none of the 3 chars NULL continue with loop.
        addi $a0, $a0, 3    # Move to next group of three characters.

        #
        ## Terminate String and Return
        #

        ori $t2, $t2, 0x8000 # Add termination bit to last packed value.

        jr $ra
        sh $t2, -2($a1)      # Re-write last packed value with termination.