################################################################################
##
## LSU EE 4720 Fall 2005 Homework 2 Solution
##
##
# Assignment http://www.ece.lsu.edu/ee4720/2005f/hw2.html
################################################################################
## Pack Routine
## Register Usage
#
# $a0: Call argument. Address of string to pack. (Original string.)
# $a1: Call argument. Address of memory for packed string.
# There are no return values.
# [x] Add comments to this routine.
# Comments should explain what routine does.
# [x] Add comments explaining what sections do.
# [x] Show format of packed data.
# [x] Show how end of string indicated in packed data.
# [x] Modify so that upper-case translated to lower case.
.data
.align 4
LUT:
.space 256
KEEP:
.asciiz ".,!?;abcdefghijklmnopqrstuvwxyz "
## Part of Problem 3 Solution Below
#
# The string below is part of the solution to problem 3.
#
.asciiz ".,!?;ABCDEFGHIJKLMNOPQRSTUVWXYZ "
#
## Part of Problem 3 Solution Above
.byte 0, 0, 0, 0
.text
pack:
#
## Packed Data Format
#
# Groups of three characters from the original string are packed
# into two characters in the packed string arranged as follows:
#
#
# Start with three characters from original string:
# _________________ _________________ _________________
# | char0 | | char1 | | char2 |
# ^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^
# 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
#
#
# Convert 8-bit characters to 5-bit encodings:
#
# char0 -> enc0 E.g., 'b' -> 6
# char1 -> enc1 E.g., ',' -> 1
# char2 -> enc2 E.g., '#' -> 0 (# cannot be represented)
#
#
# Arrange the encodings into a 16-bit quantity with a
# termination bit (t, position 15) as shown. The termination
# is 1 for the last group of three characters. Note that the
# exact length of the original string is not remembered.
#
# ___________________________________
# | t | enc2 | enc1 | enc0 |
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
# 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#
## Encoding
#
# Each character in the original string is encoded as a 5-bit
# quantity, allowing for 32 different characters. The string
# KEEP specifies a character's encoding by its position. For
# example, 'a' is at position 5, so it is encoded in the
# packed string as a 5. If a character is not in KEEP it
# is encoded as a 0 (a period).
#
## Prepare Lookup Table (LUT)
#
# To speed the encoding of large strings a lookup table (LUT)
# is prepared based on the KEEP string. The i'th element of
# LUT gives the encoding for character i. For example, element
# 96 ('a') of LUT is a 5, meaning in the packed string an 'a'
# appears as a 5.
# Set lookup table to zeros.
#
la $t0, LUT
addi $t2, $t0, 252
PLOOP1:
sw $0, 0($t0)
bne $t0, $t2, PLOOP1
addi $t0, $t0, 4
# Write entries of lookup table based on KEEP string.
#
la $t0, LUT
la $t1, KEEP
addi $t3, $0, 0 # Initialize encoding to zero.
PLOOP2:
lbu $t2, 0($t1) # Load character of KEEP string.
add $t4, $t0, $t2 # Find corresponding position in LUT.
sb $t3, 0($t4) # Write its encoding into LUT.
addi $t1, $t1, 1 # Increment KEEP string address.
bne $t2, $0 PLOOP2
addi $t3, $t3, 1 # Increment encoding, for next character.
## Part of Problem 3 Solution Below
#
# If there is another KEEP string process that.
#
lbu $t2, 0($t1)
bne $t2, $0 PLOOP2
addi $t3, $0, 0
#
## Part of Problem 3 Solution Above
# Initialize first element of LUT (for the NULL character).
#
addi $t3, $0, -1
sb $t3, 0($t0)
#
## Pack String
#
# Each iteration of the loop packs three characters.
PLOOP:
lbu $t4, 0($a0) # Load char from original string.
add $t1, $t0, $t4 # Compute LUT address.
lb $t2, 0($t1) # Load character's encoding.
add $t5, $t2, $0 # Keep copy of encoding to check for end of string.
lbu $t4, 1($a0) # Load next character from original string.
add $t1, $t0, $t4 # Compute LUT address.
lb $t3, 0($t1) # Load character's encoding.
or $t5, $t5, $t3 # OR in encoding to remember if at end of string.
sll $t3, $t3, 5 # Shift second encoding over...
or $t2, $t2, $t3 # ...and OR into packed value. (Two chars so far.)
lbu $t4, 2($a0) # Load next character from original string.
add $t1, $t0, $t4 # Compute LUT address.
lb $t3, 0($t1) # Load character's encoding.
or $t5, $t5, $t3 # OR in encoding to remember if at end of string.
sll $t3, $t3, 10 # Shift third encoding over...
or $t2, $t2, $t3 # and OR into packed value. All three chars packed.
sh $t2, 0($a1) # Store packed characters...
addi $a1, $a1, 2 # ...and increment address.
bgez $t5 PLOOP # If none of the 3 chars NULL continue with loop.
addi $a0, $a0, 3 # Move to next group of three characters.
#
## Terminate String and Return
#
ori $t2, $t2, 0x8000 # Add termination bit to last packed value.
jr $ra
sh $t2, -2($a1) # Re-write last packed value with termination.