################################################################################
##
## LSU EE 4720 Spring 2022 Homework 1
##
##
 ## SOLUTION

 # Assignment https://www.ece.lsu.edu/ee4720/2022/hw01.pdf
 # Solution Discussion https://www.ece.lsu.edu/ee4720/2022/hw01_sol.pdf

################################################################################
## Problem 1
#
#  Instructions: https://www.ece.lsu.edu/ee4720/2022/hw01.pdf

strlen_p1:
        ## Register Usage
        #
        # CALL VALUE
        #  $a0: Address of first character of string.
        #       This address will be a multiple of 4.
        #
        # RETURN
        #  $v0: The length of the string (not including the null).
        #
        # Note:
        #  Can modify registers $t0-$t9, $a0-$a3, $v0, $v1.
        #  DO NOT modify other registers.
        #
        # [✔] The testbench should show 0 errors.
        # [✔] Try to reduce the number of executed instructions.
        # [✔] Do not use pseudoinstructions except for nop.
        #     Do not use: li, la, mov, bgt, blt, etc.
        #
        # [✔] Code should be efficient.
        # [✔] The code should be clearly written.
        # [ ] Comments should be written for an experienced programmer.

        addi $v0, $a0, 0

        # SOLUTION: Prepare masks for two high bytes.
        #
        lui $t6, 0xff00   # t6 ->  0xff000000
        lui $t7, 0xff     # t7 ->    0xff0000
        #
        # In the version using lhu it was possible to mask off each
        # byte using an immediate, such as in "andi $t1, $t0, 0xff".
        # But since immediate values are limited to 16 bits we need to
        # use some other method to get the mask in a register. The
        # method used above is an lui instruction.
        
LOOP:                         # SOLUTION described by comments below.
        lw $t0, 0($a0)        # Change lhu to lw
        and $t1, $t0, $t6     # Mask off most-significant byte ..
        beq $t1, $0, DONE0    # .. if it is zero we are done ..
        and $t1, $t0, $t7     # .. otherwise mask off next byte ..
        beq $t1, $0, DONE1    # .. and if that one is zero we're done ..
        andi $t1, $t0, 0xff00 # .. otherwise mask the next one ......
        beq $t1, $0, DONE2
        andi $t1, $t0, 0xff
        bne $t1, $0, LOOP
        addi $a0, $a0, 4

        sub $v0, $a0, $v0
        jr $ra
        addi $v0, $v0, -1

DONE2:  # SOLUTION: Modify the case for byte 2
        sub $v0, $a0, $v0
        jr $ra
        addi $v0, $v0, 2

DONE1:  # SOLUTION: Add a case for byte 1
        sub $v0, $a0, $v0
        jr $ra
        addi $v0, $v0, 1

DONE0:
        jr $ra
        sub $v0, $a0, $v0

################################################################################
## Problem 2
#

strlen_p2:
        # CALL VALUE
        #  $a0: Address of first character of string.
        #       This address will be a multiple of 4.
        #
        # RETURN
        #  $v0: The length of the string (not including the null).
        #
        # Note:
        #  Can modify registers $t0-$t9, $a0-$a3, $v0, $v1.
        #  DO NOT modify other registers.
        #
        # [✔] Use lw to load from string. (Replace the lbu)
        # [✔] Make use of orc.b insn. Consider using clz, clo.
        # [✔] The testbench should show 0 errors.
        # [✔] Try to reduce the number of executed instructions.
        # [✔] Do not use pseudoinstructions except for nop.
        #     Do not use: li, la, mov, bgt, blt, etc.
        #
        # [✔] Code should be efficient.
        # [✔] The code should be clearly written.
        # [ ] Comments should be written for an experienced programmer.

        ## SOLUTION Approach
        #
        # In main loop check whether there is a null and do so using
        # the minimum number of instructions. The orc.b instruction
        # returns a 0xffffffff if no null is found, and it's easy to
        # check for that. If a null is found, exit the loop and *then*
        # figure out where.

        addi $v0, $a0, 4
        addi $t3, $0, -1  # SOLUTION: Prepare value: 0xffffffff
        
LOOPB:
        lw $t0, 0($a0)
        orc.b $t1, $t0

        # SOLUTION:  Example execution of orc.b:
        #       String at address in $a0: "AB"
        # t0:   A B ∅ X            <-- Value in ASCII. ∅ is null, X unknown.
        # t0: 0x41420099           <-- Value in hex.
        # t0: 0x41 0x42 0x00 0x99  <-- Value separated into four bytes
        # t1: 0xff 0xff 0x00 0xff  <-- Result. Non-zero bytes changed to 0xff
        # t1: 0xffff00ff           <-- Result. Non-zero bytes changed to 0xff

        beq $t1, $t3, LOOPB        # If t1 is 0xffffffff then no null found ..
        addi $a0, $a0, 4           # .. so increment address for next word.

        # A null has been found. Count number  of leading 1's to find out.
        # t1: 0xffff00ff           <-- Continuing with example.
        clo $t2, $t1
        # t2: 16                   <-- Found 16 consecutive 1's starting at MSB.
        sra $t2, $t2, 3            # Effectively divide by 8.
        # t2:  2                   <-- 16 1's means 2 bytes

        sub $v0, $a0, $v0          # Add on number of bytes before null.

        jr $ra
        add $v0, $v0, $t2



##############################################################################
 #

strlen_ref:
        ## Register Usage
        #
        # $a0: Address of first character of string.
        # $v0: Return value, the length of the string.

        addi $v0, $a0, 1        # Set aside a copy of the string start + 1.
        
REF_LOOP:
        lbu $t0, 0($a0)         # Load next character in string into $t0
        bne $t0, $0, REF_LOOP   # If it's not zero, continue
        addi $a0, $a0, 1        # Increment address. (Note: Delay slot insn.)
                
        jr $ra
        sub $v0, $a0, $v0


##############################################################################
#
 ## Test Code
#
#  The code below calls the strlen routines.

        .data
str:
        .align 2
        .asciiz "1"
        .align 2, -1
        .asciiz "12"
        .align 2, -1
        .asciiz "123"
        .align 2, -1
        .asciiz "1234"
        .align 2, -1
        .asciiz "12345"
        .align 2, -1
        .asciiz ""
        .align 2, -1
        .asciiz "\"Per aspera, ad astra!\""
        .align 2, -1
        .ascii "eighteen quintillion, four hundred forty six quadrillion, "
        .ascii "seven hundred forty four trillion, seventy three billion, "
        .ascii "seven hundred nine million, five hundred fifty one thousand, "
        .asciiz "six hundred sixteen"
msg:
        .asciiz "String %/s2/2d: Length %/v1/3d is "
msg_good:
        .asciiz "correct. Took %/s4/3d insn or %/f6/.3f char/insn\n"
msg_bad:
        .asciiz "wrong. Should be %/s7/3d.\n"

mut_strlen_p1:
        .word strlen_p1
        .asciiz "strlen_p1  (Problem 1 - Bit Ops)"
mut_strlen_p2:
        .word strlen_p2
        .asciiz "strlen_p2  (Problem 2 - RISC V orc insn)"
mut_strlen_ref:
        .word strlen_ref
        .asciiz "strlen_ref (Simple strlen routine.)"
muts:
        .word mut_strlen_p1
        .word mut_strlen_p2
        .word mut_strlen_ref
        .word 0

mut_msg:
        .asciiz "\n** Starting Test of Routine \"%/t0/s\" **\n"

        .text
        .globl __start
__start:
        mtc0 $0, $22            # Pause tracing.
        addi $s1, $0, 0
TBOUTER:
        la $t0, muts
        sll $t1, $s1, 2
        add $t1, $t1, $t0
        lw $s0, 0($t1)

        bne $s0, $0, TB_MORE
        nop

        addi $v0, $0, 10        # System call code for exit.
        syscall
TB_MORE:

        la $a0, mut_msg
        addi $t0, $s0, 4
        addi $v0, $0, 11
        syscall

        la $a0, str
        addi $s6, $a0, 0        # Save copy of string starting address.
        addi $s2, $0, 0
TBLOOP:
        addi $a0, $s6, 0
        jal strlen_ref
        addi $s2, $s2, 1
        addi $s7, $v0, 0

        addi $a0, $s6, 0
        addi $v0, $0, -1
        lw $t0, 0($s0)
        mtc0 $v0, $22           # Resume tracing. (No effect if not stepping.)
        jalr $t0
        mfc0 $s5, $9            # Copy current instruction count. (Before.)
        mfc0 $s4, $9            # Copy current instruction count. (After.)
        mtc0 $0, $22            # Pause tracing.
        addi $s4, $s4, -1
        sub $s4, $s4, $s5
        mtc1 $s4, $f4
        mtc1 $v0, $f0
        div.d $f6, $f0, $f4
        
        addi $v1, $v0, 0        # Move length of string to $v1
        addi $v0, $0, 11        # System call code for message.
        la $a0, msg             # Address of message.
        syscall

        la $a0, msg_good
        beq $v1, $s7  TB_CONTINUE
        nop
        la $a0, msg_bad
TB_CONTINUE:
        syscall

        add $s6, $s6, $s7
        ori $s6, $s6, 0x3
        addi $s6, $s6, 1
        la $a0, msg
        slt $t0, $s6, $a0
        bne $t0, $0, TBLOOP
        nop

        j TBOUTER
        addi $s1, $s1, 1