## LSU EE 3755 -- Fall 2005 -- Computer Organization

## LSU EE 3755 -- Fall 2005 -- Computer Organization

## MIPS Notes 2 -- Control-Transfer Instructions

## Contents

# Types of Control-Transfer Instructions

# A Simple Jump Instruction

# A Branch Instruction

# More Branch Instructions

# More Jump Instructions

# Procedure Call Example

# Typical CTI Uses

## References

# :PH: Patterson & Hennessy, "Computer Organization & Design"

# :Mv1: MIPS Technologies, "MIPS32 Architecture for Programmers Vol I: Intro"

# :Mv2: MIPS Technologies, "MIPS32 Architecture for Programmers Vol II: Instr"

################################################################################

## Types of Control-Transfer Instructions

# :PH: Material covered in a different order.

# :Mv1: 4.1.3 (Brief overview.)

## Types of Control Transfers

# Branch

# Used for loops, ifs, etc. Most commonly used CTI.

# Instruction specifies a condition and an address, the target.

# If condition is true continue execution at target.

# Branch-and-Link

# Used for procedure calls.

# Instruction specifies a condition and an address, the target.

# If condition is true save PC in $31 and continue execution at target.

# Jump

# Used for returns and other purposes.

# Instruction specifies a target

# Continue execution at target.

# Jump-and-Link

# Used for procedure calls and returns and other purposes.

# Instruction specifies a target

# Save PC in $31 and continue execution at target.

# Trap (Not covered in detail.)

# If condition true continue at specified entry in OS-prepared trap table.

## Conditions and Targets

# The methods used to specify conditions and targets vary by instruction.

# See descriptions in following sections

## Delayed CTI

# All of the CTIs covered here are delayed.

# (Traps are not delayed.)

# The instruction after an ordinary delayed CTI is always executed.

add $8, $0, $0

j FORJOY

addi $8, $8, 1 #This will be executed

addi $8, $8, 2 # This will not be executed

FORJOY:

addi $8, $8, 4

addi $8, $8, 8

################################################################################

## A Simple Jump Instruction

# :PH: 3.5

# :Mv2:

# Simple jump (j) covered here, others (jr,jal,jalr) covered in a

# later selection.

# Assembler :Syntax: J target

# Coding :Syntax: J ii

# After next instruction go to { PC[31:28], ii, 2'b0 },

# where ii is bits 27:2 of target.

# In assembly language "target" is the line label for the target.

# In machine language "target", the ii field, is bits 27:2 of the

# target address.

# Bits 31:28 of the target address must match bits 31:28 of the jump

# instruction's address. (If not, use the jr instruction.)

# :Example:

# Code used to illustrate how target is determined.

FIRSTLINE: # Address of add: 0x400000

add $8, $0, $0

j FORJOY

addi $8, $8, 1

addi $8, $8, 2

FORJOY: # Address of addi: 0x400014

#simulator(assembler) produces this address

addi $8, $8, 4

addi $8, $8, 8

# In the code above:

# FORJOY is a line label, part of the assembly language.

# Suppose the assembler [really the linker] sets FORJOY = 0x400014.

################################################################################

## A Branch Instruction

# :PH: 3.5

# :Mv2:

# One branch instruction covered here (bne), others (beq, bltz, etc.)

# covered in a later section.

# Assembler :Syntax: BNE rs, rt, target # Branch Not Equal

# Coding :Syntax: BNE rs, rt, offset

# if rs != rt after next instruction

# go to PC + 4 + 4 * sign_extend(offset)

# Why 4 times?

# each instruction is 4bytes long.

# for example)

Memory Addr Contents.

#addi $1,$2,1 400000 1-st instruction

# 400001 1-st

# 400002 1-st

# 400003 1-st

# 400004 2nd instruction

# 400005 2nd

# 400006 2nd

# 400007 2nd

# 400008 3rd

# 400009 3rd

# 40000a 3rd

# 40000b 3rd

# . #

# Why PC +4 ?

# delayed branch ; the next instruction will be executed always

# "target" is the line label to continue at.

# "offset" is the number of instructions to skip, starting at

# the instruction after the branch.

# :Example:

# Code to illustrate the offset.

FIRSTLINE:

add $9, $0, $0

addi $10, $9, 1

bne $9, $10, LINE # actual code will be bne $9,$10,2(offset)

addi $8, $8, 1 # Offset computed from this instruction.

addi $8, $8, 2

LINE:

addi $8, $8, 4 # The branch target, offset = 2 instructions.

addi $8, $8, 8

# :Example:

# Use of a branch in a loop.

# Reg $10 holds i value

add $8, $0, $0 # $8 -> 0

addi $9, $0, 10 # $9 -> 10

add $10, $0, $0 # $10 -> 0 (i)

LOOP:

add $8, $8, $10 # $8 = $8 + i

addi $9, $9, -1

bne $9, $0, LOOP

addi $10, $10, 1 # i = i + 1

# Short HW : What the above code is doing?

################################################################################

## More Branch Instructions

# :PH: 3.5

# :Mv2:

# Assembler :Syntax: BEQ rs, rt, target # Branch Equal

# Coding :Syntax: BEQ rs, rt, offset

# if rs == rt after next instruction

# go to PC + 4 + 4 * sign_extend(offset)

# Assembler :Syntax: BGEZ rs, target # Branch >= Zero

# Coding :Syntax: BGEZ rs, offset

# if rs >= 0 after next instruction

# go to PC + 4 + 4 * sign_extend(offset)

# Assembler :Syntax: BGTZ rs, target # Branch > Zero

# Coding :Syntax: BGTZ rs, offset

# if rs > 0 after next instruction

# go to PC + 4 + 4 * sign_extend(offset)

# Assembler :Syntax: BLEZ rs, target # Branch <= Zero

# Coding :Syntax: BLEZ rs, offset

# if rs <= 0 after next instruction

# go to PC + 4 + 4 * sign_extend(offset)

# Assembler :Syntax: BLTZ rs, target # Branch < Zero

# Coding :Syntax: BLTZ rs, offset

# if rs < 0 after next instruction

# go to PC + 4 + 4 * sign_extend(offset)

################################################################################

## More Jump Instructions

# :PH: 3.5

# :Mv2:

# Assembler :Syntax: JAL target # Jump and link.

# Coding :Syntax: JAL ii

# $31 <- PC + 8

# After next instruction go to { PC[31:28], ii, 2'b0 },

# where ii is bits 27:2 of target.

# Note: $31 a.k.a.(also known as) $ra

# :Syntax: JR rs # Jump register.

# After next instruction go to address in rs.

# :Syntax: JALR rs # Jump and link register.

# $31 <- PC + 8

# After next instruction go to address in rs

# Note: $31 a.k.a. $ra

# :Example:

# Use of j instruction.

# Jump to LINEX

j LINEX

nop

LINEX:

addi $t2, $t2, 1

# :Example:

# Use of jal instruction.

LINEA: # LINEA = 0x12345678

# Instruction below is stored at (PC=) 0x12345678

# Set $ra -> PC + 8 = 0x12345680 and jump to LINEX

jal LINEX

nop

LINEX:

addi $t2, $t2, 1

# :Example:

# Use of jr instruction.

# Load the address of a line into $to.

la $t0, LINEX

# Jump to the line.

jr $t0

nop

LINEX:

addi $t2, $t2, 1

# :Example:

# Use of jalr instruction.

LINEA: # LINEA = 0x12345678

la $t0, LINEX # Note: this becomes two instructions.

# Instruction below is stored at (PC=) 0x12345680

# Set $ra -> PC + 8 = 0x12345688 and jump to LINEX

jalr $t0 # 0x12345680

nop

LINEX:

addi $t2, $t2, 1

# So above code will become :

# LINEA: # LINEA = 0x12345678

# lui $t0, 0x1234 #0x12345678

# ori $t0, 0x 568c #0x1234567c # la instruction.

# assume LINEX = 0x1234568c

# jalr $t0 # 0x12345680

# nop #0x12345684

# nop #0x12345688

# LINEX:

# addi $t2, $t2, 1 #0x1234568c

################################################################################

## Procedure Call Example

# :PH: 3.6

# :Example:

# Use of jal, jr, and jalr instructions to call and return from

# a procedure.

# The called procedure, times_three, puts 3 times $a0 in $v0.

# The procedure is called correctly three times, after which the code

# goes into an infinite loop.

.globl __start

__start:

addi $a0, $0, 12 # Set value to triple.

jal times_three # Jump and save PC + 8 in $ra ($31)

nop

addi $a0, $0, 3755 # Returns here after jal above.

jal times_three # Call times_three again.

nop

nop # Returns here after jal above.

la $s1, times_three # Store address of times_three in $s1

addi $a0, $0, 2001 # Value to triple.

jalr $s1 # Call times_three again, this time using jalr.

nop

addi $a0, $0, 22 #

jr $s1 # THE WRONG WAY TO CALL A PROCEDURE!!

nop

addi $a0, $0, 0 # Never reached.

times_three:

sll $v0, $a0, 1 # Start of times_three.

add $v0, $v0, $a0

jr $ra # Jump using return address.

nop

################################################################################

## Typical CTI Uses

# :PH: 3.5

# Branches (bne, beq, bltz, etc.)

# if/else

# Loops

# Regular Jump (j)

# if/else, loops (along with branches)

# Jump and Link: (jal, jalr)

# Procedure calls.

# Jump Register

# Procedure returns.

# switch statements.

# :Example:

# Typical if/else statement.

# if( a + b > c ) { d = d >> 1; e++; } else { d = d << 1; e--; }

# i++;

# Reg Usage: a, $8; b, $9; c, $10; d, $11; e, $12; i, $13

add $16, $8, $9 # $16 = a + b

slt $17, $10, $16 # $17 = c < a + b

beq $17, $0, ELSEPART

nop

sra $11, $11, 1 # d = d >> 1

addi $12, $12, 1 # e++;

j LINEX

nop

ELSEPART:

sll $11, $11, 1 # d = d << 1

addi $12, $12, -1 # e--

LINEX:

addi $13, $13, 1 # i++

# :Example:

# Typical for loop.

# for( i = a - 5; i < b + c; i = i + d ) x = x ^ ( x >> 1 );

# Initialization Test Increment Body

# ++k;

# Reg Usage: a, $8; b, $9; c, $10; d, $11; x, $12; i, $13; k, $14;

## Initialization

addi $13, $8, -5 # i = a - 5

j TEST

nop

LOOP:

## Body

sra $16, $12, 1 # $16 = x >> 1

xor $12, $12, $16 # x = x ^ $16

## Increment

add $13, $13, $11 # i = i + d

## Test

TEST:

add $16, $9, $10 # $16 = b + c Note: Can be moved out of loop.

slt $17, $13, $16 # i < $16

bne $17, $0, LOOP

nop

addi $14,$14,1 # ++k;

# optimazation

# if for loop repeats many times(say 1000) and

# we include inside the loop body the statements:

## { x = x^(x>>1);

## k = a +c;

## p = 10;}

# k and p are updated only once

# because a,c and 10 don't change. So it makes no sense repeating

# them 1000 times(waste of time)

# if we do that, optimized compiler will put these instructions

# outside of the loop. Anyways a programer shouldn't do it.

# for the for loop if the loop repeats many times(say 1000), the assembly

#code of above has a problem.

# the problem is the instruction

# add $16, $9, $10 # $16 = b + c

# This instruction repeats 1000 times which is not needed

# because b,c are constants so register $16 is updated only once.

# updating $16 1000 times is a waste of time. The solution is to take

# this instruction and put it outside of the loop. It looks like

# addi $13, $8, -5 # i = a - 5

# add $16,$9,$10 # $16 = b+c // this is one of optimized versions

# # the optimized compiler will do this.

# j TEST

# nop

# An even better way is shown below

# addi $13, $8, -5 # i = a - 5

# j TEST

# add $16,$9,$10 # $16 = b+c

# // taking advantage of delayed branch(delay slot)