## LSU EE 3755 --
Fall 2009 -- Computer
Organization
#
## MIPS Notes 2 -- Control-Transfer
Instructions
## Contents
#
# Types of Control-Transfer
Instructions
# A Simple Jump Instruction
# A Branch Instruction
# More Branch Instructions
# More Jump Instructions
# Procedure Call Example
# Typical CTI Uses
## References
#
# :PH:
Patterson & Hennessy, "Computer Organization &
Design"
# :Mv1: MIPS Technologies, "MIPS32 Architecture for
Programmers Vol I: Intro"
# :Mv2: MIPS Technologies, "MIPS32 Architecture for
Programmers Vol II: Instr"
################################################################################
## Types of Control-Transfer
Instructions
# :PH:
Material covered in a different order.
# :Mv1: 4.1.3 (Brief
overview.)
## Types
of Control Transfers
#
# Branch
#
Used for loops, ifs, etc.
Most commonly used CTI.
#
Instruction specifies a condition and an address, the
target.
#
If condition is true continue execution at
target.
#
# Branch-and-Link
#
Used for procedure calls.
#
Instruction specifies a condition and an address, the
target.
#
If condition is true save PC in $31 and continue execution at
target.
#
# Jump
#
Used for returns and other purposes.
#
Instruction specifies a target
#
Continue execution at target.
#
# Jump-and-Link
#
Used for procedure calls and returns and other
purposes.
# Instruction specifies a
target
#
Save PC in $31 and continue execution at
target.
#
# Trap (Not covered in
detail.)
#
If condition true continue at specified entry in OS-prepared trap
table.
#
#
##
Conditions and Targets
#
# The methods used to specify conditions and targets
vary by instruction.
# See descriptions in following
sections
##
Delayed CTI
#
# All of the CTIs covered here are
delayed.
# (Traps are not delayed.)
#
# The instruction after an ordinary delayed CTI is
always executed.
#
add
$8, $0, $0
j
FORJOY
addi
$8, $8, 1 #This will be
executed
addi
$8, $8, 2 # This will not be executed
FORJOY:
addi
$8, $8, 4
addi
$8, $8, 8
################################################################################
## A Simple Jump
Instruction
# :PH:
# :Mv2:
# Simple jump (j) covered here, others (jr,jal,jalr)
covered in a
# later selection.
# Assembler :Syntax: J
target
# Coding :Syntax: J
ii
#
After next instruction go to { PC[31:28], ii, 2'b0
},
#
where ii is bits 27:2 of target.
#
# In assembly language "target" is the line label for
the target.
# In machine language "target", the ii field, is bits
27:2 of the
#
target address.
# Bits 31:28 of the target address must match bits
31:28 of the jump
#
instruction's address. (If not, use the jr
instruction.)
# :Example:
#
# Code used to illustrate how target is
determined.
FIRSTLINE:
# Address of add: 0x400000
add
$8, $0, $0
j
FORJOY
addi
$8, $8, 1
addi
$8, $8, 2
FORJOY: # Address of addi:
0x400014
#simulator(assembler) produces this address
addi
$8, $8, 4
addi
$8, $8, 8
# In the code above:
#
#
FORJOY is a line label, part of the assembly
language.
#
Suppose the assembler [really the linker] sets FORJOY =
0x400014.
################################################################################
## A Branch Instruction
# :PH:
# :Mv2:
# One branch instruction covered here (bne), others
(beq, bltz, etc.)
# covered in a later
section.
# Assembler :Syntax: BNE rs, rt, target # Branch Not
Equal
# Coding :Syntax: BNE rs, rt,
offset
#
if rs != rt after next instruction
#
go
to PC + 4 + 4 * sign_extend(offset)
# Why 4 times?
#
each instruction is 4bytes long.
# for
example)
Memory Addr
Contents.
#addi $1,$2,1 400000 1-st
instruction
#
400001
1-st
#
400002
1-st
#
400003
1-st
#
400004
2nd
instruction
#
400005
2nd
#
400006
2nd
#
400007
2nd
#
400008
3rd
#
400009
3rd
#
40000a
3rd
#
40000b
3rd
#
.
#
#
# Why PC +4 ?
#
delayed branch ; the next instruction will be executed
always
#
# "target" is the line label to continue
at.
# "offset" is the number of instructions to skip,
starting at
#
the instruction after the branch.
# :Example:
#
# Code to illustrate the
offset.
FIRSTLINE:
add
$9, $0, $0
addi
$10, $9, 1
bne
$9, $10, LINE # actual code will be bne
$9,$10,2(offset)
addi
$8, $8, 1 #
Offset computed from this instruction.
addi
$8, $8, 2
LINE:
addi
$8, $8, 4 #
The branch target, offset = 2 instructions.
addi
$8, $8, 8
# :Example:
#
# Use of a branch in a
loop.
# Reg $10
holds i value
add
$8, $0, $0 # $8 ->
0
addi
$9, $0, 10 # $9 ->
10
add
$10, $0, $0 # $10 -> 0
(i)
LOOP:
add
$8, $8, $10 # $8 = $8 +
i
addi $9,
$9, -1
bne
$9, $0, LOOP
addi
$10, $10, 1 # i = i + 1
# Short HW : What the above code is
doing?
################################################################################
## More Branch Instructions
# :PH:
# :Mv2:
# Assembler :Syntax: BEQ rs, rt, target # Branch
Equal
# Coding :Syntax: BEQ rs, rt,
offset
#
if rs == rt after next instruction
#
go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BGEZ rs, target # Branch >=
Zero
# Coding :Syntax: BGEZ rs,
offset
#
if rs >= 0 after next instruction
#
go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BGTZ rs, target # Branch >
Zero
# Coding :Syntax: BGTZ rs,
offset
#
if rs > 0 after next instruction
#
go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BLEZ rs, target # Branch <=
Zero
# Coding :Syntax: BLEZ rs,
offset
#
if rs <= 0 after next instruction
#
go to PC + 4 + 4 * sign_extend(offset)
#
# Assembler :Syntax: BLTZ rs, target # Branch <
Zero
# Coding :Syntax: BLTZ rs,
offset
#
if rs < 0 after next instruction
#
go to PC + 4 + 4 *
sign_extend(offset)
################################################################################
## More Jump Instructions
# :PH:
# :Mv2:
# Assembler :Syntax: JAL target # Jump and
link.
# Coding :Syntax: JAL
ii
#
$31 <- PC + 8
#
After next instruction go to { PC[31:28], ii, 2'b0
},
#
where ii is bits 27:2 of target.
#
Note: $31 a.k.a.(also known as)
$ra
#
# :Syntax: JR rs
# Jump register.
#
After next instruction go to address in rs.
#
# :Syntax: JALR rs
# Jump and link register.
#
$31 <- PC + 8
#
After next instruction go to address in rs
#
Note: $31 a.k.a. $ra
#
# :Example:
#
# Use of j instruction.
# Jump to
LINEX
j
LINEX
nop
nop
LINEX:
addi
$t2, $t2, 1
# :Example:
#
# Use of jal instruction.
LINEA: #
LINEA = 0x12345678
#
Instruction below is stored at (PC=) 0x12345678
#
Set $ra -> PC + 8 = 0x12345680 and jump to LINEX
jal
LINEX
nop
nop
LINEX:
addi
$t2, $t2, 1
# :Example:
#
# Use of jr instruction.
#
Load the address of a line into $to.
la
$t0, LINEX
#
Jump to the line.
jr
$t0
nop
nop
LINEX:
addi
$t2, $t2, 1
# :Example:
#
# Use of jalr instruction.
LINEA: #
LINEA = 0x12345678
la
$t0, LINEX # Note: this
becomes two
instructions.
#
Instruction below is stored at (PC=) 0x12345680
#
Set $ra -> PC + 8 = 0x12345688 and jump to LINEX
jalr
$t0 #
0x12345680
nop
nop
LINEX:
addi
$t2, $t2, 1
# So
above code will become :
#
#
#
# LINEA: # LINEA =
0x12345678
# lui
$t0, 0x1234
#0x12345678
# ori $t0,
0x 568c
#0x1234567c # la
instruction.
#
assume LINEX = 0x1234568c
# jalr
$t0
# 0x12345680
# nop
#0x12345684
# nop
#0x12345688
#
# LINEX:
# addi
$t2, $t2, 1
#0x1234568c
################################################################################
## Procedure Call Example
# :PH:
# :Example:
#
# Use of jal, jr, and jalr instructions to call and
return from
# a procedure.
#
# The called procedure, times_three, puts 3 times $a0
in $v0.
#
# The procedure is called correctly three times,
after which the code
# goes into an infinite
loop.
.globl __start
__start:
addi
$a0, $0, 12 # Set
value to triple.
jal
times_three
# Jump and save PC + 8 in $ra ($31)
nop
addi
$a0, $0, 3755 # Returns here
after jal above.
jal
times_three
# Call times_three again.
nop
nop
# Returns here after jal above.
la
$s1, times_three # Store address of
times_three in $s1
addi
$a0, $0, 2001 # Value to
triple.
jalr
$s1
# Call times_three again, this time using jalr.
nop
addi
$a0, $0, 22 #
jr
$s1 #
THE WRONG WAY TO CALL A PROCEDURE!!
nop
addi
$a0, $0, 0
# Never reached.
times_three:
sll
$v0, $a0, 1 #
Start of times_three.
add
$v0, $v0, $a0
jr
$ra
# Jump using return address.
nop
################################################################################
## Typical CTI Uses
# :PH:
# Branches (bne, beq, bltz,
etc.)
#
if/else
#
Loops
#
# Regular Jump (j)
#
if/else, loops (along with branches)
#
# Jump and Link: (jal,
jalr)
#
Procedure calls.
#
# Jump Register
#
Procedure returns.
#
switch statements.
# :Example:
#
# Typical if/else
statement.
#
# if( a + b > c ) { d = d >> 1; e++; } else
{ d = d << 1; e--; }
# i++;
#
# Reg Usage: a, $8; b, $9; c, $10; d, $11; e, $12; i, $13
add
$16, $8, $9 # $16
= a + b
slt
$17, $10, $16 # $17 = c <
a + b
beq
$17, $0, ELSEPART
nop
sra
$11, $11, 1 # d =
d >> 1
addi
$12, $12, 1 #
e++;
j
LINEX
nop
ELSEPART:
sll
$11, $11, 1 # d =
d << 1
addi
$12, $12, -1 #
e--
LINEX:
addi
$13, $13, 1 #
i++
# :Example:
#
# Typical for loop.
#
# for( i = a - 5; i < b +
c; i = i + d ) x = x ^ ( x >> 1
);
#
Initialization
Test
Increment
Body
# ++k;
# Reg Usage: a, $8; b, $9; c, $10; d, $11; x, $12; i, $13; k, $14;
##
Initialization
addi
$13, $8, -5 # i = a -
5
j
TEST
nop
LOOP:
##
Body
sra
$16, $12, 1 # $16
= x >> 1
xor
$12, $12, $16 # x = x ^
$16
##
Increment
add
$13, $13, $11 # i = i +
d
##
Test
TEST:
add
$16, $9, $10 # $16 = b + c Note: Can be moved out of
loop.
slt
$17, $13, $16 # i < $16
bne
$17, $0, LOOP
nop
addi
$14,$14,1 #
++k;
# optimazation
# if for loop repeats
many times(say 1000) and
# we include inside the
loop body the statements:
##
{ x =
x^(x>>1);
## k = a
+c;
## p =
10;}
# k and
p are updated only once
# because a,c and 10 don't change. So it makes no
sense repeating
# them 1000 times(waste of
time)
# if we do that, optimized compiler will put these
instructions
# outside of the loop. Anyways a programer shouldn't
do it.
# for the for loop if the loop repeats many times(say
1000), the assembly
#code of above has a
problem.
# the problem is the
instruction
# add $16, $9,
$10 # $16 = b + c
# This
instruction repeats 1000 times
which is not needed
#
because b,c are constants so register $16 is updated only
once.
# updating $16 1000 times is a waste of time. The
solution is to take
# this instruction and put it outside of the loop. It
looks like
#
addi $13, $8, -5
# i = a - 5
#
add $16,$9,$10 # $16 = b+c // this is one of optimized
versions
#
# the optimized compiler will do this.
# j
TEST
#
nop
# An even better way is shown
below
#
addi $13, $8, -5
# i = a - 5
#
j TEST
#
add $16,$9,$10 # $16 = b+c
#
// taking advantage of delayed branch(delay
slot)