newlib/libgloss/mips/vr5xxx.S

/*
 * vr5xxx.S -- CPU specific support routines
 *
 * Copyright (c) 1999 Cygnus Solutions
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

/* This file cloned from vr4300.S by dlindsay@cygnus.com
 * and recoded to suit Vr5432 and Vr5000.
 * Should be no worse for Vr43{00,05,10}.
 * Specifically, __cpu_flush() has been changed (a) to allow for the hardware
 * difference (in set associativity) between the Vr5432 and Vr5000,
 * and (b) to flush the optional secondary cache of the Vr5000.
 */

/* Processor Revision Identifier (PRID) Register: Implementation Numbers */
/* __cpu_flush compares the PRID implementation field against this value
 * to select the Vr5432-specific flush loop. */
#define IMPL_VR5432	0x54

/* Cache Constants not determinable dynamically */
#define VR5000_2NDLINE 32	/* secondary cache line size */
#define VR5432_LINE 32		/* I,Dcache line sizes */
/* The Vr5432 flush loop walks this many bytes of index space and issues
 * each cache op twice per line (offsets 0 and 1, i.e. set 0 and set 1),
 * hence "half-size". */
#define VR5432_SIZE (16*1024)	/* I,Dcache half-size */


/* When not building for a 64-bit MIPS target, raise the assembler ISA
 * level: the code below uses cache, eret, sd/ld and daddiu. */
#ifndef __mips64
	.set mips3
#endif
#ifdef __mips16
/* This file contains 32 bit assembly code.  */
	.set nomips16
#endif

#include "regs.S"

	.text
	.align	2

	# Taken from "R4300 Preliminary RISC Processor Specification
	# Revision 2.0 January 1995" page 39: "The Count
	# register... increments at a constant rate... at one-half the
	# PClock speed."
	# We can use this fact to provide small polled delays.
	.globl	__cpu_timer_poll
	.ent	__cpu_timer_poll
__cpu_timer_poll:
	.set	noreorder
	# Polled (busy-wait) delay driven by the CP0 Count register.
	# in:	a0 = (unsigned int) number of PClock ticks to wait for
	# out:	void
	# uses:	a0 and v0 are overwritten; no memory is touched

	# The Vr4300 counter updates at half PClock, so divide by 2 to
	# get counter delta:
	bnezl	a0, 1f		# continue if delta non-zero
	srl	a0, a0, 1	# divide ticks by 2		{DELAY SLOT}
	# (bnezl is a branch-likely: the srl above is only executed when
	# the branch is taken, i.e. when a0 was non-zero.)
	# perform a quick return to the caller:
	j	ra
	nop			#				{DELAY SLOT}
1:
	mfc0	v0, C0_COUNT	# get current counter value
	nop
	nop
	# We cannot just do the simple test, of adding our delta onto
	# the current value (ignoring overflow) and then checking for
	# equality. The counter is incrementing every two PClocks,
	# which means the counter value can change between
	# instructions, making it hard to sample at the exact value
	# desired.

	# However, we do know that our entry delta value is less than
	# half the number space (since we divide by 2 on entry). This
	# means we can use a difference in signs to indicate timer
	# overflow.
	addu	a0, v0, a0	# unsigned add (ignore overflow)
	# We now have our end value (which will have been
	# sign-extended to fill the 64bit register value).
2:
	# get current counter value:
	mfc0	v0, C0_COUNT
	nop
	nop
	# This is an unsigned 32bit subtraction; the result is then
	# tested as a signed value, so the loop ends once "now" has
	# reached or passed "end":
	subu	v0, a0, v0	# delta = (end - now)
	bgtzl	v0, 2b		# looping back is most likely
	nop			#				{DELAY SLOT}
	# We have now been delayed (in the foreground) for AT LEAST
	# the required number of counter ticks.
	j	ra		# return to caller
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__cpu_timer_poll

	# Flush the processor caches to memory:

	.globl	__cpu_flush
	.ent	__cpu_flush
__cpu_flush:
	.set	noreorder
	# Write back and invalidate the primary data cache (and, on the
	# non-Vr5432 path, any secondary cache reported by CONFIG), then
	# invalidate the primary instruction cache. All cache operations
	# are index-type operations issued over a K0BASE-relative range.
	# in:	void
	# out:	void
	# uses:	a0-a3 and t0-t3 are overwritten
	#
	# The whole routine is assembled noreorder: every branch delay
	# slot below is filled by hand and instruction order matters.

	# NOTE: The Vr4300 and Vr5432 *CANNOT* have any secondary cache.
	# On those, SC (bit 17 of CONFIG register) is hard-wired to 1,
	# except that email from Dennis_Han@el.nec.com says that old
	# versions of the Vr5432 incorrectly hard-wired this bit to 0.
	# The Vr5000 has an optional direct-mapped secondary cache,
	# and the SC bit correctly indicates this.

	# So, for the 4300 and 5432 we want to just
	# flush the primary Data and Instruction caches.
	# For the 5000 it is desired to flush the secondary cache too.
	# There is an operation difference worth noting.
	# The 4300 and 5000 primary caches use VA bit 14 to choose cache set,
	# whereas 5432 primary caches use VA bit 0.

	# This code interprets the relevant Config register bits as
	# much as possible, except for the 5432.
	# The code therefore has some portability.
	# However, the associativity issues mean you should not just assume
	# that this code works anywhere. Also, the secondary cache set
	# size is hardwired, since the 5000 series does not define codes
	# for variant sizes.

	# Note: this version of the code flushes D$ before I$.
	#   It is difficult to construct a case where that matters,
	#   but it cant hurt.

	mfc0	a0, C0_PRID	# a0 = Processor Revision register
	nop			# dlindsay: unclear why the nops, but
	nop			# vr4300.S had such so I do too.
	srl	a2, a0, PR_IMP	# want bits 8..15
	# The implementation field is 8 bits wide, so the mask is 0xff.
	# (An earlier mask of 0x255 dropped some field bits and kept one
	# bit above the field, so the compare below could mis-identify
	# the processor.)
	andi	a2, a2, 0xff	# mask: now a2 = Implementation # field
	li	a1, IMPL_VR5432
	beq	a1, a2, 8f	# use Vr5432-specific flush algorithm
	nop

	# Non-Vr5432 version of the code.
	# (The distinctions being: CONFIG is truthful about secondary cache,
	# and we act as if the primary Icache and Dcache are direct mapped.)

	mfc0	t0, C0_CONFIG	# t0 = CONFIG register
	nop
	nop
	li	a1, 1		# a1=1, a useful constant

	# Primary cache sizes are decoded as 1 << (field + 12) bytes.
	srl	a2, t0, CR_IC	# want IC field of CONFIG
	andi	a2, a2, 0x7	# mask: now a2= code for Icache size
	add	a2, a2, 12	# +12
	sllv	a2, a1, a2	# a2=primary instruction cache size in bytes

	srl	a3, t0, CR_DC	# DC field of CONFIG
	andi	a3, a3, 0x7	# mask: now a3= code for Dcache size
	add	a3, a3, 12	# +12
	sllv	a3, a1, a3	# a3=primary data cache size in bytes

	li	t2, (1 << CR_IB) # t2=mask over IB boolean
	and	t2, t2, t0	# test IB field of CONFIG register value
	beqz	t2, 1f		# IB clear: keep the 16 loaded in the delay slot
	li	a1, 16		# 16 bytes (branch shadow: always loaded.)
	li	a1, 32		# non-zero, then 32bytes
1:

	li	t2, (1 << CR_DB) # t2=mask over DB boolean
	and	t2, t2, t0	# test DB field of CONFIG register value
	beqz	t2, 2f		# DB clear: keep the 16 loaded in the delay slot
	li	a0, 16		# 16bytes (branch shadow: always loaded.)
	li	a0, 32		# non-zero, then 32bytes
2:
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# At this point,
	# a0 = primary Dcache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = primary Dcache size in bytes
	# t0 = CONFIG value
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0	# t3=t1=start address for any cache
	add	t2, t3, a3	# t2=end address+1 of Dcache
	sub	t2, t2, a0	# t2=address of last line in Dcache
3:
	# The cache op runs for the current line, then the loop exits
	# once that line was the last one (t3 == t2 before increment).
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t3)
	bne	t3, t2, 3b	# more lines to do
	addu	t3, a0		# (delay slot) increment by Dcache line size


	# Now check CONFIG to see if there is a secondary cache
	lui	t2, (1 << (CR_SC-16)) # t2=mask over SC boolean
	and	t2, t2, t0	# test SC in CONFIG
	bnez	t2, 6f		# SC set: no secondary cache
	# (The srl below sits in the delay slot of the bnez and so always
	# executes; that is harmless because t3 is scratch.)

	# There is a secondary cache. Find out its sizes.

	srl	t3, t0, CR_SS	# want SS field of CONFIG
	andi	t3, t3, 0x3	# mask: now t3= code for cache size.
	# Each lui below is in the delay slot of the preceding beqz, so
	# a3 holds the size for the code being tested when a branch to
	# 4: is taken.
	beqz	t3, 4f
	lui	a3, ((512*1024)>>16)	# a3= 512K, code was 0
	addu	t3, -1			# decrement code
	beqz	t3, 4f
	lui	a3, ((1024*1024)>>16)	# a3= 1 M, code  1
	addu	t3, -1			# decrement code
	beqz	t3, 4f
	lui	a3, ((2*1024*1024)>>16)	# a3= 2 M, code 2
	j	6f			# no secondary cache, code 3
	# (The li at 4: below is in the delay slot of the j above; it
	# overwrites a0, which is not used again on the 6: path.)

4:	# a3 = secondary cache size in bytes
	li	a0, VR5000_2NDLINE	# no codes assigned for other than 32

	# At this point,
	# a0 = secondary cache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = secondary cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0	# t3=t1=start address for any cache
	add	t2, t3, a3	# t2=end address+1 of secondary cache
	sub	t2, t2, a0	# t2=address of last line in secondary cache
5:
	cache	INDEX_WRITEBACK_INVALIDATE_SD,0(t3)
	bne	t3, t2, 5b
	addu	t3, a0		# (delay slot) increment by line size


6:	# Any optional secondary cache done.  Now do I-cache and return.

	# At this point,
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	add	t2, t1, a2	# t2=end address+1 of Icache
	sub	t2, t2, a1	# t2=address of last line in Icache
7:
	cache	INDEX_INVALIDATE_I,0(t1)
	bne	t1, t2, 7b
	addu	t1, a1		# (delay slot) increment by Icache line size

	j	ra	# return to the caller
	nop

8:

# Vr5432 version of the cpu_flush code.
# (The distinctions being: CONFIG can not be trusted about secondary
# cache (which does not exist). The primary caches use Virtual Address Bit 0
# to control set selection.)

# Code does not consult CONFIG about cache sizes: knows the hardwired sizes.
# Since both I and D have the same size and line size, uses a merged loop.

	li	a0, VR5432_LINE
	li	a1, VR5432_SIZE
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# a0 = cache line size in bytes
	# a1 = 1/2 cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)

	add	t2, t1,	a1	# t2=end address+1
	sub	t2, t2, a0	# t2=address of last line (same for I and D)

9:
	# Offsets 0 and 1 differ only in address bit 0, which selects
	# the set on this part (see the note above).
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t1)	# set 0
	cache	INDEX_WRITEBACK_INVALIDATE_D,1(t1)	# set 1
	cache	INDEX_INVALIDATE_I,0(t1)	# set 0
	cache	INDEX_INVALIDATE_I,1(t1)	# set 1
	bne	t1, t2, 9b
	addu	t1, a0		# (delay slot) increment by line size

	j	ra	# return to the caller
	nop
	.set	reorder
	.end	__cpu_flush

	# NOTE: This variable should *NOT* be addressed relative to
	# the $gp register since this code is executed before $gp is
	# initialised... hence we leave it in the text area. This will
	# cause problems if this routine is ever ROMmed:

	# __buserr_cnt: 32-bit counter. It is incremented by __buserr,
	# cleared by __default_buserr_handler and read by __buserr_count.
	.globl	__buserr_cnt
__buserr_cnt:
	.word	0
	# __k1_save: 8-byte, 8-byte-aligned slot in which __buserr parks
	# k1 (with sd/ld) while it uses k1 as a pointer.
	.align	3
__k1_save:
	.word	0
	.word	0
	.align	2

        .ent __buserr
        .globl __buserr
__buserr:
        .set noat
	.set noreorder
	# Exception handler body, entered via the __exception_code stub.
	# For exception code 7 it counts the event in __buserr_cnt and
	# resumes at EPC+4; any other exception is passed on to the
	# handler code saved in __previous.
	# k0 and k1 available for use:
	mfc0	k0,C0_CAUSE
	nop
	nop
	andi	k0,k0,0x7c	# isolate the exception code field (bits 2..6)
	sub	k0,k0,7 << 2	# zero when the exception code is 7
	beq	k0,$0,__buserr_do
	nop
	# call the previous handler
	la	k0,__previous
	jr	k0
	nop
	#
__buserr_do:
	# The cause has already been checked above; only exception
	# code 7 reaches this point.
	# Save k1, because it is used as a pointer below:
	la	k0,__k1_save
	sd	k1,0(k0)
	#
        la      k1,__buserr_cnt
        lw      k0,0(k1)        # increment counter
        addu    k0,1
        sw      k0,0(k1)
	# Restore k1:
	la	k0,__k1_save
	ld	k1,0(k0)
	#
	# NOTE(review): EPC+4 assumes the faulting instruction was not in
	# a branch delay slot; confirm whether that case needs handling.
        mfc0    k0,C0_EPC
	nop
	nop
        addu    k0,k0,4		# skip offending instruction
	mtc0	k0,C0_EPC	# update EPC
	nop
	nop
	eret
#        j       k0
#        rfe
        .set reorder
        .set at
        .end __buserr

	# Template for the exception vector. __default_buserr_handler
	# copies the words between __exception_code and
	# __exception_code_end over the vector, so this stub must reach
	# __buserr through an absolute address held in k0.
__exception_code:
	.set noreorder
	lui	k0,%hi(__buserr)
	daddiu	k0,k0,%lo(__buserr)
	jr	k0
	nop			# delay slot of the jr
	.set reorder
__exception_code_end:

	.data
	# Buffer that receives the original contents of the exception
	# vector (same size as the __exception_code stub).
	# __default_buserr_handler fills it, __restore_buserr_handler
	# copies it back, and __buserr jumps into it for exceptions it
	# does not handle itself.
__previous:
	.space	(__exception_code_end - __exception_code)
	# Subtracting these two addresses works today
	# but is not guaranteed to continue working.
	# The assembler reserves the right to put these
	# two labels into different frags, and then
	# it cannot take their difference.

	.text

	.ent	__default_buserr_handler
	.globl	__default_buserr_handler
__default_buserr_handler:
        .set noreorder
	# attach our simple bus error handler:
	# in:  void
	# out: void
	# uses: a0-a3 and v0 are overwritten
	#
	# The current contents of the exception vector are saved in
	# __previous, the __exception_code stub is copied over the
	# vector, and __buserr_cnt is reset to zero.
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	# Vector address: 0x80000000+0x180 when BEV is clear,
	# 0xbfc00000+0x200+0x180 when BEV is set.
	beq	a1,$0,baseaddr
	lui	a0,0x8000	# delay slot
	lui	a0,0xbfc0
	daddiu	a0,a0,0x0200
baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a2,__exception_code
	subu	a1,a1,a2	# a1 = stub size in bytes (loop counter)
	la	a3,__previous
	# there must be a better way of doing this????
copyloop:
	# One word per pass: save the old vector word, then install
	# the matching stub word.
	lw	v0,0(a0)
	sw	v0,0(a3)
	lw	v0,0(a2)
	sw	v0,0(a0)
	daddiu	a0,a0,4
	daddiu	a2,a2,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,copyloop
	nop
	# NOTE(review): no cache writeback/invalidate follows the copy;
	# confirm whether callers are expected to call __cpu_flush.
        la      a0,__buserr_cnt
	sw	$0,0(a0)	# reset the bus error counter
	j	ra
	nop
        .set reorder
	.end	__default_buserr_handler

	.ent	__restore_buserr_handler
	.globl	__restore_buserr_handler
__restore_buserr_handler:
        .set noreorder
	# restore original (monitor) bus error handler
	# in:  void
	# out: void
	# uses: a0, a1, a3 and v0 are overwritten
	#
	# Copies the words saved in __previous back over the exception
	# vector. The vector address is chosen from the BEV bit exactly
	# as in __default_buserr_handler.
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,res_baseaddr
	lui	a0,0x8000	# delay slot
	lui	a0,0xbfc0
	daddiu	a0,a0,0x0200
res_baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a3,__exception_code	# a3 is only a temporary here
	subu	a1,a1,a3	# a1 = number of bytes to copy back
	la	a3,__previous	# a3 = source of the saved vector words
	# there must be a better way of doing this????
res_copyloop:
	lw	v0,0(a3)
	sw	v0,0(a0)
	daddiu	a0,a0,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,res_copyloop
	nop
	j	ra
	nop
        .set reorder
	.end	__restore_buserr_handler

	.ent	__buserr_count
	.globl	__buserr_count
__buserr_count:
        .set noreorder
	# return the current value of the bus error counter
	# in:  void
	# out: unsigned int __buserr_cnt
        la      v0,__buserr_cnt
	lw	v0,0(v0)	# v0 = counter value (return register)
	j	ra
	nop
        .set reorder
	.end	__buserr_count

/* EOF vr5xxx.S */
20000317 sourceware import 2000-03-17 23:48:54 +01:00			`/*`
			`* vr5xxx.S -- CPU specific support routines`
			`*`
			`* Copyright (c) 1999 Cygnus Solutions`
			`*`
			`* The authors hereby grant permission to use, copy, modify, distribute,`
			`* and license this software and its documentation for any purpose, provided`
			`* that existing copyright notices are retained in all copies and that this`
			`* notice is included verbatim in any distributions. No written agreement,`
			`* license, or royalty fee is required for any of the authorized uses.`
			`* Modifications to this software may be copyrighted by their authors`
			`* and need not follow the licensing terms described here, provided that`
			`* the new terms are clearly indicated on the first page of each file where`
			`* they apply.`
			`*/`

			`/* This file cloned from vr4300.S by dlindsay@cygnus.com`
			`* and recoded to suit Vr5432 and Vr5000.`
			`* Should be no worse for Vr43{00,05,10}.`
			`* Specifically, __cpu_flush() has been changed (a) to allow for the hardware`
			`* difference (in set associativity) between the Vr5432 and Vr5000,`
			`* and (b) to flush the optional secondary cache of the Vr5000.`
			`*/`

			`/* Processor Revision Identifier (PRID) Register: Implementation Numbers */`
			`#define IMPL_VR5432 0x54`

			`/* Cache Constants not determinable dynamically */`
			`#define VR5000_2NDLINE 32 /* secondary cache line size */`
			`#define VR5432_LINE 32 /* I,Dcache line sizes */`
			`#define VR5432_SIZE (16*1024) /* I,Dcache half-size */`


			`#ifndef __mips64`
			`.set mips3`
			`#endif`
			`#ifdef __mips16`
			`/* This file contains 32 bit assembly code. */`
			`.set nomips16`
			`#endif`

			`#include "regs.S"`

			`.text`
			`.align 2`

			`# Taken from "R4300 Preliminary RISC Processor Specification`
			`# Revision 2.0 January 1995" page 39: "The Count`
			`# register... increments at a constant rate... at one-half the`
			`# PClock speed."`
			`# We can use this fact to provide small polled delays.`
			`.globl __cpu_timer_poll`
			`.ent __cpu_timer_poll`
			`__cpu_timer_poll:`
			`.set noreorder`
			`# in: a0 = (unsigned int) number of PClock ticks to wait for`
			`# out: void`

			`# The Vr4300 counter updates at half PClock, so divide by 2 to`
			`# get counter delta:`
			`bnezl a0, 1f # continue if delta non-zero`
			`srl a0, a0, 1 # divide ticks by 2 {DELAY SLOT}`
			`# perform a quick return to the caller:`
			`j ra`
			`nop # {DELAY SLOT}`
			`1:`
2004-04-02 Chris Demetriou <cgd@broadcom.com> * mips/regs.S (C0_COUNT): Fix comment that kept this from being defined. * mips/vr4300.S: Use C0_COUNT as appropriate instead of hardcoding $9. * mips/vr5xxx.S: Likewise. 2004-04-03 03:02:51 +02:00			`mfc0 v0, C0_COUNT # get current counter value`
20000317 sourceware import 2000-03-17 23:48:54 +01:00			`nop`
			`nop`
			`# We cannot just do the simple test, of adding our delta onto`
			`# the current value (ignoring overflow) and then checking for`
			`# equality. The counter is incrementing every two PClocks,`
			`# which means the counter value can change between`
			`# instructions, making it hard to sample at the exact value`
			`# desired.`

			`# However, we do know that our entry delta value is less than`
			`# half the number space (since we divide by 2 on entry). This`
			`# means we can use a difference in signs to indicate timer`
			`# overflow.`
			`addu a0, v0, a0 # unsigned add (ignore overflow)`
			`# We know have our end value (which will have been`
			`# sign-extended to fill the 64bit register value).`
			`2:`
			`# get current counter value:`
2004-04-02 Chris Demetriou <cgd@broadcom.com> * mips/regs.S (C0_COUNT): Fix comment that kept this from being defined. * mips/vr4300.S: Use C0_COUNT as appropriate instead of hardcoding $9. * mips/vr5xxx.S: Likewise. 2004-04-03 03:02:51 +02:00			`mfc0 v0, C0_COUNT`
20000317 sourceware import 2000-03-17 23:48:54 +01:00			`nop`
			`nop`
			`# This is an unsigned 32bit subtraction:`
			`subu v0, a0, v0 # delta = (end - now) {DELAY SLOT}`
			`bgtzl v0, 2b # looping back is most likely`
			`nop`
			`# We have now been delayed (in the foreground) for AT LEAST`
			`# the required number of counter ticks.`
			`j ra # return to caller`
			`nop # {DELAY SLOT}`
			`.set reorder`
			`.end __cpu_timer_poll`

			`# Flush the processor caches to memory:`

			`.globl __cpu_flush`
			`.ent __cpu_flush`
			`__cpu_flush:`
			`.set noreorder`
			`# NOTE: The Vr4300 and Vr5432 CANNOT have any secondary cache.`
			`# On those, SC (bit 17 of CONFIG register) is hard-wired to 1,`
			`# except that email from Dennis_Han@el.nec.com says that old`
			`# versions of the Vr5432 incorrectly hard-wired this bit to 0.`
			`# The Vr5000 has an optional direct-mapped secondary cache,`
			`# and the SC bit correctly indicates this.`

			`# So, for the 4300 and 5432 we want to just`
			`# flush the primary Data and Instruction caches.`
			`# For the 5000 it is desired to flush the secondary cache too.`
			`# There is an operation difference worth noting.`
			`# The 4300 and 5000 primary caches use VA bit 14 to choose cache set,`
			`# whereas 5432 primary caches use VA bit 0.`

			`# This code interprets the relevant Config register bits as`
			`# much as possible, except for the 5432.`
			`# The code therefore has some portability.`
			`# However, the associativity issues mean you should not just assume`
			`# that this code works anywhere. Also, the secondary cache set`
			`# size is hardwired, since the 5000 series does not define codes`
			`# for variant sizes.`

			`# Note: this version of the code flushes D$ before I$.`
			`# It is difficult to construct a case where that matters,`
			`# but it cant hurt.`

			`mfc0 a0, C0_PRID # a0 = Processor Revision register`
			`nop # dlindsay: unclear why the nops, but`
			`nop # vr4300.S had such so I do too.`
			`srl a2, a0, PR_IMP # want bits 8..15`
			`andi a2, a2, 0x255 # mask: now a2 = Implementation # field`
			`li a1, IMPL_VR5432`
			`beq a1, a2, 8f # use Vr5432-specific flush algorithm`
			`nop`

			`# Non-Vr5432 version of the code.`
			`# (The distinctions being: CONFIG is truthful about secondary cache,`
			`# and we act as if the primary Icache and Dcache are direct mapped.)`

			`mfc0 t0, C0_CONFIG # t0 = CONFIG register`
			`nop`
			`nop`
			`li a1, 1 # a1=1, a useful constant`

			`srl a2, t0, CR_IC # want IC field of CONFIG`
			`andi a2, a2, 0x7 # mask: now a2= code for Icache size`
			`add a2, a2, 12 # +12`
			`sllv a2, a1, a2 # a2=primary instruction cache size in bytes`

			`srl a3, t0, CR_DC # DC field of CONFIG`
			`andi a3, a3, 0x7 # mask: now a3= code for Dcache size`
			`add a3, a3, 12 # +12`
			`sllv a3, a1, a3 # a3=primary data cache size in bytes`

			`li t2, (1 << CR_IB) # t2=mask over IB boolean`
			`and t2, t2, t0 # test IB field of CONFIG register value`
			`beqz t2, 1f #`
			`li a1, 16 # 16 bytes (branch shadow: always loaded.)`
			`li a1, 32 # non-zero, then 32bytes`
			`1:`

			`li t2, (1 << CR_DB) # t2=mask over DB boolean`
			`and t2, t2, t0 # test BD field of CONFIG register value`
			`beqz t2, 2f #`
			`li a0, 16 # 16bytes (branch shadow: always loaded.)`
			`li a0, 32 # non-zero, then 32bytes`
			`2:`
			`lui t1, ((K0BASE >> 16) & 0xFFFF)`
			`ori t1, t1, (K0BASE & 0xFFFF)`

			`# At this point,`
			`# a0 = primary Dcache line size in bytes`
			`# a1 = primary Icache line size in bytes`
			`# a2 = primary Icache size in bytes`
			`# a3 = primary Dcache size in bytes`
			`# t0 = CONFIG value`
			`# t1 = a round unmapped cached base address (we are in kernel mode)`
			`# t2,t3 scratch`

			`addi t3, t1, 0 # t3=t1=start address for any cache`
			`add t2, t3, a3 # t2=end adress+1 of Dcache`
			`sub t2, t2, a0 # t2=address of last line in Dcache`
			`3:`
			`cache INDEX_WRITEBACK_INVALIDATE_D,0(t3)`
			`bne t3, t2, 3b #`
			`addu t3, a0 # (delay slot) increment by Dcache line size`


			`# Now check CONFIG to see if there is a secondary cache`
			`lui t2, (1 << (CR_SC-16)) # t2=mask over SC boolean`
			`and t2, t2, t0 # test SC in CONFIG`
			`bnez t2, 6f`

			`# There is a secondary cache. Find out its sizes.`

			`srl t3, t0, CR_SS # want SS field of CONFIG`
			`andi t3, t3, 0x3 # mask: now t3= code for cache size.`
			`beqz t3, 4f`
			`lui a3, ((512*1024)>>16) # a3= 512K, code was 0`
			`addu t3, -1 # decrement code`
			`beqz t3, 4f`
			`lui a3, ((1024*1024)>>16) # a3= 1 M, code 1`
			`addu t3, -1 # decrement code`
			`beqz t3, 4f`
			`lui a3, ((2*1024*1024)>>16) # a3= 2 M, code 2`
			`j 6f # no secondary cache, code 3`

			`4: # a3 = secondary cache size in bytes`
			`li a0, VR5000_2NDLINE # no codes assigned for other than 32`

			`# At this point,`
			`# a0 = secondary cache line size in bytes`
			`# a1 = primary Icache line size in bytes`
			`# a2 = primary Icache size in bytes`
			`# a3 = secondary cache size in bytes`
			`# t1 = a round unmapped cached base address (we are in kernel mode)`
			`# t2,t3 scratch`

			`addi t3, t1, 0 # t3=t1=start address for any cache`
			`add t2, t3, a3 # t2=end address+1 of secondary cache`
			`sub t2, t2, a0 # t2=address of last line in secondary cache`
			`5:`
			`cache INDEX_WRITEBACK_INVALIDATE_SD,0(t3)`
			`bne t3, t2, 5b`
			`addu t3, a0 # (delay slot) increment by line size`


			`6: # Any optional secondary cache done. Now do I-cache and return.`

			`# At this point,`
			`# a1 = primary Icache line size in bytes`
			`# a2 = primary Icache size in bytes`
			`# t1 = a round unmapped cached base address (we are in kernel mode)`
			`# t2,t3 scratch`

			`add t2, t1, a2 # t2=end adress+1 of Icache`
			`sub t2, t2, a1 # t2=address of last line in Icache`
			`7:`
			`cache INDEX_INVALIDATE_I,0(t1)`
			`bne t1, t2, 7b`
			`addu t1, a1 # (delay slot) increment by Icache line size`

			`j ra # return to the caller`
			`nop`

			`8:`

			`# Vr5432 version of the cpu_flush code.`
			`# (The distinctions being: CONFIG can not be trusted about secondary`
			`# cache (which does not exist). The primary caches use Virtual Address Bit 0`
			`# to control set selection.`

			`# Code does not consult CONFIG about cache sizes: knows the hardwired sizes.`
			`# Since both I and D have the same size and line size, uses a merged loop.`

			`li a0, VR5432_LINE`
			`li a1, VR5432_SIZE`
			`lui t1, ((K0BASE >> 16) & 0xFFFF)`
			`ori t1, t1, (K0BASE & 0xFFFF)`

			`# a0 = cache line size in bytes`
			`# a1 = 1/2 cache size in bytes`
			`# t1 = a round unmapped cached base address (we are in kernel mode)`

			`add t2, t1, a1 # t2=end address+1`
			`sub t2, t2, a0 # t2=address of last line in Icache`

			`9:`
			`cache INDEX_WRITEBACK_INVALIDATE_D,0(t1) # set 0`
			`cache INDEX_WRITEBACK_INVALIDATE_D,1(t1) # set 1`
			`cache INDEX_INVALIDATE_I,0(t1) # set 0`
			`cache INDEX_INVALIDATE_I,1(t1) # set 1`
			`bne t1, t2, 9b`
			`addu t1, a0`

			`j ra # return to the caller`
			`nop`
			`.set reorder`
			`.end __cpu_flush`

			`# NOTE: This variable should NOT be addressed relative to`
			`# the $gp register since this code is executed before $gp is`
			`# initialised... hence we leave it in the text area. This will`
			`# cause problems if this routine is ever ROMmed:`

			`.globl __buserr_cnt`
			`__buserr_cnt:`
			`.word 0`
			`.align 3`
			`__k1_save:`
			`.word 0`
			`.word 0`
			`.align 2`

			`.ent __buserr`
			`.globl __buserr`
			`__buserr:`
			`.set noat`
			`.set noreorder`
			`# k0 and k1 available for use:`
			`mfc0 k0,C0_CAUSE`
			`nop`
			`nop`
			`andi k0,k0,0x7c`
			`sub k0,k0,7 << 2`
			`beq k0,$0,__buserr_do`
			`nop`
			`# call the previous handler`
			`la k0,__previous`
			`jr k0`
			`nop`
			`#`
			`__buserr_do:`
			`# TODO: check that the cause is indeed a bus error`
			`# - if not then just jump to the previous handler`
			`la k0,__k1_save`
			`sd k1,0(k0)`
			`#`
			`la k1,__buserr_cnt`
			`lw k0,0(k1) # increment counter`
			`addu k0,1`
			`sw k0,0(k1)`
			`#`
			`la k0,__k1_save`
			`ld k1,0(k0)`
			`#`
			`mfc0 k0,C0_EPC`
			`nop`
			`nop`
			`addu k0,k0,4 # skip offending instruction`
			`mtc0 k0,C0_EPC # update EPC`
			`nop`
			`nop`
			`eret`
			`# j k0`
			`# rfe`
			`.set reorder`
			`.set at`
			`.end __buserr`

			`__exception_code:`
			`.set noreorder`
			`lui k0,%hi(__buserr)`
			`daddiu k0,k0,%lo(__buserr)`
			`jr k0`
			`nop`
			`.set reorder`
			`__exception_code_end:`

			`.data`
			`__previous:`
			`.space (__exception_code_end - __exception_code)`
			`# This subtracting two addresses is working`
			`# but is not garenteed to continue working.`
			`# The assemble reserves the right to put these`
			`# two labels into different frags, and then`
			`# cant take their difference.`

			`.text`

			`.ent __default_buserr_handler`
			`.globl __default_buserr_handler`
			`__default_buserr_handler:`
			`.set noreorder`
			`# attach our simple bus error handler:`
			`# in: void`
			`# out: void`
			`mfc0 a0,C0_SR`
			`nop`
			`li a1,SR_BEV`
			`and a1,a1,a0`
			`beq a1,$0,baseaddr`
			`lui a0,0x8000 # delay slot`
			`lui a0,0xbfc0`
			`daddiu a0,a0,0x0200`
			`baseaddr:`
			`daddiu a0,a0,0x0180`
			`# a0 = base vector table address`
			`la a1,__exception_code_end`
			`la a2,__exception_code`
			`subu a1,a1,a2`
			`la a3,__previous`
			`# there must be a better way of doing this????`
			`copyloop:`
			`lw v0,0(a0)`
			`sw v0,0(a3)`
			`lw v0,0(a2)`
			`sw v0,0(a0)`
			`daddiu a0,a0,4`
			`daddiu a2,a2,4`
			`daddiu a3,a3,4`
			`subu a1,a1,4`
			`bne a1,$0,copyloop`
			`nop`
			`la a0,__buserr_cnt`
			`sw $0,0(a0)`
			`j ra`
			`nop`
			`.set reorder`
			`.end __default_buserr_handler`

			`.ent __restore_buserr_handler`
			`.globl __restore_buserr_handler`
			`__restore_buserr_handler:`
			`.set noreorder`
			`# restore original (monitor) bus error handler`
			`# in: void`
			`# out: void`
			`mfc0 a0,C0_SR`
			`nop`
			`li a1,SR_BEV`
			`and a1,a1,a0`
			`beq a1,$0,res_baseaddr`
			`lui a0,0x8000 # delay slot`
			`lui a0,0xbfc0`
			`daddiu a0,a0,0x0200`
			`res_baseaddr:`
			`daddiu a0,a0,0x0180`
			`# a0 = base vector table address`
			`la a1,__exception_code_end`
			`la a3,__exception_code`
			`subu a1,a1,a3`
			`la a3,__previous`
			`# there must be a better way of doing this????`
			`res_copyloop:`
			`lw v0,0(a3)`
			`sw v0,0(a0)`
			`daddiu a0,a0,4`
			`daddiu a3,a3,4`
			`subu a1,a1,4`
			`bne a1,$0,res_copyloop`
			`nop`
			`j ra`
			`nop`
			`.set reorder`
			`.end __restore_buserr_handler`

			`.ent __buserr_count`
			`.globl __buserr_count`
			`__buserr_count:`
			`.set noreorder`
			`# restore original (monitor) bus error handler`
			`# in: void`
			`# out: unsigned int __buserr_cnt`
			`la v0,__buserr_cnt`
			`lw v0,0(v0)`
			`j ra`
			`nop`
			`.set reorder`
			`.end __buserr_count`

			`/* EOF vr5xxx.S */`