[PATCH v1 01/10] mips: octeon: Initial minimal support for the Marvell Octeon SoC

Daniel Schwierzeck daniel.schwierzeck at gmail.com
Wed May 13 14:49:33 CEST 2020


sorry for the delay ;)

Am 02.05.20 um 10:59 schrieb Stefan Roese:
> From: Aaron Williams <awilliams at marvell.com>
> 
> This patch adds very basic support for the Octeon III SoCs. Only
> CFI parallel NOR flash and UART is supported for now.
> 
> Please note that the basic Octeon port does not include the DDR3/4
> initialization yet. This will be added in some follow-up patches
> later. To still use U-Boot with this port, the L2 cache (4MiB on
> Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the
> prompt on such boards.

This patch should come after the common MIPS patches.

> 
> Signed-off-by: Aaron Williams <awilliams at marvell.com>
> Signed-off-by: Stefan Roese <sr at denx.de>
> ---
> 
>  MAINTAINERS                                  |    6 +
>  arch/Kconfig                                 |    1 +
>  arch/mips/Kconfig                            |   49 +-
>  arch/mips/Makefile                           |    7 +
>  arch/mips/cpu/Makefile                       |    4 +-
>  arch/mips/include/asm/arch-octeon/cavm-reg.h |   42 +
>  arch/mips/include/asm/arch-octeon/clock.h    |   24 +
>  arch/mips/mach-octeon/Kconfig                |   92 ++
>  arch/mips/mach-octeon/Makefile               |   10 +
>  arch/mips/mach-octeon/clock.c                |   22 +
>  arch/mips/mach-octeon/cpu.c                  |   55 +
>  arch/mips/mach-octeon/dram.c                 |   27 +
>  arch/mips/mach-octeon/include/ioremap.h      |   30 +
>  arch/mips/mach-octeon/start.S                | 1241 ++++++++++++++++++
>  14 files changed, 1608 insertions(+), 2 deletions(-)
>  create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h
>  create mode 100644 arch/mips/include/asm/arch-octeon/clock.h
>  create mode 100644 arch/mips/mach-octeon/Kconfig
>  create mode 100644 arch/mips/mach-octeon/Makefile
>  create mode 100644 arch/mips/mach-octeon/clock.c
>  create mode 100644 arch/mips/mach-octeon/cpu.c
>  create mode 100644 arch/mips/mach-octeon/dram.c
>  create mode 100644 arch/mips/mach-octeon/include/ioremap.h
>  create mode 100644 arch/mips/mach-octeon/start.S
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 66f0b07263..29f2d7328c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -749,6 +749,12 @@ M:	Ezequiel Garcia <ezequiel at collabora.com>
>  S:	Maintained
>  F:	arch/mips/mach-jz47xx/
>  
> +MIPS Octeon
> +M:	Aaron Williams <awilliams at marvell.com>
> +S:	Maintained
> +F:	arch/mips/mach-octeon/
> +F:	arch/mips/include/asm/arch-octeon/
> +
>  MMC
>  M:	Peng Fan <peng.fan at nxp.com>
>  S:	Maintained
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 91e049b322..1cd3e1dc0b 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -37,6 +37,7 @@ config MICROBLAZE
>  
>  config MIPS
>  	bool "MIPS architecture"
> +	select CREATE_ARCH_SYMLINK

You should not need that. The path arch/mips/mach-octeon/include/ will
be automatically added to the include search paths. Thus, move all files
in arch/mips/include/asm/arch-octeon/ to arch/mips/mach-octeon/include/

>  	select HAVE_ARCH_IOREMAP
>  	select HAVE_PRIVATE_LIBGCC
>  	select SUPPORT_OF_CONTROL
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 48e754cc46..3c7f3eb94f 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -106,6 +106,24 @@ config ARCH_JZ47XX
>  	select OF_CONTROL
>  	select DM
>  
> +config ARCH_OCTEON
> +	bool "Support Marvell Octeon CN7xxx platforms"
> +	select DISPLAY_CPUINFO
> +	select DMA_ADDR_T_64BIT
> +	select DM
> +	select DM_SERIAL
> +	select MIPS_CACHE_COHERENT
> +	select MIPS_INIT_STACK_IN_SRAM
> +	select MIPS_L2_CACHE
> +	select MIPS_TUNE_OCTEON3
> +	select ROM_EXCEPTION_VECTORS
> +	select SUPPORTS_BIG_ENDIAN
> +	select SUPPORTS_CPU_MIPS64_OCTEON
> +	select PHYS_64BIT
> +	select OF_CONTROL
> +	select OF_LIVE
> +	imply CMD_DM
> +
>  config MACH_PIC32
>  	bool "Support Microchip PIC32"
>  	select DM
> @@ -160,6 +178,7 @@ source "arch/mips/mach-bmips/Kconfig"
>  source "arch/mips/mach-jz47xx/Kconfig"
>  source "arch/mips/mach-pic32/Kconfig"
>  source "arch/mips/mach-mtmips/Kconfig"
> +source "arch/mips/mach-octeon/Kconfig"
>  
>  if MIPS
>  
> @@ -233,6 +252,14 @@ config CPU_MIPS64_R6
>  	  Choose this option to build a kernel for release 6 or later of the
>  	  MIPS64 architecture.
>  
> +config CPU_MIPS64_OCTEON
> +	bool "Marvell Octeon series of CPUs"
> +	depends on SUPPORTS_CPU_MIPS64_OCTEON
> +	select 64BIT
> +	help
> +	 Choose this option for Marvell Octeon CPUs.  These CPUs are between
> +	 MIPS64 R5 and R6 with other extensions.
> +
>  endchoice
>  
>  menu "General setup"
> @@ -261,7 +288,7 @@ config MIPS_CM_BASE
>  config MIPS_CACHE_INDEX_BASE
>  	hex "Index base address for cache initialisation"
>  	default 0x80000000 if CPU_MIPS32
> -	default 0xffffffff80000000 if CPU_MIPS64
> +	default 0xFFFFFFFFC0000000 if ARCH_OCTEON
>  	help
>  	  This is the base address for a memory block, which is used for
>  	  initialising the cache lines. This is also the base address of a memory
> @@ -342,6 +369,14 @@ config SPL_LOADER_SUPPORT
>  	help
>  	  Enable this option if you want to use SPL loaders without DM enabled.
>  
> +config MIPS_CACHE_COHERENT
> +	bool "Set if MIPS processor is cache coherent"
> +	help
> +	 Enable this if the MIPS architecture is cache coherent like the
> +	 Marvell Octeon series of SoCs.  When this is set, cache flushes
> +	 and invalidates only flush the write buffer since the hardware
> +	 maintains cache coherency.
> +
>  endmenu
>  
>  menu "OS boot interface"
> @@ -398,6 +433,9 @@ config SUPPORTS_CPU_MIPS64_R2
>  config SUPPORTS_CPU_MIPS64_R6
>  	bool
>  
> +config SUPPORTS_CPU_MIPS64_OCTEON
> +	bool
> +
>  config CPU_MIPS32
>  	bool
>  	default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6
> @@ -405,6 +443,7 @@ config CPU_MIPS32
>  config CPU_MIPS64
>  	bool
>  	default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6
> +	default y if CPU_MIPS64_OCTEON
>  
>  config MIPS_TUNE_4KC
>  	bool
> @@ -421,6 +460,9 @@ config MIPS_TUNE_34KC
>  config MIPS_TUNE_74KC
>  	bool
>  
> +config MIPS_TUNE_OCTEON3
> +	bool
> +
>  config 32BIT
>  	bool
>  
> @@ -453,6 +495,11 @@ config MIPS_SRAM_INIT
>  	  before it can be used. If enabled, a function mips_sram_init() will
>  	  be called just before setup_stack_gd.
>  
> +config DMA_ADDR_T_64BIT
> +	bool
> +	help
> +	 Select this to enable 64-bit DMA addressing
> +
>  config SYS_DCACHE_SIZE
>  	int
>  	default 0
> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
> index af3f227436..fa1ba7855a 100644
> --- a/arch/mips/Makefile
> +++ b/arch/mips/Makefile
> @@ -1,6 +1,10 @@
>  # SPDX-License-Identifier: GPL-2.0+
>  
> +ifneq ($(CONFIG_ARCH_OCTEON),y)
>  head-y := arch/mips/cpu/start.o
> +else
> +head-y := arch/mips/mach-octeon/start.o
> +endif
>  
>  ifeq ($(CONFIG_SPL_BUILD),y)
>  ifneq ($(CONFIG_SPL_START_S_PATH),)
> @@ -17,6 +21,7 @@ machine-$(CONFIG_ARCH_JZ47XX) += jz47xx
>  machine-$(CONFIG_MACH_PIC32) += pic32
>  machine-$(CONFIG_ARCH_MTMIPS) += mtmips
>  machine-$(CONFIG_ARCH_MSCC) += mscc
> +machine-${CONFIG_ARCH_OCTEON} += octeon
>  
>  machdirs := $(patsubst %,arch/mips/mach-%/,$(machine-y))
>  libs-y += $(machdirs)
> @@ -30,6 +35,7 @@ arch-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,-mips32r6
>  arch-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,-mips64
>  arch-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,-mips64r2
>  arch-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,-mips64r6
> +arch-${CONFIG_CPU_MIPS64_OCTEON} += -march=octeon3
>  
>  # Allow extra optimization for specific CPUs/SoCs
>  tune-$(CONFIG_MIPS_TUNE_4KC) += -mtune=4kc
> @@ -37,6 +43,7 @@ tune-$(CONFIG_MIPS_TUNE_14KC) += -mtune=14kc
>  tune-$(CONFIG_MIPS_TUNE_24KC) += -mtune=24kc
>  tune-$(CONFIG_MIPS_TUNE_34KC) += -mtune=34kc
>  tune-$(CONFIG_MIPS_TUNE_74KC) += -mtune=74kc
> +tune-${CONFIG_MIPS_TUNE_OCTEON3} += -mtune=octeon3
>  
>  # Include default header files
>  cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic
> diff --git a/arch/mips/cpu/Makefile b/arch/mips/cpu/Makefile
> index 6df7bb4e48..732015d6f3 100644
> --- a/arch/mips/cpu/Makefile
> +++ b/arch/mips/cpu/Makefile
> @@ -1,6 +1,8 @@
>  # SPDX-License-Identifier: GPL-2.0+
>  
> -extra-y	= start.o
> +ifneq ($(CONFIG_ARCH_OCTEON),y)
> +extra-y = start.o
> +endif
>  
>  obj-y += time.o
>  obj-y += interrupts.o
> diff --git a/arch/mips/include/asm/arch-octeon/cavm-reg.h b/arch/mips/include/asm/arch-octeon/cavm-reg.h
> new file mode 100644
> index 0000000000..b961e54956
> --- /dev/null
> +++ b/arch/mips/include/asm/arch-octeon/cavm-reg.h
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier:    GPL-2.0 */
> +/*
> + * Copyright (C) 2020 Marvell International Ltd.
> + */
> +
> +#ifndef __CAVM_REG_H__
> +
> +/* Register offsets */
> +#define CAVM_CIU_FUSE			((u64 *)0x80010100000001a0)
> +#define CAVM_MIO_BOOT_REG_CFG0		((u64 *)0x8001180000000000)
> +#define CAVM_RST_BOOT			((u64 *)0x8001180006001600)
> +
> +/* Register structs */
> +
> +/**
> + * Register (RSL) rst_boot
> + *
> + * RST Boot Register
> + */
> +union cavm_rst_boot {
> +	u64 u;
> +	struct cavm_rst_boot_s {
> +		u64 chipkill                         : 1;
> +		u64 jtcsrdis                         : 1;
> +		u64 ejtagdis                         : 1;
> +		u64 romen                            : 1;
> +		u64 ckill_ppdis                      : 1;
> +		u64 jt_tstmode                       : 1;
> +		u64 vrm_err                          : 1;
> +		u64 reserved_37_56                   : 20;
> +		u64 c_mul                            : 7;
> +		u64 pnr_mul                          : 6;
> +		u64 reserved_21_23                   : 3;
> +		u64 lboot_oci                        : 3;
> +		u64 lboot_ext                        : 6;
> +		u64 lboot                            : 10;
> +		u64 rboot                            : 1;
> +		u64 rboot_pin                        : 1;
> +	} s;
> +};
> +
> +#endif /* __CAVM_REG_H__ */
> diff --git a/arch/mips/include/asm/arch-octeon/clock.h b/arch/mips/include/asm/arch-octeon/clock.h
> new file mode 100644
> index 0000000000..a844a222c9
> --- /dev/null
> +++ b/arch/mips/include/asm/arch-octeon/clock.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier:    GPL-2.0 */
> +/*
> + * Copyright (C) 2018, 2019 Marvell International Ltd.
> + *
> + * https://spdx.org/licenses
> + */
> +
> +#ifndef __CLOCK_H__
> +
> +/** System PLL reference clock */
> +#define PLL_REF_CLK                     50000000        /* 50 MHz */
> +#define NS_PER_REF_CLK_TICK             (1000000000 / PLL_REF_CLK)
> +
> +/**
> + * Returns the I/O clock speed in Hz
> + */
> +u64 octeon_get_io_clock(void);
> +
> +/**
> + * Returns the core clock speed in Hz
> + */
> +u64 octeon_get_core_clock(void);
> +
> +#endif /* __CLOCK_H__ */
> diff --git a/arch/mips/mach-octeon/Kconfig b/arch/mips/mach-octeon/Kconfig
> new file mode 100644
> index 0000000000..67fcb6058c
> --- /dev/null
> +++ b/arch/mips/mach-octeon/Kconfig
> @@ -0,0 +1,92 @@
> +menu "Octeon platforms"
> +	depends on ARCH_OCTEON
> +
> +config SYS_SOC
> +	string
> +	default "octeon"
> +
> +config OCTEON_CN7XXX
> +	bool "Octeon CN7XXX SoC"
> +
> +config OCTEON_CN70XX
> +	bool "Octeon CN70XX SoC"
> +	select OCTEON_CN7XXX
> +
> +config OCTEON_CN73XX
> +	bool "Octeon CN73XX SoC"
> +	select OCTEON_CN7XXX
> +
> +config OCTEON_CN78XX
> +	bool "Octeon CN78XX SoC"
> +	select OCTEON_CN7XXX
> +
> +choice
> +	prompt "Octeon MIPS family select"
> +
> +config SOC_OCTEON2
> +	bool "Octeon II family"
> +	help
> +	 This selects the Octeon II SoC family

This should be added later, when needed.

> +
> +config SOC_OCTEON3
> +	bool "Octeon III family"
> +	help
> +	 This selects the Octeon III SoC family CN70xx, CN73XX, CN78xx
> +	 and CNF75XX.
> +
> +endchoice
> +
> +config SYS_DCACHE_SIZE
> +	default 32768
> +
> +config SYS_DCACHE_LINE_SIZE
> +	default 128
> +
> +config SYS_ICACHE_SIZE
> +	default	79872
> +
> +config SYS_ICACHE_LINE_SIZE
> +	default 128
> +
> +config OCTEON_BIG_STACK_SIZE
> +	hex
> +	default 0x4000
> +	help
> +	 This enables a larger stack needed for Octeon 3 DRAM initialization.
> +	 If this is disabled then a part of the L1 cache will be reserved for
> +	 the stack, resulting in a smaller image.  If this is true then
> +	 a portion of the TEXT address space will be reserved for the stack.
> +	 Note that this requires that U-Boot MUST be able to fit entirely
> +	 within the L2 cache and cannot be executed from a parallel NOR flash.
> +	 The default size is 16KiB.
> +
> +config OCTEON_COPY_FROM_FLASH_TO_L2
> +	bool
> +	default y
> +	help
> +	 Set this for U-Boot to attempt to copy itself from flash memory into
> +	 the L2 cache.  This significantly improves the boot performance.
> +
> +config OCTEON_L2_MEMCPY_IN_CACHE
> +	bool
> +	default y
> +	help
> +	 If this is set then the memcpy code that is used to copy U-Boot from
> +	 the flash to the L2 cache is written to the L2 cache.  This
> +	 significantly speeds up the memcpy operation.
> +
> +config OCTEON_L2_UBOOT_ADDR
> +	hex
> +	default 0xffffffff81000000
> +	help
> +	 This specifies the address where U-Boot will be copied into the L2
> +	 cache.
> +
> +config OCTEON_L2_MEMCPY_ADDR
> +	hex
> +	default 0xffffffff81400000
> +	help
> +	 This specifies where U-Boot will place the memcpy routine used for
> +	 copying U-Boot from flash to L2 cache.
> +
> +endmenu
> diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile
> new file mode 100644
> index 0000000000..a5fda682a7
> --- /dev/null
> +++ b/arch/mips/mach-octeon/Makefile
> @@ -0,0 +1,10 @@
> +# (C) Copyright 2019 Marvell, Inc.
> +#
> +# SPDX-License-Identifier:	GPL-2.0+
> +#
> +
> +extra-y = start.o
> +
> +obj-y += clock.o
> +obj-y += cpu.o
> +obj-y += dram.o
> diff --git a/arch/mips/mach-octeon/clock.c b/arch/mips/mach-octeon/clock.c
> new file mode 100644
> index 0000000000..6e32008641
> --- /dev/null
> +++ b/arch/mips/mach-octeon/clock.c
> @@ -0,0 +1,22 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2018, 2019 Marvell International Ltd.
> + */
> +
> +#include <common.h>
> +#include <asm/arch/clock.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +int octeon_get_timer_freq(void)
> +{
> +	return gd->cpu_clk;
> +}
> +
> +/**
> + * Returns the I/O clock speed in Hz
> + */
> +u64 octeon_get_io_clock(void)
> +{
> +	return gd->bus_clk;
> +}
> diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c
> new file mode 100644
> index 0000000000..a1373c6d56
> --- /dev/null
> +++ b/arch/mips/mach-octeon/cpu.c
> @@ -0,0 +1,55 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (C) 2020 Marvell International Ltd.
> + */
> +
> +#include <common.h>
> +#include <linux/io.h>
> +#include <asm/arch/clock.h>
> +#include <asm/arch-octeon/cavm-reg.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +static int get_clocks(void)
> +{
> +	const u64 ref_clock = PLL_REF_CLK;
> +	union cavm_rst_boot rst_boot;
> +
> +	rst_boot.u = ioread64(CAVM_RST_BOOT);
> +	gd->cpu_clk = ref_clock * rst_boot.s.c_mul;
> +	gd->bus_clk = ref_clock * rst_boot.s.pnr_mul;
> +
> +	debug("%s: cpu: %lu, bus: %lu\n", __func__, gd->cpu_clk, gd->bus_clk);
> +
> +	return 0;
> +}
> +
> +/* Early mach init code run from flash */
> +int mach_cpu_init(void)
> +{
> +	/* Remap boot-bus 0x1fc0.0000 -> 0x1f40.0000 */
> +	/* ToDo: Move this to an early running bus (bootbus) DM driver */
> +	clrsetbits_be64(CAVM_MIO_BOOT_REG_CFG0, 0xffff, 0x1f40);
> +
> +	/* Get clocks and store them in GD */
> +	get_clocks();
> +
> +	return 0;
> +}
> +
> +/**
> + * Returns number of cores
> + *
> + * @return	number of CPU cores for the specified node
> + */
> +static int cavm_octeon_num_cores(void)
> +{
> +	return fls64(ioread64(CAVM_CIU_FUSE) & 0xffffffffffff);
> +}
> +
> +int print_cpuinfo(void)
> +{
> +	printf("SoC:   Octeon CN73xx (%d cores)\n", cavm_octeon_num_cores());
> +
> +	return 0;
> +}
> diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c
> new file mode 100644
> index 0000000000..c16a73e8e6
> --- /dev/null
> +++ b/arch/mips/mach-octeon/dram.c
> @@ -0,0 +1,27 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Copyright (C) 2020 Marvell International Ltd.
> + */
> +
> +#include <common.h>
> +#include <dm.h>
> +#include <ram.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +int dram_init(void)
> +{
> +	/*
> +	 * No DDR init yet -> run in L2 cache
> +	 */
> +	gd->ram_size = (2 << 20);
> +	gd->bd->bi_dram[0].size = gd->ram_size;
> +	gd->bd->bi_dram[1].size = 0;
> +
> +	return 0;
> +}
> +
> +ulong board_get_usable_ram_top(ulong total_size)
> +{
> +	return gd->ram_top;
> +}
> diff --git a/arch/mips/mach-octeon/include/ioremap.h b/arch/mips/mach-octeon/include/ioremap.h
> new file mode 100644
> index 0000000000..59b75008a2
> --- /dev/null
> +++ b/arch/mips/mach-octeon/include/ioremap.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_MACH_OCTEON_IOREMAP_H
> +#define __ASM_MACH_OCTEON_IOREMAP_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * Allow physical addresses to be fixed up to help peripherals located
> + * outside the low 32-bit range -- generic pass-through version.
> + */
> +static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr,
> +					     phys_addr_t size)
> +{
> +	return phys_addr;
> +}
> +
> +static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size,
> +					 unsigned long flags)
> +{
> +	return (void __iomem *)(XKPHYS | offset);
> +}
> +
> +static inline int plat_iounmap(const volatile void __iomem *addr)
> +{
> +	return 0;
> +}
> +
> +#define _page_cachable_default	_CACHE_CACHABLE_NONCOHERENT
> +
> +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */
> diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S
> new file mode 100644
> index 0000000000..acb967201a
> --- /dev/null
> +++ b/arch/mips/mach-octeon/start.S
> @@ -0,0 +1,1241 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + *  Startup Code for OCTEON 64-bit CPU-core
> + *
> + *  Copyright (c) 2003	Wolfgang Denk <wd at denx.de>
> + *  Copyright 2004, 2005, 2010 - 2015 Cavium Inc..
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <asm/regdef.h>
> +#include <asm/mipsregs.h>
> +#include <asm/asm.h>
> +
> +#define BOOT_VECTOR_NUM_WORDS		8
> +
> +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET	0x70
> +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET	0x78
> +
> +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW	0xdb00110ad358eacd
> +#define OCTEON_BOOT_MOVEABLE_MAGIC1	OCTEON_BOOT_MOVEABLE_MAGIC1_RAW
> +
> +#define OCTEON_CIU_SOFT_RST		0x8001070000000740
> +
> +#define	OCTEON_L2C_WPAR_PP0		0x8001180080840000
> +#define OCTEON_MIO_BOOT_BASE		0x8001180000000000
> +#define OCTEON_MIO_BOOT_REG_CFG0_OFF	0x0000
> +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF	0x0080
> +#define OCTEON_MIO_BOOT_LOC_ADR_OFF	0x0090
> +#define OCTEON_MIO_BOOT_LOC_DAT_OFF	0x0098
> +#define	OCTEON_MIO_RST_BOOT		0x8001180000001600
> +#define OCTEON_MIO_BOOT_REG_CFG0	0x8001180000000000
> +#define	OCTEON_MIO_BOOT_REG_TIM0	0x8001180000000040
> +#define OCTEON_MIO_BOOT_LOC_CFG0	0x8001180000000080
> +#define OCTEON_MIO_BOOT_LOC_ADR		0x8001180000000090
> +#define OCTEON_MIO_BOOT_LOC_DAT		0x8001180000000098
> +#define	OCTEON_MIO_FUSE_DAT3		0x8001180000001418
> +#define OCTEON_L2D_FUS3			0x80011800800007B8
> +#define	OCTEON_LMC0_DDR_PLL_CTL		0x8001180088000258
> +
> +#define OCTEON_RST			0x8001180006000000
> +#define OCTEON_RST_BOOT_OFFSET		0x1600
> +#define OCTEON_RST_SOFT_RST_OFFSET	0x1680
> +#define OCTEON_RST_COLD_DATAX_OFFSET(X)	(0x17C0 + (X) * 8)
> +#define OCTEON_RST_BOOT			0x8001180006001600
> +#define OCTEON_RST_SOFT_RST		0x8001180006001680
> +#define OCTEON_RST_COLD_DATAX(X)	(0x80011800060017C0 + (X) * 8)
> +
> +#define OCTEON_OCX_COM_NODE		0x8001180011000000
> +#define OCTEON_L2C_OCI_CTL		0x8001180080800020
> +#define OCTEON_L2C_TAD_CTL		0x8001180080800018
> +#define OCTEON_L2C_CTL			0x8001180080800000
> +
> +#define OCTEON_DBG_DATA			0x80011F00000001E8
> +#define OCTEON_PCI_READ_CMD_E		0x80011F0000001188
> +#define OCTEON_NPEI_DBG_DATA		0x80011F0000008510
> +#define OCTEON_CIU_WDOG(X)		(0x8001070000000500 + (X) * 8)
> +#define OCTEON_CIU_PP_POKE(X)		(0x8001070000000580 + (X) * 8)
> +#define OCTEON_CIU3_WDOG(X)		(0x8001010000020000 + (X) * 8)
> +#define OCTEON_CIU3_PP_POKE(X)		(0x8001010000030000 + (X) * 8)
> +#define OCTEON_OCX_COM_LINKX_CTL(X)	(0x8001180011000020 + (X) * 8)
> +#define OCTEON_SLI_CTL_STATUS		0x80011F0000028570
> +#define OCTEON_GSERX_SCRATCH(X)		(0x8001180090000020 + (X) * 0x1000000)
> +
> +/** PRID for CN56XX */
> +#define OCTEON_PRID_CN56XX		0x04
> +/** PRID for CN52XX */
> +#define OCTEON_PRID_CN52XX		0x07
> +/** PRID for CN63XX */
> +#define OCTEON_PRID_CN63XX		0x90
> +/** PRID for CN68XX */
> +#define OCTEON_PRID_CN68XX		0x91
> +/** PRID for CN66XX */
> +#define OCTEON_PRID_CN66XX		0x92
> +/** PRID for CN61XX */
> +#define OCTEON_PRID_CN61XX		0x93
> +/** PRID for CNF71XX */
> +#define OCTEON_PRID_CNF71XX		0x94
> +/** PRID for CN78XX */
> +#define OCTEON_PRID_CN78XX		0x95
> +/** PRID for CN70XX */
> +#define OCTEON_PRID_CN70XX		0x96
> +/** PRID for CN73XX */
> +#define OCTEON_PRID_CN73XX		0x97
> +/** PRID for CNF75XX */
> +#define OCTEON_PRID_CNF75XX		0x98
> +
> +/* func argument is used to create a mark, must be unique */
> +#define GETOFFSET(reg, func)	\
> +	.balign	8;		\
> +	bal	func ##_mark;	\
> +	nop;			\
> +	.dword	.;		\
> +func ##_mark:			\
> +	ld	reg, 0(ra);	\
> +	dsubu	reg, ra, reg;
> +
> +#define JAL(func)		\
> +	.balign	8;		\
> +	bal	func ##_mark;	\
> +	 nop;			\
> +	.dword .;		\
> +func ##_mark:			\
> +	ld	t8, 0(ra);	\
> +	dsubu	t8, ra, t8;	\
> +	dla	t9, func;	\
> +	daddu	t9, t9, t8;	\
> +	jalr	t9;		\
> +	 nop;
> +
> +	.set	arch=octeon3
> +	.set	noreorder
> +
> +	.macro uhi_mips_exception
> +	move	k0, t9		# preserve t9 in k0
> +	move	k1, a0		# preserve a0 in k1
> +	li	t9, 15		# UHI exception operation
> +	li	a0, 0		# Use hard register context
> +	sdbbp	1		# Invoke UHI operation
> +	.endm
> +
> +	.macro setup_stack_gd
> +	li	t0, -16
> +	PTR_LI	t1, big_stack_start
> +	and	sp, t1, t0		# force 16 byte alignment
> +	PTR_SUBU \
> +		sp, sp, GD_SIZE		# reserve space for gd
> +	and	sp, sp, t0		# force 16 byte alignment
> +	move	k0, sp			# save gd pointer
> +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
> +    !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
> +	li	t2, CONFIG_VAL(SYS_MALLOC_F_LEN)
> +	PTR_SUBU \
> +		sp, sp, t2		# reserve space for early malloc
> +	and	sp, sp, t0		# force 16 byte alignment
> +#endif
> +	move	fp, sp
> +
> +	/* Clear gd */
> +	move	t0, k0
> +1:
> +	PTR_S	zero, 0(t0)
> +	PTR_ADDIU t0, PTRSIZE
> +	blt	t0, t1, 1b
> +	 nop
> +
> +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
> +    !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
> +	PTR_S	sp, GD_MALLOC_BASE(k0)	# gd->malloc_base offset
> +#endif
> +	.endm
> +
> +/* Saved register usage:
> + * s0:	not used
> + * s1:	not used
> + * s2:	Address U-Boot loaded into in L2 cache
> + * s3:	Start address
> + * s4:	flags
> + *		1:	booting from RAM
> + *		2:	executing out of cache
> + *		4:	booting from flash
> + * s5:	u-boot size (data end - _start)
> + * s6:	offset in flash.
> + * s7:	_start physical address
> + * s8:
> + */
> +
> +ENTRY(_start)
> +	/* U-Boot entry point */
> +	b	reset
> +
> +	/* The above jump instruction/nop are considered part of the
> +	 * bootloader_header_t structure but are not changed when the header is
> +	 * updated.
> +	 */
> +
> +	/* Leave room for bootloader_header_t header at start of binary.  This
> +	 * header is used to identify the board the bootloader is for, what
> +	 * address it is linked at, failsafe/normal, etc.  It also contains a
> +	 * CRC of the entire image.
> +	 */
> +
> +#if defined(CONFIG_ROM_EXCEPTION_VECTORS)
> +	/*
> +	 * Exception vector entry points. When running from ROM, an exception
> +	 * cannot be handled. Halt execution and transfer control to debugger,
> +	 * if one is attached.
> +	 */
> +	.org 0x200
> +	/* TLB refill, 32 bit task */
> +	uhi_mips_exception
> +
> +	.org 0x280
> +	/* XTLB refill, 64 bit task */
> +	uhi_mips_exception
> +
> +	.org 0x300
> +	/* Cache error exception */
> +	uhi_mips_exception
> +
> +	.org 0x380
> +	/* General exception */
> +	uhi_mips_exception
> +
> +	.org 0x400
> +	/* Catch interrupt exceptions */
> +	uhi_mips_exception
> +
> +	.org 0x480
> +	/* EJTAG debug exception */
> +1:	b	1b
> +	 nop
> +
> +	.org 0x500
> +#endif
> +
> +/* Reserve extra space so that when we use the boot bus local memory
> + * segment to remap the debug exception vector we don't overwrite
> + * anything useful
> + */
> +
> +/* Basic exception handler (dump registers) in all ASM.	 When using the TLB for
> + * mapping u-boot C code, we can't branch to that C code for exception handling
> + * (TLB is disabled for some exceptions).
> + */
> +
> +/* RESET/start here */
> +	.balign	8
> +reset:
> +	nop
> +	synci	0(zero)
> +	mfc0	k0, CP0_STATUS
> +	ori	k0, 0x00E0		/* enable 64 bit mode for CSR access */
> +	mtc0	k0, CP0_STATUS
> +
> +	/* Save the address we're booting from, strip off low bits */
> +	bal	1f
> +	 nop
> +1:
> +	move	s3, ra
> +	dins	s3, zero, 0, 12
> +
> +	/* Disable boot bus moveable regions */
> +	PTR_LI	k0, OCTEON_MIO_BOOT_LOC_CFG0
> +	sd	zero, 0(k0)
> +	sd	zero, 8(k0)
> +
> +	/* Disable the watchdog timer
> +	 * First we check if we're running on CN78XX, CN73XX or CNF75XX to see
> +	 * if we use CIU3 or CIU.
> +	 */
> +	mfc0	t0, CP0_PRID
> +	ext	t0, t0, 8, 8
> +	/* Assume CIU */
> +	PTR_LI	t1, OCTEON_CIU_WDOG(0)
> +	PTR_LI	t2, OCTEON_CIU_PP_POKE(0)
> +	blt	t0, OCTEON_PRID_CN78XX, wd_use_ciu
> +	 nop
> +	beq	t0, OCTEON_PRID_CN70XX, wd_use_ciu
> +	 nop
> +	/* Use CIU3 */
> +	PTR_LI	t1, OCTEON_CIU3_WDOG(0)
> +	PTR_LI	t2, OCTEON_CIU3_PP_POKE(0)
> +wd_use_ciu:
> +	sd	zero, 0(t2)		/* Pet the dog */
> +	sd	zero, 0(t1)		/* Disable watchdog timer */
> +
> +	/* Errata: CN76XX has a node ID of 3. change it to zero here.
> +	 * This needs to be done before we relocate to L2 as addresses change
> +	 * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID],
> +	 * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE].
> +	 */
> +	mfc0	a4, CP0_PRID
> +	/* Check for 78xx pass 1.x processor ID */
> +	andi	a4, 0xffff
> +	blt	a4, (OCTEON_PRID_CN78XX << 8), 1f
> +	 nop
> +
> +	/* Zero out alternate package for now */
> +	dins	a4, zero, 6, 1
> +	bge	a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f
> +	 nop
> +
> +	/* 78xx or 76xx here, first check for bug #27141 */
> +	PTR_LI	a5, OCTEON_SLI_CTL_STATUS
> +	ld	a6, 0(a5)
> +	andi	a7, a4, 0xff
> +	andi	a6, a6, 0xff
> +
> +	beq	a6, a7, not_bug27141
> +	 nop
> +
> +	/* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */
> +	/* We just hit bug #27141.  Need to reset the chip and try again */
> +
> +	PTR_LI	a4, OCTEON_RST_SOFT_RST
> +	ori	a5, zero, 0x1	/* set the reset bit */
> +
> +reset_78xx_27141:
> +	sync
> +	synci	0(zero)
> +	cache	9, 0(zero)
> +	sd	a5, 0(a4)
> +	wait
> +	b	reset_78xx_27141
> +	 nop
> +
> +not_bug27141:
> +	/* 76XX pass 1.x has the node number set to 3 */
> +	mfc0	a4, CP0_EBASE
> +	ext	a4, a4, 0, 10
> +	bne	a4, 0x180, 1f	/* Branch if not node 3 core 0 */
> +	 nop
> +
> +	/* Clear OCX_COM_NODE[ID] */
> +	PTR_LI	a5, OCTEON_OCX_COM_NODE
> +	ld	a4, 0(a5)
> +	dins	a4, zero, 0, 2
> +	sd	a4, 0(a5)
> +	ld	zero, 0(a5)
> +
> +	/* Clear L2C_OCI_CTL[GKSEGNODE] */
> +	PTR_LI	a5, OCTEON_L2C_OCI_CTL
> +	ld	a4, 0(a5)
> +	dins	a4, zero, 4, 2
> +	sd	a4, 0(a5)
> +	ld	zero, 0(a5)
> +
> +	/* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */
> +	dmfc0	a4, CP0_CVMMEMCTL2
> +	dins	a4, zero, 12, 2
> +	dmtc0	a4, CP0_CVMMEMCTL2
> +
> +	/* Put the flash address in the start of the EBASE register to
> +	 * enable our exception handler but only for core 0.
> +	 */
> +	mfc0	a4, CP0_EBASE
> +	dext	a4, a4, 0, 10
> +	bnez	a4, no_flash
> +	/* OK in delay slot */
> +	dext	a6, a6, 0, 16		/* Get the base address in flash */
> +	sll	a6, a6, 16
> +	mtc0	a6, CP0_EBASE	/* Enable exceptions */
> +
> +no_flash:
> +	/* Zero out various registers */
> +	mtc0	zero, CP0_DEPC
> +	mtc0	zero, CP0_EPC
> +	mtc0	zero, CP0_CAUSE
> +	mfc0	a4, CP0_PRID
> +	ext	a4, a4, 8, 8
> +	mtc0	zero, CP0_DESAVE
> +
> +	/* The following are only available on Octeon 2 or later */
> +	mtc0	zero, CP0_KSCRATCH1
> +	mtc0	zero, CP0_KSCRATCH2
> +	mtc0	zero, CP0_KSCRATCH3
> +	mtc0	zero, CP0_USERLOCAL
> +
> +	/* Turn off ROMEN bit to disable ROM */
> +	PTR_LI	a1, OCTEON_MIO_RST_BOOT
> +	/* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT.
> +	 * The difference is bits 24-26 are 6 instead of 0 for the address.
> +	 */
> +	/* For Octeon 2 and CN70XX we can ignore the watchdog */
> +	blt	a4, OCTEON_PRID_CN78XX, watchdog_ok
> +	 nop
> +
> +	PTR_LI	a1, OCTEON_RST_BOOT
> +
> +	beq	a4, OCTEON_PRID_CN70XX, watchdog_ok
> +	 nop
> +
> +	ld	a2, 0(a1)
> +	/* There is a bug where some registers don't get properly reset when
> +	 * the watchdog timer causes a reset.  In this case we need to force
> +	 * a reset.
> +	 */
> +	bbit0	a2, 11, watchdog_ok	/* Skip if watchdog not hit */
> +	 dins	a2, zero, 2, 18	/* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */
> +	/* Clear bit indicating reset due to watchdog */
> +	ori	a2, 1 << 11
> +	sd	a2, 0(a1)
> +
> +	/* Disable watchdog */
> +	PTR_LI	a1, OCTEON_CIU3_PP_POKE(0)
> +	sd	zero, 0(a1)
> +	PTR_LI	a1, OCTEON_CIU3_WDOG(0)
> +	sd	zero, 0(a1)
> +
> +	/* Record this in the GSER0_SCRATCH register in bit 11 */
> +	PTR_LI	a1, OCTEON_GSERX_SCRATCH(0)
> +	ld	a2, 0(a1)
> +	ori	a2, 1 << 11
> +	sd	a2, 0(a1)
> +
> +	PTR_LI	a1, OCTEON_RST_SOFT_RST
> +	li	a2, 1
> +	sd	a2, 0(a1)
> +	wait
> +
> +	/* We should never get here */
> +
> +watchdog_ok:
> +	ld	a2, 0(a1)
> +	/* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */
> +	dins	a2, zero, 2, 18
> +	dins	a2, zero, 60, 1	/* Clear ROMEN bit */
> +	sd	a2, 0(a1)
> +
> +	/* Start of Octeon setup */
> +
> +	/* Check what core we are - if core 0, branch to init tlb
> +	 * loop in flash.  Otherwise, look up address of init tlb
> +	 * loop that was saved in the boot vector block.
> +	 */
> +	mfc0	a0, CP0_EBASE
> +	andi	a0, EBASE_CPUNUM		/* get core */
> +	beqz	a0, InitTLBStart_local
> +	 nop
> +
> +	break
> +	/* We should never get here - non-zero cores now go directly to
> +	 * tlb init from the boot stub in movable region.
> +	 */
> +
> +	.globl InitTLBStart
> +InitTLBStart:
> +InitTLBStart_local:
> +	/* If we don't have working memory yet configure a bunch of
> +	 * scratch memory, and set the stack pointer to the top
> +	 * of it.  This allows us to go to C code without having
> +	 * memory set up
> +	 *
> +	 * Warning: do not change SCRATCH_STACK_LINES as this can impact the
> +	 * transition from start.S to crti.asm. crti requires 590 bytes of
> +	 * stack space.
> +	 */
> +	cache	1,0(zero)	/* Clear Dcache so cvmseg works right */
> +#if CONFIG_OCTEON_BIG_STACK_SIZE
> +	rdhwr	v0, $0
> +	bnez	v0, 1f
> +	 nop
> +	PTR_LA	sp, big_stack_start - 16
> +	b	stack_clear_done
> +	 nop
> +1:
> +#endif
> +#define SCRATCH_STACK_LINES 0x36   /* MAX is 0x36 */
> +	dmfc0	v0, CP0_CVMMEMCTL
> +	dins	v0, zero, 0, 9
> +	/* setup SCRATCH_STACK_LINES scratch lines of scratch */
> +	ori	v0, 0x100 | SCRATCH_STACK_LINES
> +	dmtc0	v0, CP0_CVMMEMCTL
> +	/* set stack to top of scratch memory */
> +	li	sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128)
> +	/* Clear scratch for CN63XX pass 2.0 errata Core-15169*/
> +	li	t0, 0xffffffffffff8000
> +clear_scratch:
> +	sd	zero, 0(t0)
> +	addiu	t0, 8
> +	bne	t0, sp, clear_scratch
> +	 nop
> +
> +	/* This code runs on all cores - core 0 from flash,
> +	 * the rest from DRAM.	When booting from PCI, non-zero cores
> +	 * come directly here from the boot vector - no earlier code in this
> +	 * file is executed.
> +	 */
> +
> +	/* Some generic initialization is done here as well, as we need this
> +	 * done on all cores even when booting from PCI
> +	 */
> +stack_clear_done:
> +	/* Clear watch registers. */
> +	mtc0	zero, CP0_WATCHLO
> +	mtc0	zero, CP0_WATCHHI
> +
> +	/* STATUS register */
> +	mfc0	k0, CP0_STATUS
> +	li	k1, ~ST0_IE
> +	and	k0, k1
> +	mtc0	k0, CP0_STATUS
> +
> +	/* CAUSE register */
> +	mtc0	zero, CP0_CAUSE
> +
> +	/* Init Timer */
> +	dmtc0	zero, CP0_COUNT
> +	dmtc0	zero, CP0_COMPARE
> +
> +
> +	mfc0	a5, CP0_STATUS
> +	li	v0, 0xE0		/* enable 64 bit mode for CSR access */
> +	or	v0, v0, a5
> +	mtc0	v0, CP0_STATUS
> +
> +
> +	dli	v0, 1 << 29  /* Enable large physical address support in TLB */
> +	mtc0	v0, CP0_PAGEGRAIN
> +
> +InitTLB:
> +	dmtc0	zero, CP0_ENTRYLO0
> +	dmtc0	zero, CP0_ENTRYLO1
> +	mtc0	zero, CP0_PAGEMASK
> +	dmtc0	zero, CP0_CONTEXT
> +	/* Use an offset into kseg0 so we won't conflict with Mips1 legacy
> +	 * TLB clearing
> +	 */
> +	PTR_LI	v0, 0xFFFFFFFF90000000
> +	mfc0	a0, CP0_CONFIG1
> +	srl	a0, a0, 25
> +	/* Check if config4 reg present */
> +	mfc0	a1, CP0_CONFIG3
> +	bbit0	a1, 31, 2f
> +	 and	a0, a0, 0x3F		/* a0 now has the max mmu entry index */
> +	mfc0	a1, CP0_CONFIG4
> +	bbit0	a1, 14, 2f		/* check config4[MMUExtDef] */
> +	 nop
> +	/* append config4[MMUSizeExt] to most significant bit of
> +	 * config1[MMUSize-1]
> +	 */
> +	ins	a0, a1, 6, 8
> +	and	a0, a0, 0x3fff	/* a0 now includes max entries for cn6xxx */
> +2:
> +	dmtc0	zero, CP0_XCONTEXT
> +	mtc0	zero, CP0_WIRED
> +
> +InitTLBloop:
> +	dmtc0	v0, CP0_ENTRYHI
> +	tlbp
> +	mfc0	v1, CP0_INDEX
> +	daddiu	v0, v0, 1<<13
> +	bgez	v1, InitTLBloop
> +
> +	mtc0	a0, CP0_INDEX
> +	tlbwi
> +	bnez	a0, InitTLBloop
> +	 daddiu	a0, -1
> +
> +	mthi	zero
> +	mtlo	zero
> +
> +	/* Set up status register */
> +	mfc0	v0, CP0_STATUS
> +	/* Enable COP0 and COP2 access */
> +	li	a4, (1 << 28) | (1 << 30)
> +	or	v0, a4
> +
> +	/* Must leave BEV set here, as DRAM is not configured for core 0.
> +	 * Also, BEV must be 1 later on when the exception base address is set.
> +	 */
> +
> +	/* Mask all interrupts */
> +	ins	v0, zero, 0, 16
> +	/* Clear NMI (used to start cores other than core 0) */
> +	ori	v0, 0xE4		/* enable 64 bit, disable interrupts */
> +	mtc0	v0, CP0_STATUS
> +
> +	dli	v0,0xE000000F		/* enable all readhw locations */
> +	mtc0	v0, CP0_HWRENA
> +
> +	dmfc0	v0, CP0_CVMCTL
> +	ori	v0, 1<<14	/* enable fixup of unaligned mem access */
> +	dmtc0	v0, CP0_CVMCTL
> +
> +	/* Setup scratch memory.  This is also done in
> +	 * cvmx_user_app_init, and this code will be removed
> +	 * from the bootloader in the near future.
> +	 */
> +
> +	/* Set L2C_TAD_CTL[MAXLFB] = 0 on CN73XX */
> +	mfc0	a4, CP0_PRID
> +	ext	a4, a4, 8, 8
> +	blt	a4, OCTEON_PRID_CN73XX, 72f
> +	nop
> +	PTR_LI	v0, OCTEON_L2C_TAD_CTL
> +	ld	t1, 0(v0)
> +	dins	t1, zero, 0, 4
> +	sd	t1, 0(v0)
> +	ld	zero, 0(v0)
> +
> +72:
> +
> +	/* clear these to avoid immediate interrupt in noperf mode */
> +	dmtc0	zero, CP0_COMPARE	/* clear timer interrupt */
> +	dmtc0	zero, CP0_COUNT		/* clear timer interrupt */
> +	dmtc0	zero, CP0_PERF_CNT0	/* clear perfCnt0 */
> +	dmtc0	zero, CP0_PERF_CNT1	/* clear perfCnt1 */
> +	dmtc0	zero, CP0_PERF_CNT2
> +	dmtc0	zero, CP0_PERF_CNT3
> +
> +	/* If we're running on a node other than 0 then we need to set KSEGNODE
> +	 * to 0.  The nice thing with this code is that it also autodetects if
> +	 * we're running on a processor that supports CVMMEMCTL2 or not since
> +	 * only processors that have this will have a non-zero node ID.  Because
> +	 * of this there's no need to check if we're running on a 78XX.
> +	 */
> +	mfc0    t1, CP0_EBASE
> +	dext    t1, t1, 7, 3            /* Extract node number */
> +	beqz    t1, is_node0            /* If non-zero then we're not node 0 */
> +	 nop
> +	dmfc0   t1, CP0_CVMMEMCTL2
> +	dins    t1, zero, 12, 4
> +	dmtc0   t1, CP0_CVMMEMCTL2
> +is_node0:
> +
> +	/* Set up TLB mappings for u-boot code in flash. */
> +
> +	/* Use a bal to get the current PC into ra.  Since this bal is to
> +	 * the address immediately following the delay slot, the ra is
> +	 * the address of the label.  We then use this to get the actual
> +	 * address that we are executing from.
> +	 */
> +	bal	__dummy
> +	 nop
> +
> +__dummy:
> +	/* Get the actual address that we are running at */
> +	PTR_LA	a6, _start		/* Linked address of _start */
> +	PTR_LA	a7, __dummy
> +	dsubu	t0, a7, a6		/* offset of __dummy label from _start*/
> +	dsubu	a7, ra, t0		/* a7 now has actual address of _start*/
> +
> +	/* Save actual _start address in s7.  This is where we
> +	 * are executing from, as opposed to where the code is
> +	 * linked.
> +	 */
> +	move	s7, a7
> +	move	s4, zero
> +
> +	/* s7 has actual address of _start.  If this is
> +	 * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF.
> +	 * If it is on the boot bus, use 0xBFC00000 as the physical address
> +	 * for the TLB mapping, as we will be adjusting the boot bus
> +	 * to make this adjustment.
> +	 * If we are running from DRAM (remote-boot), then we want to use the
> +	 * real address in DRAM.
> +	 */
> +
> +	/* Check to see if we are running from flash - we expect that to
> +	 * be 0xffffffffb0000000-0xffffffffbfffffff
> +	 * (0x10000000-0x1fffffff, unmapped/uncached)
> +	 */
> +	dli	t2, 0xffffffffb0000000
> +	dsubu	t2, s7
> +	slt	s4, s7, t2
> +	bltz	t2, uboot_in_flash
> +	 nop
> +
> +	/* If we're not core 0 then we don't care about cache */
> +	mfc0	t2, CP0_EBASE
> +	andi	t2, EBASE_CPUNUM
> +	bnez	t2, uboot_in_ram
> +	 nop
> +
> +	/* Find out if we're OCTEON I or OCTEON + which don't support running
> +	 * out of cache.
> +	 */
> +	mfc0	t2, CP0_PRID
> +	ext	t2, t2, 8, 8
> +	li	s4, 1
> +	blt	t2, 0x90, uboot_in_ram
> +	 nop
> +
> +	/* U-Boot can be executing either in RAM or L2 cache.  Now we need to
> +	 * check if DRAM is initialized.  The way we do that is to look at
> +	 * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7)
> +	 */
> +	PTR_LI	t2, OCTEON_LMC0_DDR_PLL_CTL
> +	ld	t2, 0(t2)
> +	bbit1	t2, 7, uboot_in_ram
> +	 nop
> +
> +	/* We must be executing out of cache */
> +	b	uboot_in_ram
> +	 li	s4, 2
> +
> +uboot_in_flash:
> +	/* Set s4 to 4 to indicate we're running in FLASH */
> +	li	s4, 4
> +
> +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING)
> +	/* By default, L2C index aliasing is enabled.  In some cases it may
> +	 * need to be disabled.  The L2C index aliasing can only be disabled
> +	 * if U-Boot is running out of L2 cache and the L2 cache has not been
> +	 * used to store anything.
> +	 */
> +	PTR_LI	t1, OCTEON_L2C_CTL
> +	ld	t2, 0(t1)
> +	ori	t2, 1
> +	sd	t2, 0(t1)
> +#endif
> +
> +	/* Use BFC00000 as physical address for TLB mappings when booting
> +	 * from flash, as we will adjust the boot bus mappings to make this
> +	 * mapping correct.
> +	 */
> +	dli	a7, 0xFFFFFFFFBFC00000
> +	dsubu	s6, s7, a7  /* Save flash offset in s6 */
> +
> +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2)
> +	/* For OCTEON II we check to see if the L2 cache is big enough to hold
> +	 * U-Boot.  If it is big enough then we copy ourself from flash to the
> +	 * L2 cache in order to speed up execution.
> +	 */
> +
> +	/* Check for OCTEON 2 */
> +	mfc0	t1, CP0_PRID
> +	ext	t1, t1, 8, 8
> +	/* Get number of L2 cache sets */
> +	beq	t1, OCTEON_PRID_CNF71XX, got_l2_sets	/* CNF71XX */
> +	 li	t2, 1 << 9
> +	beq	t1, OCTEON_PRID_CN78XX, got_l2_sets	/* CN78XX */
> +	 li	t2, 1 << 13
> +	beq	t1, OCTEON_PRID_CN70XX, got_l2_sets	/* CN70XX */
> +	 li	t2, 1 << 10
> +	beq	t1, OCTEON_PRID_CN73XX, got_l2_sets	/* CN73XX */
> +	 li	t2, 1 << 11
> +	beq	t1, OCTEON_PRID_CNF75XX, got_l2_sets	/* CNF75XX */
> +	 li	t2, 1 << 11
> +	b	l2_cache_too_small	/* Unknown OCTEON model */
> +	 nop
> +
> +got_l2_sets:
> +	/* Get number of associations */
> +	PTR_LI	t0, OCTEON_MIO_FUSE_DAT3
> +	ld	t0, 0(t0)
> +	dext	t0, t0, 32, 3
> +
> +	beq	t1, OCTEON_PRID_CN70XX, process_70xx_l2sets
> +	 nop
> +	/* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */
> +	beqz	t0, got_l2_ways
> +	 li	t3, 16
> +	beq	t0, 1, got_l2_ways
> +	 li	t3, 12
> +	beq	t0, 2, got_l2_ways
> +	 li	t3, 8
> +	beq	t0, 3, got_l2_ways
> +	 li	t3, 4
> +	b	l2_cache_too_small
> +	 nop
> +
> +process_70xx_l2sets:
> +	/* For 70XX, the number of ways is defined as:
> +	 * 0 - full cache (4-way) 512K
> +	 * 1 - 3/4 ways (3-way) 384K
> +	 * 2 - 1/2 ways (2-way) 256K
> +	 * 3 - 1/4 ways (1-way) 128K
> +	 * 4-7 illegal (aliased to 0-3)
> +	 */
> +	andi	t0, 3
> +	beqz	t0, got_l2_ways
> +	 li	t3, 4
> +	beq	t0, 1, got_l2_ways
> +	 li	t3, 3
> +	beq	t0, 2, got_l2_ways
> +	 li	t3, 2
> +	li	t3, 1
> +
> +got_l2_ways:
> +	dmul	a1, t2, t3		/* Calculate cache size */
> +	dsll	a1, 7			/* Ways * Sets * cache line sz (128) */
> +	daddiu	a1, a1, -128		/* Adjust cache size for copy code */
> +
> +	/* Calculate size of U-Boot image */
> +	/*
> +	 * "uboot_end - _start" is not correct, as the image also
> +	 * includes the DTB appended to the end (OF_EMBED is deprecated).
> +	 * Lets use a defined max for now here.
> +	 */
> +	PTR_LI	s5, CONFIG_BOARD_SIZE_LIMIT
> +
> +	daddu	t2, s5, s7	/* t2 = end address */
> +	daddiu	t2, t2, 127
> +	ins	t2, zero, 0, 7	/* Round up to cache line for memcpy */
> +
> +	slt	t1, a1, s5	/* See if we're bigger than the L2 cache */
> +	bnez	t1, l2_cache_too_small
> +	 nop
> +	/* Address we plan to load at in the L2 cache */
> +	PTR_LI	t9, CONFIG_OCTEON_L2_UBOOT_ADDR
> +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE
> +	/* Enable all ways for PP0.  Authentik ROM may have disabled these */
> +	PTR_LI	a1, OCTEON_L2C_WPAR_PP0
> +	sd	zero, 0(a1)
> +
> +	/* Address to place our memcpy code */
> +	PTR_LI	a0, CONFIG_OCTEON_L2_MEMCPY_ADDR
> +	/* The following code writes a simple memcpy routine into the cache
> +	 * to copy ourself from flash into the L2 cache.  This makes the
> +	 * memcpy routine a lot faster since each instruction can potentially
> +	 * require four read cycles to flash over the boot bus.
> +	 */
> +	/* Zero cache line in the L2 cache */
> +	zcb	(a0)
> +	synci	0(zero)
> +	dli	a1, 0xdd840000dd850008	/* ld a0, 0(t0);  ld a1, 8(t0) */
> +	sd	a1, 0(a0)
> +	dli	a1, 0xdd860010dd870018	/* ld a2, 16(t0); ld a3, 24(t0) */
> +	sd	a1, 8(a0)
> +	dli	a1, 0xfda40000fda50008	/* sd a0, 0(t1);  sd a1, 8(t1) */
> +	sd	a1, 16(a0)
> +	dli	a1, 0xfda60010fda70018	/* sd a2, 16(t1); sd a3, 24(t1) */
> +	sd	a1, 24(a0)
> +	dli	a1, 0x258c0020158efff6	/* addiu t0, 32; bne t0, t2, -40 */
> +	sd	a1, 32(a0)
> +	dli	a1, 0x25ad002003e00008	/* addiu t1, 32; jr ra */
> +	sd	a1, 40(a0)
> +	sd	zero, 48(a0)		/* nop; nop */
> +
> +	/* Synchronize the caches */
> +	sync
> +	synci	0(zero)
> +
> +	move	t0, s7
> +	move	t1, t9
> +
> +	/* Do the memcpy operation in L2 cache to copy ourself from flash
> +	 * to the L2 cache.
> +	 */
> +	jalr	a0
> +	 nop
> +
> +# else
> +	/* Copy ourself to the L2 cache from flash, 32 bytes at a time */
> +	/* This code is now written to the L2 cache using the code above */
> +1:
> +	ld	a0, 0(t0)
> +	ld	a1, 8(t0)
> +	ld	a2, 16(t0)
> +	ld	a3, 24(t0)
> +	sd	a0, 0(t1)
> +	sd	a1, 8(t1)
> +	sd	a2, 16(t1)
> +	sd	a3, 24(t1)
> +	addiu	t0, 32
> +	bne	t0, t2, 1b
> +	addiu	t1, 32
> +# endif	/* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */
> +
> +	/* Adjust the start address of U-Boot and the global pointer */
> +	subu	t0, s7, t9	/* t0 = address difference */
> +	move	s7, t9		/* Update physical address */
> +	move	s2, t9
> +	sync
> +	synci	0(zero)
> +
> +	/* Now we branch to the L2 cache.  We first get our PC then adjust it
> +	 */
> +	bal	3f
> +	 nop
> +3:
> +	/* Don't add any instructions here! */
> +	subu	t9, ra, t0
> +	/* Give ourself 16 bytes */
> +	addiu	t9, 0x10
> +
> +	jal	t9		/* Branch to address in L2 cache */
> +
> +	 nop
> +	nop
> +	/* Add instructions after here */
> +
> +	move	a7, s7
> +
> +	b	uboot_in_ram
> +	 ori	s4, 2		/* Running out of L2 cache */
> +
> +l2_cache_too_small:	/* We go here if we can't copy ourself to L2 */
> +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */
> +
> +	/* This code is only executed if booting from flash. */
> +	/*  For flash boot (_not_ RAM boot), we do a workaround for
> +	 * an LLM errata on CN38XX and CN58XX parts.
> +	 */
> +
> +uboot_in_ram:
> +	/* U-boot address is now in reg a7, and is 4 MByte aligned.
> +	 * (boot bus addressing has been adjusted to make this happen for flash,
> +	 * and for DRAM this alignment must be provided by the remote boot
> +	 * utility.
> +	 */
> +	/* See if we're in KSEG0 range, if so set EBASE register to handle
> +	 * exceptions.
> +	 */
> +	dli	a1, 0x20000000
> +	bge	a7, a1, 1f
> +	 nop
> +	/* Convert our physical address to KSEG0 */
> +	PTR_LI	a1, 0xffffffff80000000
> +	or	a1, a1, a7
> +	mtc0	a1, CP0_EBASE
> +1:
> +	/* U-boot now starts at 0xBFC00000.  Use a single 4 MByte TLB mapping
> +	 * to map u-boot.
> +	 */
> +	move	a0, a6		/* Virtual addr in a0 */
> +	dins	a0, zero, 0, 16	/* Zero out offset bits */
> +	move	a1, a7		/* Physical addr in a1 */
> +
> +	/* Now we need to remove the MIPS address space bits.  For this we
> +	 * need to determine if it is a 32 bit compatibility address or not.
> +	 */
> +
> +	/* 'lowest' address in compatibility space */
> +	PTR_LI	t0, 0xffffffff80000000
> +	dsubu	t0, t0, a1
> +	bltz	t0, compat_space
> +	 nop
> +
> +	/* We have a xkphys address, so strip off top bit */
> +	b	addr_fixup_done
> +	 dins	a1, zero, 63, 1
> +
> +compat_space:
> +	PTR_LI	a2, 0x1fffffff
> +	and	a1, a1, a2  /* Mask phy addr to remove address space bits */
> +
> +addr_fixup_done:
> +	/* Currently the u-boot image size is limited to 4 MBytes.  In order to
> +	 * support larger images the flash mapping will need to be changed to
> +	 * be able to access more than that before C code is run.  Until that
> +	 * is done, we just use a 4 MByte mapping for the secondary cores as
> +	 * well.
> +	 */
> +	/* page size (only support 4 Meg binary size for now for core 0)
> +	 * This limitation is due to the fact that the boot vector is
> +	 * 0xBFC00000 which only makes 4MB available.  Later more flash
> +	 * address space will be available after U-Boot has been copied to
> +	 * RAM.	 For now assume that it is in flash.
> +	 */
> +	li	a2, 2*1024*1024
> +
> +	mfc0	a4, CP0_EBASE
> +	andi	a4, EBASE_CPUNUM		/* get core */
> +	beqz	a4, core_0_tlb
> +	 nop
> +
> +	/* Now determine how big a mapping to use for secondary cores,
> +	 * which need to map all of u-boot + heap in DRAM
> +	 */
> +	/* Here we look at the alignment of the physical address,
> +	 * and use the largest page size possible.  In some cases
> +	 * this can result in an oversize mapping, but for secondary cores
> +	 * this mapping is very short lived.
> +	 */
> +
> +	/* Physical address in a1 */
> +	li	a2, 1
> +1:
> +	sll	a2, 1
> +	and	a5, a1, a2
> +	beqz	a5, 1b
> +	 nop
> +
> +	/* a2 now contains largest page size we can use */
> +core_0_tlb:
> +	JAL(single_tlb_setup)
> +
> +	/* Check if we're running from cache */
> +	bbit1	s4, 1, uboot_in_cache
> +	 nop
> +
> +	/* If we are already running from ram, we don't need to muck
> +	 * with boot bus mappings.
> +	 */
> +	PTR_LI	t2, 0xffffffffb0000000
> +	dsubu	t2, s7
> +	/* See if our starting address is lower than the boot bus */
> +	bgez	t2, uboot_in_ram2	/* If yes, booting from RAM */
> +	 nop
> +
> +uboot_in_cache:
> +#if CONFIG_OCTEON_BIG_STACK_SIZE
> +	/* The large stack is only for core 0.  For all other cores we need to
> +	 * use the L1 cache otherwise the other cores will stomp on top of each
> +	 * other unless even more space is reserved for the stack space for
> +	 * each core.  With potentially 96 cores this gets excessive.
> +	 */
> +	/* Load EBASE into a0, the same register that is masked and tested
> +	 * below.  On entry a0 still holds the value left behind by
> +	 * single_tlb_setup, so it must be reloaded before the core number
> +	 * can be checked.
> +	 */
> +	mfc0	a0, CP0_EBASE
> +	andi	a0, EBASE_CPUNUM		/* get core */
> +	bnez	a0, no_big_stack	/* secondary cores keep their sp */
> +	 nop
> +	/* Core 0 only: sp = big_stack_start - 16 */
> +	PTR_LA	sp, big_stack_start
> +	daddiu	sp, -16
> +
> +no_big_stack:
> +#endif
> +	/* We now have the TLB set up, so we need to remap the boot bus.
> +	 * This is tricky, as we are running from flash, and will be changing
> +	 * the addressing of the flash.
> +	 */
> +	/* Enable movable boot bus region 0, at address 0x10000000 */
> +	PTR_LI	a4, OCTEON_MIO_BOOT_BASE
> +	dli	a5, 0x81000000	/* EN + base address 0x10000000 */
> +	sd	a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4)
> +
> +	/* Copy the code that remaps the boot bus to the movable region */
> +	sd	zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
> +
> +	PTR_LA	a6, change_boot_mappings
> +	GETOFFSET(a5, change_boot_mappings);
> +	daddu	a5, a5, a6
> +
> +	/* The code is 16 bytes (2 DWORDS) */
> +	ld	a7, 0(a5)
> +	sd	a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
> +	ld	a7, 8(a5)
> +	sd	a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
> +
> +	/* Read from an RML register to ensure that the previous writes have
> +	 * completed before we branch to the movable region.
> +	 */
> +	ld	zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4)
> +
> +	/* Compute value for boot bus configuration register */
> +	/* Read region 0 config so we can _modify_ the base address field */
> +	PTR_LI	a4, OCTEON_MIO_BOOT_REG_CFG0	/* region 0 config */
> +	ld	a0, 0(a4)
> +	dli	a4, 0xf0000000		/* Mask off bits we want to save */
> +	and	a4, a4, a0
> +	dli	a0, 0x0fff0000		/* Force size to max */
> +	or	a4, a4, a0
> +
> +	move	a5, s6
> +	/* Convert to 64k blocks, as used by boot bus config */
> +	srl	a5, 16
> +	li	a6, 0x1fc0	/* 'normal' boot bus base config value */
> +	subu	a6, a6, a5	/* Subtract offset */
> +	/* combine into register value to pass to boot bus routine */
> +	or	a0, a4, a6
> +
> +	/* Branch there */
> +	PTR_LA	a1, __mapped_continue_label
> +	PTR_LI	a2, OCTEON_MIO_BOOT_REG_CFG0
> +	/* If region 0 is not enabled we can skip it */
> +	ld	a4, 0(a2)
> +	bbit0	a4, 31, __mapped_continue_label
> +	 nop
> +	li	a4, 0x10000000
> +	j	a4
> +	 synci	0(zero)
> +
> +	/* We never get here, as we go directly to __mapped_continue_label */
> +	break
> +
> +
> +uboot_in_ram2:
> +
> +	/* Now jump to address in TLB mapped memory to continue execution */
> +	PTR_LA	a4, __mapped_continue_label
> +	synci	0(a4)
> +	j	a4
> +	 nop
> +
> +__mapped_continue_label:
> +	/* Check if we are core 0, if we are not then we need
> +	 * to vector to code in DRAM to do application setup, and
> +	 * skip the rest of the bootloader.  Only core 0 runs the bootloader
> +	 * and sets up the tables that the other cores will use for
> +	 * configuration.
> +	 */
> +	mfc0	a0, CP0_EBASE
> +	andi	a0, EBASE_CPUNUM   /* get core */
> +	/* if (__all_cores_are_equal==0 && core==0),
> +	 * then jump to execute BL on core 0; else 'go to next line'
> +	 * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID))
> +	 */
> +	lw	t0, __all_cores_are_equal
> +	beq	a0, t0, core_0_cont1
> +	 nop
> +
> +	/* other cores look up addr from dram */
> +        /* DRAM controller already set up by first core */
> +        li      a1, (BOOT_VECTOR_NUM_WORDS * 4)
> +        mul     a0, a0, a1
> +
> +        /* Now find out the boot vector base address from the moveable boot
> +         * bus region.
> +         */
> +
> +        /* Get the address of the boot bus moveable region */
> +        PTR_LI     t8, OCTEON_MIO_BOOT_BASE
> +        ld      t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8)
> +        /* Make sure it's enabled */
> +        bbit0   t9, 31, invalid_boot_vector
> +         dext   t9, t9, 3, 24
> +        dsll    t9, t9, 7
> +        /* Make address XKPHYS */
> +	li	t0, 1
> +	dins	t9, t0, 63, 1
> +
> +        ld      t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9)
> +        dli     t1, OCTEON_BOOT_MOVEABLE_MAGIC1
> +        bne     t0, t1, invalid_boot_vector
> +         nop
> +
> +        /* Load base address of boot vector table */
> +        ld      t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9)
> +        /* Add offset for core */
> +        daddu   a1, t0, a0
> +
> +	mfc0	v0, CP0_STATUS
> +	move	v1, v0
> +	ins	v1, zero, 19, 1		/* Clear NMI bit */
> +	mtc0	v1, CP0_STATUS
> +
> +        /* Get app start function address */
> +        lw      t9, 8(a1)
> +        beqz    t9, invalid_boot_vector
> +         nop
> +
> +        j       t9
> +         lw      k0, 12(a1)      /* Load global data (deprecated) */
> +
> +invalid_boot_vector:
> +        wait
> +        b       invalid_boot_vector
> +         nop
> +
> +__all_cores_are_equal:
> +	/* The following .word tell if 'all_cores_are_equal' or core0 is special
> +	 * By default (for the first execution) the core0 should be special,
> +	 * in order to behave like the old(existing not-modified) bootloader
> +	 * and run the bootloader on core 0 to follow the existing design.
> +	 * However after that we make 'all_cores_equal' which allows to run SE
> +	 * applications on core0 like on any other core. NOTE that value written
> +	 * to '__all_cores_are_equal' should not match any core ID.
> +	 */
> +	.word 	0
> +
> +core_0_cont1:
> +	li	t0, 0xffffffff
> +	sw	t0, __all_cores_are_equal
> +	/* From here on, only core 0 runs, other cores have branched
> +	 * away.
> +	 */
> +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM
> +	/* Set up initial stack and global data */
> +	setup_stack_gd
> +# ifdef CONFIG_DEBUG_UART
> +	PTR_LA	t9, debug_uart_init
> +	jalr	t9
> +	 nop
> +# endif
> +#endif
> +	move	a0, zero		# a0 <-- boot_flags = 0
> +	PTR_LA	t9, board_init_f
> +
> +	jr	t9
> +	 move	ra, zero
> +	END(_start)
> +
> +	.balign	8
> +	.globl	single_tlb_setup
> +	.ent	single_tlb_setup
> +	/* Sets up a single TLB entry in the highest-numbered TLB slot.
> +	 * The entry maps an even/odd pair of pages, so the mapping covers
> +	 * 2 * a2 bytes.  Virtual/physical addresses must be properly
> +	 * aligned.
> +	 *
> +	 * Inputs:
> +	 * a0  Virtual address
> +	 * a1  Physical address
> +	 * a2  page (_not_ mapping) size
> +	 *
> +	 * Modifies a0, a1, a3, a4, a5, a6 and the CP0 PageMask, Index,
> +	 * EntryLo0, EntryLo1 and EntryHi registers.  On return a0 holds
> +	 * the virtual address plus the mapping size; a1 no longer holds
> +	 * the physical address.
> +	 */
> +single_tlb_setup:
> +	/* Determine the number of TLB entries available, and
> +	 * use the top one.
> +	 */
> +	mfc0	a3, CP0_CONFIG1
> +	dext	a3, a3, 25, 6		/* a3 now has the max mmu entry index */
> +	mfc0	a5, CP0_CONFIG3		/* Check if config4 reg present */
> +	bbit0	a5, 31, single_tlb_setup_cont
> +	 nop
> +	mfc0	a5, CP0_CONFIG4
> +	bbit0	a5, 14, single_tlb_setup_cont	/* check config4[MMUExtDef] */
> +	 nop
> +	/* append config4[MMUSizeExt] to most significant bit of
> +	 * config1[MMUSize-1]: the low 8 bits of a5 are inserted into a3
> +	 * starting at bit 6, above the 6-bit value extracted from config1
> +	 */
> +	dins	a3, a5, 6, 8
> +	and	a3, a3, 0x3fff	/* a3 now includes max entries for cn6xxx */
> +
> +single_tlb_setup_cont:
> +
> +	/* Format physical address for entry low: drop the low 12 address
> +	 * bits, then move the result up to bit 6 of the register
> +	 */
> +	nop
> +	dsrl	a1, a1, 12
> +	dsll	a1, a1, 6
> +	ori	a1, a1, 0x7	/* set DVG bits (low three bits) */
> +
> +	/* Derive the per-entry constants from the page size in a2 */
> +	move	a4, a2
> +	daddu	a5, a4, a4	/* mapping size (two pages) */
> +	dsll	a6, a4, 1
> +	daddiu	a6, a6, -1	/* pagemask = 2 * page size - 1 */
> +	dsrl	a4, a4, 6	/* adjust for adding with entrylo */
> +
> +	/* Now set up mapping */
> +	mtc0	a6, CP0_PAGEMASK
> +	mtc0	a3, CP0_INDEX	/* select the top TLB entry */
> +
> +	/* First page of the pair, then step a1 on by one page */
> +	dmtc0	a1, CP0_ENTRYLO0
> +	daddu	a1, a1, a4
> +
> +	/* Second page of the pair.  The updated a1 is not read again
> +	 * inside this routine.
> +	 */
> +	dmtc0	a1, CP0_ENTRYLO1
> +	daddu	a1, a1, a4
> +
> +	/* Virtual address of the mapping.  The updated a0 is not read
> +	 * again inside this routine but is visible to the caller.
> +	 */
> +	dmtc0	a0, CP0_ENTRYHI
> +	daddu	a0, a0, a5
> +
> +	/* Clear the execution hazard from the CP0 writes above before
> +	 * writing the indexed TLB entry.
> +	 */
> +	ehb
> +	tlbwi
> +	jr  ra
> +	 nop
> +	.end   single_tlb_setup
> +
> +
> +/**
> + * This code is moved to a movable boot bus region,
> + * and it is responsible for changing the flash mappings and
> + * jumping to run from the TLB mapped address.
> + *
> + * _start copies this routine into the movable region as two 64-bit
> + * words read with ld, so it must remain exactly four instructions
> + * (16 bytes) long and keep its 8-byte alignment.
> + *
> + * @param a0	New register value to store to MIO_BOOT_REG_CFG0
> + *		(boot bus region 0 configuration, including its base)
> + * @param a1	Address to branch to afterwards
> + * @param a2	Address of MIO_BOOT_REG_CFG0
> + */
> +	.balign	8
> +change_boot_mappings:
> +	sd	a0, 0(a2)	/* write the new region 0 configuration */
> +	sync			/* memory barrier after the config write */
> +	j a1	    /* Jump to new TLB mapped location */
> +	 synci	0(zero)	/* executes in the jump delay slot */
> +
> +/* If we need a large stack, allocate it here. */
> +#if CONFIG_OCTEON_BIG_STACK_SIZE
> +	/* Allocate the stack here so it's in L2 cache or DRAM.
> +	 * big_stack_end labels the lowest address of the region and
> +	 * big_stack_start the address just above it; _start loads sp
> +	 * with big_stack_start - 16.
> +	 */
> +	.balign	16
> +big_stack_end:
> +	.skip	CONFIG_OCTEON_BIG_STACK_SIZE, 0
> +big_stack_start:
> +	.dword	0
> +#endif
> 

-- 
- Daniel


More information about the U-Boot mailing list