[U-Boot] [PATCH v15 07/10] arm64: core support

Bhupesh SHARMA bhupesh.linux at gmail.com
Wed Nov 27 21:38:32 CET 2013


Hi David,

Thanks for the patch.
Some comments/doubts in-line:

On 11/15/2013 9:15 AM, fenghua at phytium.com.cn wrote:
> From: David Feng <fenghua at phytium.com.cn>
>
> Relocation code based on a patch by Scott Wood, which is:
> Signed-off-by: Scott Wood <scottwood at freescale.com>
>
> Signed-off-by: David Feng <fenghua at phytium.com.cn>
> ---
>   arch/arm/config.mk                      |    3 +-
>   arch/arm/cpu/armv8/Makefile             |   17 +++
>   arch/arm/cpu/armv8/cache.S              |  136 +++++++++++++++++++
>   arch/arm/cpu/armv8/cache_v8.c           |  219 +++++++++++++++++++++++++++++++
>   arch/arm/cpu/armv8/config.mk            |   15 +++
>   arch/arm/cpu/armv8/cpu.c                |   43 ++++++
>   arch/arm/cpu/armv8/exceptions.S         |  113 ++++++++++++++++
>   arch/arm/cpu/armv8/generic_timer.c      |   31 +++++
>   arch/arm/cpu/armv8/gic.S                |  106 +++++++++++++++
>   arch/arm/cpu/armv8/start.S              |  164 +++++++++++++++++++++++
>   arch/arm/cpu/armv8/tlb.S                |   34 +++++
>   arch/arm/cpu/armv8/transition.S         |   83 ++++++++++++
>   arch/arm/cpu/armv8/u-boot.lds           |   89 +++++++++++++
>   arch/arm/include/asm/armv8/mmu.h        |  111 ++++++++++++++++
>   arch/arm/include/asm/byteorder.h        |   12 ++
>   arch/arm/include/asm/cache.h            |    5 +
>   arch/arm/include/asm/config.h           |    6 +
>   arch/arm/include/asm/gic.h              |   49 ++++++-
>   arch/arm/include/asm/global_data.h      |    6 +-
>   arch/arm/include/asm/io.h               |   15 ++-
>   arch/arm/include/asm/macro.h            |   53 ++++++++
>   arch/arm/include/asm/posix_types.h      |   10 ++
>   arch/arm/include/asm/proc-armv/ptrace.h |   21 +++
>   arch/arm/include/asm/proc-armv/system.h |   59 ++++++++-
>   arch/arm/include/asm/system.h           |   84 ++++++++++++
>   arch/arm/include/asm/types.h            |    4 +
>   arch/arm/include/asm/u-boot.h           |    4 +
>   arch/arm/include/asm/unaligned.h        |    2 +-
>   arch/arm/lib/Makefile                   |   20 ++-
>   arch/arm/lib/board.c                    |    7 +-
>   arch/arm/lib/bootm.c                    |   24 ++++
>   arch/arm/lib/crt0_64.S                  |  113 ++++++++++++++++
>   arch/arm/lib/interrupts_64.c            |  120 +++++++++++++++++
>   arch/arm/lib/relocate_64.S              |   58 ++++++++
>   common/image.c                          |    1 +
>   doc/README.arm64                        |   46 +++++++
>   examples/standalone/stubs.c             |   15 +++
>   include/image.h                         |    1 +
>   38 files changed, 1878 insertions(+), 21 deletions(-)
>   create mode 100644 arch/arm/cpu/armv8/Makefile
>   create mode 100644 arch/arm/cpu/armv8/cache.S
>   create mode 100644 arch/arm/cpu/armv8/cache_v8.c
>   create mode 100644 arch/arm/cpu/armv8/config.mk
>   create mode 100644 arch/arm/cpu/armv8/cpu.c
>   create mode 100644 arch/arm/cpu/armv8/exceptions.S
>   create mode 100644 arch/arm/cpu/armv8/generic_timer.c
>   create mode 100644 arch/arm/cpu/armv8/gic.S
>   create mode 100644 arch/arm/cpu/armv8/start.S
>   create mode 100644 arch/arm/cpu/armv8/tlb.S
>   create mode 100644 arch/arm/cpu/armv8/transition.S
>   create mode 100644 arch/arm/cpu/armv8/u-boot.lds
>   create mode 100644 arch/arm/include/asm/armv8/mmu.h
>   create mode 100644 arch/arm/lib/crt0_64.S
>   create mode 100644 arch/arm/lib/interrupts_64.c
>   create mode 100644 arch/arm/lib/relocate_64.S
>   create mode 100644 doc/README.arm64
>
> diff --git a/arch/arm/config.mk b/arch/arm/config.mk
> index bdabcf4..49cc7cc 100644
> --- a/arch/arm/config.mk
> +++ b/arch/arm/config.mk
> @@ -17,7 +17,8 @@ endif
>
>   LDFLAGS_FINAL += --gc-sections
>   PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections \
> -		     -fno-common -ffixed-r9 -msoft-float
> +		     -fno-common -ffixed-r9
> +PLATFORM_RELFLAGS += $(call cc-option, -msoft-float)
>
>   # Support generic board on ARM
>   __HAVE_ARCH_GENERIC_BOARD := y
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> new file mode 100644
> index 0000000..b6eb6de
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -0,0 +1,17 @@
> +#
> +# (C) Copyright 2000-2003
> +# Wolfgang Denk, DENX Software Engineering, wd at denx.de.
> +#
> +# SPDX-License-Identifier:	GPL-2.0+
> +#
> +
> +extra-y	:= start.o
> +
> +obj-y	+= cpu.o
> +obj-y	+= generic_timer.o
> +obj-y	+= cache_v8.o
> +obj-y	+= exceptions.o
> +obj-y	+= cache.o
> +obj-y	+= tlb.o
> +obj-y	+= gic.o
> +obj-y	+= transition.o
> diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
> new file mode 100644
> index 0000000..546a83e
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cache.S
> @@ -0,0 +1,136 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * This file is based on sample code from ARMv8 ARM.
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * void __asm_flush_dcache_level(level)
> + *
> + * clean and invalidate one level cache.
> + *
> + * x0: cache level
> + * x1~x9: clobbered
> + */
> +ENTRY(__asm_flush_dcache_level)
> +	lsl	x1, x0, #1
> +	msr	csselr_el1, x1		/* select cache level */
> +	isb				/* sync change of cssidr_el1 */
> +	mrs	x6, ccsidr_el1		/* read the new cssidr_el1 */
> +	and	x2, x6, #7		/* x2 <- log2(cache line size)-4 */
> +	add	x2, x2, #4		/* x2 <- log2(cache line size) */
> +	mov	x3, #0x3ff
> +	and	x3, x3, x6, lsr #3	/* x3 <- max number of #ways */
> +	add	w4, w3, w3
> +	sub	w4, w4, 1		/* round up log2(#ways + 1) */
> +	clz	w5, w4			/* bit position of #ways */
> +	mov	x4, #0x7fff
> +	and	x4, x4, x6, lsr #13	/* x4 <- max number of #sets */
> +	/* x1 <- cache level << 1 */
> +	/* x2 <- line length offset */
> +	/* x3 <- number of cache ways - 1 */
> +	/* x4 <- number of cache sets - 1 */
> +	/* x5 <- bit position of #ways */
> +
> +loop_set:
> +	mov	x6, x3			/* x6 <- working copy of #ways */
> +loop_way:
> +	lsl	x7, x6, x5
> +	orr	x9, x1, x7		/* map way and level to cisw value */
> +	lsl	x7, x4, x2
> +	orr	x9, x9, x7		/* map set number to cisw value */
> +	dc	cisw, x9		/* clean & invalidate by set/way */
> +	subs	x6, x6, #1		/* decrement the way */
> +	b.ge	loop_way
> +	subs	x4, x4, #1		/* decrement the set */
> +	b.ge	loop_set
> +
> +	ret
> +ENDPROC(__asm_flush_dcache_level)
> +
> +/*
> + * void __asm_flush_dcache_all(void)
> + *
> + * clean and invalidate all data cache by SET/WAY.
> + */
> +ENTRY(__asm_flush_dcache_all)
> +	dsb	sy
> +	mrs	x10, clidr_el1		/* read clidr_el1 */
> +	lsr	x11, x10, #24
> +	and	x11, x11, #0x7		/* x11 <- loc */
> +	cbz	x11, finished		/* if loc is 0, exit */
> +	mov	x15, lr
> +	mov	x0, #0			/* start flush at cache level 0 */
> +	/* x0  <- cache level */
> +	/* x10 <- clidr_el1 */
> +	/* x11 <- loc */
> +	/* x15 <- return address */
> +
> +loop_level:
> +	lsl	x1, x0, #1
> +	add	x1, x1, x0		/* x0 <- tripled cache level */
> +	lsr	x1, x10, x1
> +	and	x1, x1, #7		/* x1 <- cache type */
> +	cmp	x1, #2
> +	b.lt	skip			/* skip if no cache or icache */
> +	bl	__asm_flush_dcache_level
> +skip:
> +	add	x0, x0, #1		/* increment cache level */
> +	cmp	x11, x0
> +	b.gt	loop_level
> +
> +	mov	x0, #0
> +	msr	csselr_el1, x0		/* resotre csselr_el1 */
> +	dsb	sy
> +	isb
> +	mov	lr, x15
> +
> +finished:
> +	ret
> +ENDPROC(__asm_flush_dcache_all)
> +
> +/*
> + * void __asm_flush_dcache_range(start, end)
> + *
> + * clean & invalidate data cache in the range
> + *
> + * x0: start address
> + * x1: end address
> + */
> +ENTRY(__asm_flush_dcache_range)
> +	mrs	x3, ctr_el0
> +	lsr	x3, x3, #16
> +	and	x3, x3, #0xf
> +	mov	x2, #4
> +	lsl	x2, x2, x3		/* cache line size */
> +
> +	/* x2 <- minimal cache line size in cache system */
> +	sub	x3, x2, #1
> +	bic	x0, x0, x3
> +1:	dc	civac, x0	/* clean & invalidate data or unified cache */
> +	add	x0, x0, x2
> +	cmp	x0, x1
> +	b.lo	1b
> +	dsb	sy
> +	ret
> +ENDPROC(__asm_flush_dcache_range)
> +
> +/*
> + * void __asm_invalidate_icache_all(void)
> + *
> + * invalidate all tlb entries.
> + */
> +ENTRY(__asm_invalidate_icache_all)
> +	ic	ialluis
> +	isb	sy
> +	ret
> +ENDPROC(__asm_invalidate_icache_all)
> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> new file mode 100644
> index 0000000..131fdab
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cache_v8.c
> @@ -0,0 +1,219 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <asm/system.h>
> +#include <asm/armv8/mmu.h>
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +
> +static void set_pgtable_section(u64 section, u64 memory_type)
> +{
> +	u64 *page_table = (u64 *)gd->arch.tlb_addr;
> +	u64 value;
> +
> +	value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
> +	value |= PMD_ATTRINDX(memory_type);
> +	page_table[section] = value;
> +}
> +
> +/* to activate the MMU we need to set up virtual memory */
> +static void mmu_setup(void)
> +{
> +	int i, j, el;
> +	bd_t *bd = gd->bd;
> +
> +	/* Setup an identity-mapping for all spaces */
> +	for (i = 0; i < (PGTABLE_SIZE >> 3); i++)
> +		set_pgtable_section(i, MT_DEVICE_NGNRNE);
> +
> +	/* Setup an identity-mapping for all RAM space */
> +	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
> +		ulong start = bd->bi_dram[i].start;
> +		ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
> +		for (j = start >> SECTION_SHIFT;
> +		     j < end >> SECTION_SHIFT; j++) {
> +			set_pgtable_section(j, MT_NORMAL);
> +		}
> +	}
> +
> +	/* load TTBR0 */
> +	el = current_el();
> +	if (el == 1)
> +		asm volatile("msr ttbr0_el1, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +	else if (el == 2)
> +		asm volatile("msr ttbr0_el2, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +	else
> +		asm volatile("msr ttbr0_el3, %0"
> +			     : : "r" (gd->arch.tlb_addr) : "memory");
> +
> +	/* enable the mmu */
> +	set_sctlr(get_sctlr() | CR_M);
> +}
> +
> +/*
> + * Performs a invalidation of the entire data cache at all levels
> + */
> +void invalidate_dcache_all(void)
> +{
> +	__asm_flush_dcache_all();
> +}
> +
> +/*
> + * Performs a clean & invalidation of the entire data cache at all levels
> + */
> +void flush_dcache_all(void)
> +{
> +	__asm_flush_dcache_all();
> +}
> +
> +/*
> + * Invalidates range in all levels of D-cache/unified cache
> + */
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	__asm_flush_dcache_range(start, stop);
> +}
> +
> +/*
> + * Flush range(clean & invalidate) from all levels of D-cache/unified cache
> + */
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	__asm_flush_dcache_range(start, stop);
> +}
> +
> +void dcache_enable(void)
> +{
> +	/* The data cache is not active unless the mmu is enabled */
> +	if (!(get_sctlr() & CR_M)) {
> +		invalidate_dcache_all();
> +		__asm_invalidate_tlb_all();
> +		mmu_setup();
> +	}
> +
> +	set_sctlr(get_sctlr() | CR_C);
> +}
> +
> +void dcache_disable(void)
> +{
> +	uint32_t sctlr;
> +
> +	sctlr = get_sctlr();
> +
> +	/* if cache isn't enabled no need to disable */
> +	if (!(sctlr & CR_C))
> +		return;
> +
> +	set_sctlr(sctlr & ~(CR_C|CR_M));
> +
> +	flush_dcache_all();
> +	__asm_invalidate_tlb_all();
> +}
> +
> +int dcache_status(void)
> +{
> +	return (get_sctlr() & CR_C) != 0;
> +}
> +
> +#else	/* CONFIG_SYS_DCACHE_OFF */
> +
> +void invalidate_dcache_all(void)
> +{
> +}
> +
> +void flush_dcache_all(void)
> +{
> +}
> +
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void dcache_enable(void)
> +{
> +}
> +
> +void dcache_disable(void)
> +{
> +}
> +
> +int dcache_status(void)
> +{
> +	return 0;
> +}
> +
> +#endif	/* CONFIG_SYS_DCACHE_OFF */
> +
> +#ifndef CONFIG_SYS_ICACHE_OFF
> +
> +void icache_enable(void)
> +{
> +	set_sctlr(get_sctlr() | CR_I);
> +}
> +
> +void icache_disable(void)
> +{
> +	set_sctlr(get_sctlr() & ~CR_I);
> +}
> +
> +int icache_status(void)
> +{
> +	return (get_sctlr() & CR_I) != 0;
> +}
> +
> +void invalidate_icache_all(void)
> +{
> +	__asm_invalidate_icache_all();
> +}
> +
> +#else	/* CONFIG_SYS_ICACHE_OFF */
> +
> +void icache_enable(void)
> +{
> +}
> +
> +void icache_disable(void)
> +{
> +}
> +
> +int icache_status(void)
> +{
> +	return 0;
> +}
> +
> +void invalidate_icache_all(void)
> +{
> +}
> +
> +#endif	/* CONFIG_SYS_ICACHE_OFF */
> +
> +/*
> + * Enable dCache & iCache, whether cache is actually enabled
> + * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
> + */
> +void enable_caches(void)
> +{
> +	icache_enable();
> +	dcache_enable();
> +}
> +
> +/*
> + * Flush range from all levels of d-cache/unified-cache
> + */
> +void flush_cache(unsigned long start, unsigned long size)
> +{
> +	flush_dcache_range(start, start + size);
> +}
> diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
> new file mode 100644
> index 0000000..027a68c
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/config.mk
> @@ -0,0 +1,15 @@
> +#
> +# (C) Copyright 2002
> +# Gary Jennejohn, DENX Software Engineering, <garyj at denx.de>
> +#
> +# SPDX-License-Identifier:	GPL-2.0+
> +#
> +PLATFORM_RELFLAGS += -fno-common -ffixed-x18
> +
> +# SEE README.arm-unaligned-accesses
> +PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
> +PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
> +
> +PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
> +PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
> +PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
> diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
> new file mode 100644
> index 0000000..e06c3cc
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/cpu.c
> @@ -0,0 +1,43 @@
> +/*
> + * (C) Copyright 2008 Texas Insturments
> + *
> + * (C) Copyright 2002
> + * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
> + * Marius Groeger <mgroeger at sysgo.de>
> + *
> + * (C) Copyright 2002
> + * Gary Jennejohn, DENX Software Engineering, <garyj at denx.de>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <command.h>
> +#include <asm/system.h>
> +#include <linux/compiler.h>
> +
> +int cleanup_before_linux(void)
> +{
> +	/*
> +	 * this function is called just before we call linux
> +	 * it prepares the processor for linux
> +	 *
> +	 * disable interrupt and turn off caches etc ...
> +	 */
> +	disable_interrupts();
> +
> +	/*
> +	 * Turn off I-cache and invalidate it
> +	 */
> +	icache_disable();
> +	invalidate_icache_all();
> +
> +	/*
> +	 * turn off D-cache
> +	 * dcache_disable() in turn flushes the d-cache and disables MMU
> +	 */
> +	dcache_disable();
> +	invalidate_dcache_all();
> +
> +	return 0;
> +}
> diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
> new file mode 100644
> index 0000000..b91a1b6
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/exceptions.S
> @@ -0,0 +1,113 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <asm/ptrace.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * Enter Exception.
> + * This will save the processor state that is ELR/X0~X30
> + * to the stack frame.
> + */
> +.macro	exception_entry
> +	stp	x29, x30, [sp, #-16]!
> +	stp	x27, x28, [sp, #-16]!
> +	stp	x25, x26, [sp, #-16]!
> +	stp	x23, x24, [sp, #-16]!
> +	stp	x21, x22, [sp, #-16]!
> +	stp	x19, x20, [sp, #-16]!
> +	stp	x17, x18, [sp, #-16]!
> +	stp	x15, x16, [sp, #-16]!
> +	stp	x13, x14, [sp, #-16]!
> +	stp	x11, x12, [sp, #-16]!
> +	stp	x9, x10, [sp, #-16]!
> +	stp	x7, x8, [sp, #-16]!
> +	stp	x5, x6, [sp, #-16]!
> +	stp	x3, x4, [sp, #-16]!
> +	stp	x1, x2, [sp, #-16]!
> +
> +	/* Could be running at EL3/EL2/EL1 */
> +	switch_el x11, 3f, 2f, 1f
> +3:	mrs	x1, esr_el3
> +	mrs	x2, elr_el3
> +	b	0f
> +2:	mrs	x1, esr_el2
> +	mrs	x2, elr_el2
> +	b	0f
> +1:	mrs	x1, esr_el1
> +	mrs	x2, elr_el1
> +0:
> +	stp	x2, x0, [sp, #-16]!
> +	mov	x0, sp
> +.endm
> +
> +/*
> + * Exception vectors.
> + */
> +	.align	11
> +	.globl	vectors
> +vectors:
> +	.align	7
> +	b	_do_bad_sync	/* Current EL Synchronous Thread */
> +
> +	.align	7
> +	b	_do_bad_irq	/* Current EL IRQ Thread */
> +
> +	.align	7
> +	b	_do_bad_fiq	/* Current EL FIQ Thread */
> +
> +	.align	7
> +	b	_do_bad_error	/* Current EL Error Thread */
> +
> +	.align	7
> +	b	_do_sync	/* Current EL Synchronous Handler */
> +
> +	.align	7
> +	b	_do_irq		/* Current EL IRQ Handler */
> +
> +	.align	7
> +	b	_do_fiq		/* Current EL FIQ Handler */
> +
> +	.align	7
> +	b	_do_error	/* Current EL Error Handler */
> +
> +
> +_do_bad_sync:
> +	exception_entry
> +	bl	do_bad_sync
> +
> +_do_bad_irq:
> +	exception_entry
> +	bl	do_bad_irq
> +
> +_do_bad_fiq:
> +	exception_entry
> +	bl	do_bad_fiq
> +
> +_do_bad_error:
> +	exception_entry
> +	bl	do_bad_error
> +
> +_do_sync:
> +	exception_entry
> +	bl	do_sync
> +
> +_do_irq:
> +	exception_entry
> +	bl	do_irq
> +
> +_do_fiq:
> +	exception_entry
> +	bl	do_fiq
> +
> +_do_error:
> +	exception_entry
> +	bl	do_error
> diff --git a/arch/arm/cpu/armv8/generic_timer.c b/arch/arm/cpu/armv8/generic_timer.c
> new file mode 100644
> index 0000000..223b95e
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/generic_timer.c
> @@ -0,0 +1,31 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <command.h>
> +#include <asm/system.h>
> +
> +/*
> + * Generic timer implementation of get_tbclk()
> + */
> +unsigned long get_tbclk(void)
> +{
> +	unsigned long cntfrq;
> +	asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
> +	return cntfrq;
> +}
> +
> +/*
> + * Generic timer implementation of timer_read_counter()
> + */
> +unsigned long timer_read_counter(void)
> +{
> +	unsigned long cntpct;
> +	isb();
> +	asm volatile("mrs %0, cntpct_el0" : "=r" (cntpct));
> +	return cntpct;
> +}
> diff --git a/arch/arm/cpu/armv8/gic.S b/arch/arm/cpu/armv8/gic.S

The ARMv8 Foundation Model has support for GICv2, while GICv3 is the
interrupt controller actually matched to ARMv8. So although you mention
in the cover letter that this is currently GICv2 support, when GICv3
support is added later it will be confusing to have GICv2 code living in
the 'arch/arm/cpu/armv8/' directory.

In fact, GICv2 is compatible with ARMv7, and since the secure and
non-secure copies of the GIC registers are equally applicable to ARMv7,
would it make sense to keep the GICv2 code in a place where both ARMv7
and ARMv8 can use it?

Can we reuse something from [1] for GICv2:

[1] 
http://git.denx.de/?p=u-boot.git;a=blob;f=arch/arm/cpu/armv7/nonsec_virt.S;h=24b4c18bd452fa155bcd5ed94c755aa05a33efe7;hb=HEAD#l88

> new file mode 100644
> index 0000000..599aa8f
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/gic.S
> @@ -0,0 +1,106 @@
> +/*
> + * GIC Initialization Routines.
> + *
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +#include <asm/gic.h>
> +
> +
> +/*************************************************************************
> + *
> + * void gic_init(void) __attribute__((weak));
> + *
> + * Currently, this routine only initialize secure copy of GIC
> + * with Security Extensions at EL3.
> + *
> + *************************************************************************/
> +WEAK(gic_init)
> +	branch_if_slave	x0, 2f
> +
> +	/* Initialize Distributor and SPIs */
> +	ldr	x1, =GICD_BASE
> +	mov	w0, #0x3		/* EnableGrp0 | EnableGrp1 */
> +	str	w0, [x1, GICD_CTLR]	/* Secure GICD_CTLR */
> +	ldr	w0, [x1, GICD_TYPER]
> +	and	w2, w0, #0x1f		/* ITLinesNumber */
> +	cbz	w2, 2f			/* No SPIs */
> +	add	x1, x1, (GICD_IGROUPRn + 4)
> +	mov	w0, #~0			/* Config SPIs as Grp1 */
> +1:	str	w0, [x1], #0x4
> +	sub	w2, w2, #0x1
> +	cbnz	w2, 1b
> +
> +	/* Initialize SGIs and PPIs */
> +2:	ldr	x1, =GICD_BASE
> +	mov	w0, #~0			/* Config SGIs and PPIs as Grp1 */
> +	str	w0, [x1, GICD_IGROUPRn]	/* GICD_IGROUPR0 */
> +	mov	w0, #0x1		/* Enable SGI 0 */
> +	str	w0, [x1, GICD_ISENABLERn]
> +
> +	/* Initialize Cpu Interface */
> +	ldr	x1, =GICC_BASE
> +	mov	w0, #0x1e7		/* Disable IRQ/FIQ Bypass & */
> +					/* Enable Ack Group1 Interrupt & */
> +					/* EnableGrp0 & EnableGrp1 */
> +	str	w0, [x1, GICC_CTLR]	/* Secure GICC_CTLR */
> +
> +	mov	w0, #0x1 << 7		/* Non-Secure access to GICC_PMR */
> +	str	w0, [x1, GICC_PMR]
> +
> +	ret
> +ENDPROC(gic_init)
> +
> +
> +/*************************************************************************
> + *
> + * void gic_send_sgi(u64 sgi) __attribute__((weak));
> + *
> + *************************************************************************/
> +WEAK(gic_send_sgi)
> +	ldr	x1, =GICD_BASE
> +	mov	w2, #0x8000
> +	movk	w2, #0x100, lsl #16
> +	orr	w2, w2, w0
> +	str	w2, [x1, GICD_SGIR]
> +	ret
> +ENDPROC(gic_send_sgi)
> +
> +
> +/*************************************************************************
> + *
> + * void wait_for_wakeup(void) __attribute__((weak));
> + *
> + * Wait for SGI 0 from master.
> + *
> + *************************************************************************/
> +WEAK(wait_for_wakeup)
> +	ldr	x1, =GICC_BASE
> +0:	wfi
> +	ldr	w0, [x1, GICC_AIAR]
> +	str	w0, [x1, GICC_AEOIR]
> +	cbnz	w0, 0b
> +	ret
> +ENDPROC(wait_for_wakeup)
> +
> +
> +/*************************************************************************
> + *
> + * void smp_kick_all_cpus(void) __attribute__((weak));
> + *
> + *************************************************************************/
> +WEAK(smp_kick_all_cpus)
> +	/* Kick secondary cpus up by SGI 0 interrupt */
> +	mov	x0, xzr			/* SGI 0 */
> +	mov	x29, lr			/* Save LR */
> +	bl	gic_send_sgi
> +	mov	lr, x29			/* Restore LR */
> +	ret
> +ENDPROC(smp_kick_all_cpus)
> diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
> new file mode 100644
> index 0000000..bcc2603
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/start.S
> @@ -0,0 +1,164 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +#include <asm/armv8/mmu.h>
> +
> +/*************************************************************************
> + *
> + * Startup Code (reset vector)
> + *
> + *************************************************************************/
> +
> +.globl	_start
> +_start:
> +	b	reset
> +
> +	.align 3
> +
> +.globl	_TEXT_BASE
> +_TEXT_BASE:
> +	.quad	CONFIG_SYS_TEXT_BASE
> +
> +/*
> + * These are defined in the linker script.
> + */
> +.globl	_end_ofs
> +_end_ofs:
> +	.quad	_end - _start
> +
> +.globl	_bss_start_ofs
> +_bss_start_ofs:
> +	.quad	__bss_start - _start
> +
> +.globl	_bss_end_ofs
> +_bss_end_ofs:
> +	.quad	__bss_end - _start
> +
> +reset:
> +	/*
> +	 * Could be EL3/EL2/EL1, Initial State:
> +	 * Little Endian, MMU Disabled, i/dCache Disabled
> +	 */
> +	adr	x0, vectors
> +	switch_el x1, 3f, 2f, 1f
> +3:	msr	vbar_el3, x0
> +	msr	cptr_el3, xzr			/* Enable FP/SIMD */
> +	ldr	x0, =COUNTER_FREQUENCY
> +	msr	cntfrq_el0, x0			/* Initialize CNTFRQ */
> +	b	0f
> +2:	msr	vbar_el2, x0
> +	mov	x0, #0x33ff
> +	msr	cptr_el2, x0			/* Enable FP/SIMD */
> +	b	0f
> +1:	msr	vbar_el1, x0
> +	mov	x0, #3 << 20
> +	msr	cpacr_el1, x0			/* Enable FP/SIMD */
> +0:
> +
> +	/* Cache/BPB/TLB Invalidate */
> +	bl	__asm_flush_dcache_all		/* dCache clean&invalidate */
> +	bl	__asm_invalidate_icache_all	/* iCache invalidate */
> +	bl	__asm_invalidate_tlb_all	/* invalidate TLBs */
> +
> +	/* Processor specific initialization */
> +	bl	lowlevel_init

Shouldn't this call be protected inside a
'#ifndef CONFIG_SKIP_LOWLEVEL_INIT'?

> +
> +	branch_if_master x0, x1, master_cpu
> +
> +	/*
> +	 * Slave CPUs
> +	 */
> +slave_cpu:
> +	wfe
> +	ldr	x1, =CPU_RELEASE_ADDR
> +	ldr	x0, [x1]
> +	cbz	x0, slave_cpu
> +	br	x0			/* branch to the given address */
> +
> +	/*
> +	 * Master CPU
> +	 */
> +master_cpu:
> +	bl	_main
> +
> +/*-----------------------------------------------------------------------*/
> +
> +WEAK(lowlevel_init)

Ok, so this means that a specific SoC lowlevel_init implementation can
override this generic implementation. That seems necessary, because I am
sure other secure/non-secure settings need to be put into place for ARM
IPs like the SMMU-500.

> +	/* Initialize GIC Secure Bank Status */
> +	mov	x29, lr			/* Save LR */
> +	bl	gic_init
> +
> +	branch_if_master x0, x1, 1f
> +
> +	/*
> +	 * Slave should wait for master clearing spin table.
> +	 * This sync prevent salves observing incorrect
> +	 * value of spin table and jumping to wrong place.
> +	 */
> +	bl	wait_for_wakeup
> +
> +	/*
> +	 * All processors will enter EL2 and optionally EL1.
> +	 */
> +	bl	armv8_switch_to_el2
> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> +	bl	armv8_switch_to_el1
> +#endif
> +
> +1:
> +	mov	lr, x29			/* Restore LR */
> +	ret
> +ENDPROC(lowlevel_init)
> +
> +/*-----------------------------------------------------------------------*/
> +
> +ENTRY(c_runtime_cpu_setup)
> +	/* If I-cache is enabled invalidate it */
> +#ifndef CONFIG_SYS_ICACHE_OFF
> +	ic	iallu			/* I+BTB cache invalidate */
> +	isb	sy
> +#endif
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +	/*
> +	 * Setup MAIR and TCR.
> +	 */
> +	ldr	x0, =MEMORY_ATTRIBUTES
> +	ldr	x1, =TCR_FLAGS
> +
> +	switch_el x2, 3f, 2f, 1f
> +3:	orr	x1, x1, TCR_EL3_IPS_BITS
> +	msr	mair_el3, x0
> +	msr	tcr_el3, x1
> +	b	0f
> +2:	orr	x1, x1, TCR_EL2_IPS_BITS
> +	msr	mair_el2, x0
> +	msr	tcr_el2, x1
> +	b	0f
> +1:	orr	x1, x1, TCR_EL1_IPS_BITS
> +	msr	mair_el1, x0
> +	msr	tcr_el1, x1
> +0:
> +#endif
> +
> +	/* Relocate vBAR */
> +	adr	x0, vectors
> +	switch_el x1, 3f, 2f, 1f
> +3:	msr	vbar_el3, x0
> +	b	0f
> +2:	msr	vbar_el2, x0
> +	b	0f
> +1:	msr	vbar_el1, x0
> +0:
> +
> +	ret
> +ENDPROC(c_runtime_cpu_setup)
> diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
> new file mode 100644
> index 0000000..f840b04
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/tlb.S
> @@ -0,0 +1,34 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +
> +/*
> + * void __asm_invalidate_tlb_all(void)
> + *
> + * invalidate all tlb entries.
> + */
> +ENTRY(__asm_invalidate_tlb_all)
> +	switch_el x9, 3f, 2f, 1f
> +3:	tlbi	alle3
> +	dsb	sy
> +	isb
> +	b	0f
> +2:	tlbi	alle2
> +	dsb	sy
> +	isb
> +	b	0f
> +1:	tlbi	vmalle1
> +	dsb	sy
> +	isb
> +0:
> +	ret
> +ENDPROC(__asm_invalidate_tlb_all)
> diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
> new file mode 100644
> index 0000000..e0a5946
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/transition.S
> @@ -0,0 +1,83 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <version.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +
> +ENTRY(armv8_switch_to_el2)

Do we need a switch to Secure Monitor here? I am not able to see how
this relates to the present ARMv7 code (see [2]):

[2] 
http://git.denx.de/?p=u-boot.git;a=blob;f=arch/arm/cpu/armv7/nonsec_virt.S;h=24b4c18bd452fa155bcd5ed94c755aa05a33efe7;hb=HEAD#l29


> +	switch_el x0, 1f, 0f, 0f
> +0:	ret
> +1:
> +	mov	x0, #0x5b1	/* Non-secure EL0/EL1 | HVC | 64bit EL2 */
> +	msr	scr_el3, x0
> +	msr	cptr_el3, xzr	/* Disable coprocessor traps to EL3 */
> +	mov	x0, #0x33ff
> +	msr	cptr_el2, x0	/* Disable coprocessor traps to EL2 */
> +
> +	/* Initialize SCTLR_EL2 */
> +	msr	sctlr_el2, xzr
> +
> +	/* Return to the EL2_SP2 mode from EL3 */
> +	mov	x0, sp
> +	msr	sp_el2, x0	/* Migrate SP */
> +	mrs	x0, vbar_el3
> +	msr	vbar_el2, x0	/* Migrate VBAR */
> +	mov	x0, #0x3c9
> +	msr	spsr_el3, x0	/* EL2_SP2 | D | A | I | F */
> +	msr	elr_el3, lr
> +	eret
> +ENDPROC(armv8_switch_to_el2)
> +
> +ENTRY(armv8_switch_to_el1)
> +	switch_el x0, 0f, 1f, 0f
> +0:	ret
> +1:
> +	/* Initialize Generic Timers */
> +	mrs	x0, cnthctl_el2
> +	orr	x0, x0, #0x3		/* Enable EL1 access to timers */
> +	msr	cnthctl_el2, x0
> +	msr	cntvoff_el2, x0
> +	mrs	x0, cntkctl_el1
> +	orr	x0, x0, #0x3		/* Enable EL0 access to timers */
> +	msr	cntkctl_el1, x0
> +
> +	/* Initilize MPID/MPIDR registers */
> +	mrs	x0, midr_el1
> +	mrs	x1, mpidr_el1
> +	msr	vpidr_el2, x0
> +	msr	vmpidr_el2, x1
> +
> +	/* Disable coprocessor traps */
> +	mov	x0, #0x33ff
> +	msr	cptr_el2, x0		/* Disable coprocessor traps to EL2 */
> +	msr	hstr_el2, xzr		/* Disable coprocessor traps to EL2 */
> +	mov	x0, #3 << 20
> +	msr	cpacr_el1, x0		/* Enable FP/SIMD at EL1 */
> +
> +	/* Initialize HCR_EL2 */
> +	mov	x0, #(1 << 31)		/* 64bit EL1 */
> +	orr	x0, x0, #(1 << 29)	/* Disable HVC */
> +	msr	hcr_el2, x0
> +
> +	/* SCTLR_EL1 initialization */
> +	mov	x0, #0x0800
> +	movk	x0, #0x30d0, lsl #16
> +	msr	sctlr_el1, x0
> +
> +	/* Return to the EL1_SP1 mode from EL2 */
> +	mov	x0, sp
> +	msr	sp_el1, x0		/* Migrate SP */
> +	mrs	x0, vbar_el2
> +	msr	vbar_el1, x0		/* Migrate VBAR */
> +	mov	x0, #0x3c5
> +	msr	spsr_el2, x0		/* EL1_SP1 | D | A | I | F */
> +	msr	elr_el2, lr
> +	eret
> +ENDPROC(armv8_switch_to_el1)
> diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
> new file mode 100644
> index 0000000..4c12222
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/u-boot.lds
> @@ -0,0 +1,89 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * (C) Copyright 2002
> + * Gary Jennejohn, DENX Software Engineering, <garyj at denx.de>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
> +OUTPUT_ARCH(aarch64)
> +ENTRY(_start)
> +SECTIONS
> +{
> +	. = 0x00000000;
> +
> +	. = ALIGN(8);
> +	.text :
> +	{
> +		*(.__image_copy_start)
> +		CPUDIR/start.o (.text*)
> +		*(.text*)
> +	}
> +
> +	. = ALIGN(8);
> +	.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
> +
> +	. = ALIGN(8);
> +	.data : {
> +		*(.data*)
> +	}
> +
> +	. = ALIGN(8);
> +
> +	. = .;
> +
> +	. = ALIGN(8);
> +	.u_boot_list : {
> +		KEEP(*(SORT(.u_boot_list*)));
> +	}
> +
> +	. = ALIGN(8);
> +
> +	.image_copy_end :
> +	{
> +		*(.__image_copy_end)
> +	}
> +
> +	. = ALIGN(8);
> +
> +	.rel_dyn_start :
> +	{
> +		*(.__rel_dyn_start)
> +	}
> +
> +	.rela.dyn : {
> +		*(.rela*)
> +	}
> +
> +	.rel_dyn_end :
> +	{
> +		*(.__rel_dyn_end)
> +	}
> +
> +	_end = .;
> +
> +	. = ALIGN(8);
> +
> +	.bss_start : {
> +		KEEP(*(.__bss_start));
> +	}
> +
> +	.bss : {
> +		*(.bss*)
> +		 . = ALIGN(8);
> +	}
> +
> +	.bss_end : {
> +		KEEP(*(.__bss_end));
> +	}
> +
> +	/DISCARD/ : { *(.dynsym) }
> +	/DISCARD/ : { *(.dynstr*) }
> +	/DISCARD/ : { *(.dynamic*) }
> +	/DISCARD/ : { *(.plt*) }
> +	/DISCARD/ : { *(.interp*) }
> +	/DISCARD/ : { *(.gnu*) }
> +}
> diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
> new file mode 100644
> index 0000000..1193e76
> --- /dev/null
> +++ b/arch/arm/include/asm/armv8/mmu.h
> @@ -0,0 +1,111 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#ifndef _ASM_ARMV8_MMU_H_
> +#define _ASM_ARMV8_MMU_H_
> +
> +#ifdef __ASSEMBLY__
> +#define _AC(X, Y)	X
> +#else
> +#define _AC(X, Y)	(X##Y)
> +#endif
> +
> +#define UL(x)		_AC(x, UL)
> +
> +/***************************************************************/
> +/*
> + * The following definitions are related to each other and should be
> + * recalculated together if any one of them is changed.
> + */
> +#define VA_BITS			(42)	/* 42 bits virtual address */
> +
> +/* PAGE_SHIFT determines the page size */
> +#undef  PAGE_SIZE
> +#define PAGE_SHIFT		16
> +#define PAGE_SIZE		(1 << PAGE_SHIFT)
> +#define PAGE_MASK		(~(PAGE_SIZE-1))
> +
> +/*
> + * section address mask and size definitions.
> + */
> +#define SECTION_SHIFT		29
> +#define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
> +#define SECTION_MASK		(~(SECTION_SIZE-1))
> +/***************************************************************/
> +
> +/*
> + * Memory types
> + */
> +#define MT_DEVICE_NGNRNE	0
> +#define MT_DEVICE_NGNRE		1
> +#define MT_DEVICE_GRE		2
> +#define MT_NORMAL_NC		3
> +#define MT_NORMAL		4
> +
> +#define MEMORY_ATTRIBUTES	((0x00 << (MT_DEVICE_NGNRNE*8)) |	\
> +				(0x04 << (MT_DEVICE_NGNRE*8)) |		\
> +				(0x0c << (MT_DEVICE_GRE*8)) |		\
> +				(0x44 << (MT_NORMAL_NC*8)) |		\
> +				(UL(0xff) << (MT_NORMAL*8)))
> +
> +/*
> + * Hardware page table definitions.
> + *
> + * Level 2 descriptor (PMD).
> + */
> +#define PMD_TYPE_MASK		(3 << 0)
> +#define PMD_TYPE_FAULT		(0 << 0)
> +#define PMD_TYPE_TABLE		(3 << 0)
> +#define PMD_TYPE_SECT		(1 << 0)
> +
> +/*
> + * Section
> + */
> +#define PMD_SECT_S		(3 << 8)
> +#define PMD_SECT_AF		(1 << 10)
> +#define PMD_SECT_NG		(1 << 11)
> +#define PMD_SECT_PXN		(UL(1) << 53)
> +#define PMD_SECT_UXN		(UL(1) << 54)
> +
> +/*
> + * AttrIndx[2:0]
> + */
> +#define PMD_ATTRINDX(t)		((t) << 2)
> +#define PMD_ATTRINDX_MASK	(7 << 2)
> +
> +/*
> + * TCR flags.
> + */
> +#define TCR_T0SZ(x)		((64 - (x)) << 0)
> +#define TCR_IRGN_NC		(0 << 8)
> +#define TCR_IRGN_WBWA		(1 << 8)
> +#define TCR_IRGN_WT		(2 << 8)
> +#define TCR_IRGN_WBNWA		(3 << 8)
> +#define TCR_IRGN_MASK		(3 << 8)
> +#define TCR_ORGN_NC		(0 << 10)
> +#define TCR_ORGN_WBWA		(1 << 10)
> +#define TCR_ORGN_WT		(2 << 10)
> +#define TCR_ORGN_WBNWA		(3 << 10)
> +#define TCR_ORGN_MASK		(3 << 10)
> +#define TCR_SHARED_NON		(0 << 12)
> +#define TCR_SHARED_OUTER	(1 << 12)
> +#define TCR_SHARED_INNER	(2 << 12)
> +#define TCR_TG0_4K		(0 << 14)
> +#define TCR_TG0_64K		(1 << 14)
> +#define TCR_TG0_16K		(2 << 14)
> +#define TCR_EL1_IPS_BITS	(UL(3) << 32)	/* 42 bits physical address */
> +#define TCR_EL2_IPS_BITS	(3 << 16)	/* 42 bits physical address */
> +#define TCR_EL3_IPS_BITS	(3 << 16)	/* 42 bits physical address */
> +
> +/* PTWs cacheable, inner/outer WBWA and non-shareable */
> +#define TCR_FLAGS		(TCR_TG0_64K |		\
> +				TCR_SHARED_NON |	\
> +				TCR_ORGN_WBWA |		\
> +				TCR_IRGN_WBWA |		\
> +				TCR_T0SZ(VA_BITS))
> +
> +#endif /* _ASM_ARMV8_MMU_H_ */
> diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
> index c3489f1..71a9966 100644
> --- a/arch/arm/include/asm/byteorder.h
> +++ b/arch/arm/include/asm/byteorder.h
> @@ -23,10 +23,22 @@
>   #  define __SWAB_64_THRU_32__
>   #endif
>
> +#ifdef	CONFIG_ARM64
> +
> +#ifdef __AARCH64EB__
> +#include <linux/byteorder/big_endian.h>
> +#else
> +#include <linux/byteorder/little_endian.h>
> +#endif
> +
> +#else	/* CONFIG_ARM64 */
> +
>   #ifdef __ARMEB__
>   #include <linux/byteorder/big_endian.h>
>   #else
>   #include <linux/byteorder/little_endian.h>
>   #endif
>
> +#endif	/* CONFIG_ARM64 */
> +
>   #endif
> diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> index 6d60a4a..ddebbc8 100644
> --- a/arch/arm/include/asm/cache.h
> +++ b/arch/arm/include/asm/cache.h
> @@ -11,6 +11,8 @@
>
>   #include <asm/system.h>
>
> +#ifndef CONFIG_ARM64
> +
>   /*
>    * Invalidate L2 Cache using co-proc instruction
>    */
> @@ -28,6 +30,9 @@ void l2_cache_disable(void);
>   void set_section_dcache(int section, enum dcache_option option);
>
>   void dram_bank_mmu_setup(int bank);
> +
> +#endif
> +
>   /*
>    * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
>    * use that value for aligning DMA buffers unless the board config has specified
> diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
> index 99b703e..abf79e5 100644
> --- a/arch/arm/include/asm/config.h
> +++ b/arch/arm/include/asm/config.h
> @@ -9,4 +9,10 @@
>
>   #define CONFIG_LMB
>   #define CONFIG_SYS_BOOT_RAMDISK_HIGH
> +
> +#ifdef CONFIG_ARM64
> +#define CONFIG_PHYS_64BIT
> +#define CONFIG_STATIC_RELA
> +#endif
> +
>   #endif
> diff --git a/arch/arm/include/asm/gic.h b/arch/arm/include/asm/gic.h
> index a0891cc..ac2b2bf 100644
> --- a/arch/arm/include/asm/gic.h
> +++ b/arch/arm/include/asm/gic.h
> @@ -1,19 +1,54 @@
> -#ifndef __GIC_V2_H__
> -#define __GIC_V2_H__
> +#ifndef __GIC_H__
> +#define __GIC_H__
>
> -/* register offsets for the ARM generic interrupt controller (GIC) */
> +/* Register offsets for the ARM generic interrupt controller (GIC) */
>
>   #define GIC_DIST_OFFSET		0x1000
> +#define GIC_CPU_OFFSET_A9	0x0100
> +#define GIC_CPU_OFFSET_A15	0x2000
> +
> +/* Distributor Registers */
>   #define GICD_CTLR		0x0000
>   #define GICD_TYPER		0x0004
> +#define GICD_IIDR		0x0008
> +#define GICD_STATUSR		0x0010
> +#define GICD_SETSPI_NSR		0x0040
> +#define GICD_CLRSPI_NSR		0x0048
> +#define GICD_SETSPI_SR		0x0050
> +#define GICD_CLRSPI_SR		0x0058
> +#define GICD_SEIR		0x0068
>   #define GICD_IGROUPRn		0x0080
> -#define GICD_SGIR		0x0F00
> +#define GICD_ISENABLERn		0x0100
> +#define GICD_ICENABLERn		0x0180
> +#define GICD_ISPENDRn		0x0200
> +#define GICD_ICPENDRn		0x0280
> +#define GICD_ISACTIVERn		0x0300
> +#define GICD_ICACTIVERn		0x0380
> +#define GICD_IPRIORITYRn	0x0400
> +#define GICD_ITARGETSRn		0x0800
> +#define GICD_ICFGR		0x0c00
> +#define GICD_IGROUPMODRn	0x0d00
> +#define GICD_NSACRn		0x0e00
> +#define GICD_SGIR		0x0f00
> +#define GICD_CPENDSGIRn		0x0f10
> +#define GICD_SPENDSGIRn		0x0f20
> +#define GICD_IROUTERn		0x6000
>
> -#define GIC_CPU_OFFSET_A9	0x0100
> -#define GIC_CPU_OFFSET_A15	0x2000
> +/* Cpu Interface Memory Mapped Registers */
>   #define GICC_CTLR		0x0000
>   #define GICC_PMR		0x0004
> +#define GICC_BPR		0x0008
>   #define GICC_IAR		0x000C
>   #define GICC_EOIR		0x0010
> +#define GICC_RPR		0x0014
> +#define GICC_HPPIR		0x0018
> +#define GICC_ABPR		0x001c
> +#define GICC_AIAR		0x0020
> +#define GICC_AEOIR		0x0024
> +#define GICC_AHPPIR		0x0028
> +#define GICC_APRn		0x00d0
> +#define GICC_NSAPRn		0x00e0
> +#define GICC_IIDR		0x00fc
> +#define GICC_DIR		0x1000
>
> -#endif
> +#endif /* __GIC_H__ */
> diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
> index e126436..60e8726 100644
> --- a/arch/arm/include/asm/global_data.h
> +++ b/arch/arm/include/asm/global_data.h
> @@ -47,6 +47,10 @@ struct arch_global_data {
>
>   #include <asm-generic/global_data.h>
>
> -#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r9")
> +#ifdef CONFIG_ARM64
> +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("x18")
> +#else
> +#define DECLARE_GLOBAL_DATA_PTR		register volatile gd_t *gd asm ("r9")
> +#endif
>
>   #endif /* __ASM_GBL_DATA_H */
> diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
> index 1fbc531..6a1f05a 100644
> --- a/arch/arm/include/asm/io.h
> +++ b/arch/arm/include/asm/io.h
> @@ -75,42 +75,45 @@ static inline phys_addr_t virt_to_phys(void * vaddr)
>   #define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
>   #define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
>
> -extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen)
> +extern inline void __raw_writesb(unsigned long addr, const void *data,
> +				 int bytelen)
>   {
>   	uint8_t *buf = (uint8_t *)data;
>   	while(bytelen--)
>   		__arch_putb(*buf++, addr);
>   }
>
> -extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen)
> +extern inline void __raw_writesw(unsigned long addr, const void *data,
> +				 int wordlen)
>   {
>   	uint16_t *buf = (uint16_t *)data;
>   	while(wordlen--)
>   		__arch_putw(*buf++, addr);
>   }
>
> -extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen)
> +extern inline void __raw_writesl(unsigned long addr, const void *data,
> +				 int longlen)
>   {
>   	uint32_t *buf = (uint32_t *)data;
>   	while(longlen--)
>   		__arch_putl(*buf++, addr);
>   }
>
> -extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
> +extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
>   {
>   	uint8_t *buf = (uint8_t *)data;
>   	while(bytelen--)
>   		*buf++ = __arch_getb(addr);
>   }
>
> -extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
> +extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
>   {
>   	uint16_t *buf = (uint16_t *)data;
>   	while(wordlen--)
>   		*buf++ = __arch_getw(addr);
>   }
>
> -extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
> +extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
>   {
>   	uint32_t *buf = (uint32_t *)data;
>   	while(longlen--)
> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> index ff13f36..f77e4b8 100644
> --- a/arch/arm/include/asm/macro.h
> +++ b/arch/arm/include/asm/macro.h
> @@ -54,5 +54,58 @@
>   	bcs	1b
>   .endm
>
> +#ifdef CONFIG_ARM64
> +/*
> + * Register aliases.
> + */
> +lr	.req	x30
> +
> +/*
> + * Branch according to exception level
> + */
> +.macro	switch_el, xreg, el3_label, el2_label, el1_label
> +	mrs	\xreg, CurrentEL
> +	cmp	\xreg, 0xc
> +	b.eq	\el3_label
> +	cmp	\xreg, 0x8
> +	b.eq	\el2_label
> +	cmp	\xreg, 0x4
> +	b.eq	\el1_label
> +.endm
> +
> +/*
> + * Branch if current processor is a slave,
> + * choose processor with all zero affinity value as the master.
> + */
> +.macro	branch_if_slave, xreg, slave_label
> +	mrs	\xreg, mpidr_el1
> +	tst	\xreg, #0xff		/* Test Affinity 0 */
> +	b.ne	\slave_label
> +	lsr	\xreg, \xreg, #8
> +	tst	\xreg, #0xff		/* Test Affinity 1 */
> +	b.ne	\slave_label
> +	lsr	\xreg, \xreg, #8
> +	tst	\xreg, #0xff		/* Test Affinity 2 */
> +	b.ne	\slave_label
> +	lsr	\xreg, \xreg, #16
> +	tst	\xreg, #0xff		/* Test Affinity 3 */
> +	b.ne	\slave_label
> +.endm
> +
> +/*
> + * Branch if current processor is a master,
> + * choose processor with all zero affinity value as the master.
> + */
> +.macro	branch_if_master, xreg1, xreg2, master_label
> +	mrs	\xreg1, mpidr_el1
> +	lsr	\xreg2, \xreg1, #32
> +	lsl	\xreg1, \xreg1, #40
> +	lsr	\xreg1, \xreg1, #40
> +	orr	\xreg1, \xreg1, \xreg2
> +	cbz	\xreg1, \master_label
> +.endm
> +
> +#endif /* CONFIG_ARM64 */
> +
>   #endif /* __ASSEMBLY__ */
>   #endif /* __ASM_ARM_MACRO_H__ */
> diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
> index c412486..9ba9add 100644
> --- a/arch/arm/include/asm/posix_types.h
> +++ b/arch/arm/include/asm/posix_types.h
> @@ -13,6 +13,8 @@
>   #ifndef __ARCH_ARM_POSIX_TYPES_H
>   #define __ARCH_ARM_POSIX_TYPES_H
>
> +#include <config.h>
> +
>   /*
>    * This file is generally used by user-level software, so you need to
>    * be a little careful about namespace pollution etc.  Also, we cannot
> @@ -28,9 +30,17 @@ typedef int			__kernel_pid_t;
>   typedef unsigned short		__kernel_ipc_pid_t;
>   typedef unsigned short		__kernel_uid_t;
>   typedef unsigned short		__kernel_gid_t;
> +
> +#ifdef	CONFIG_ARM64
> +typedef unsigned long		__kernel_size_t;
> +typedef long			__kernel_ssize_t;
> +typedef long			__kernel_ptrdiff_t;
> +#else	/* CONFIG_ARM64 */
>   typedef unsigned int		__kernel_size_t;
>   typedef int			__kernel_ssize_t;
>   typedef int			__kernel_ptrdiff_t;
> +#endif	/* CONFIG_ARM64 */
> +
>   typedef long			__kernel_time_t;
>   typedef long			__kernel_suseconds_t;
>   typedef long			__kernel_clock_t;
> diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h
> index a060ee6..21aef58 100644
> --- a/arch/arm/include/asm/proc-armv/ptrace.h
> +++ b/arch/arm/include/asm/proc-armv/ptrace.h
> @@ -10,6 +10,25 @@
>   #ifndef __ASM_PROC_PTRACE_H
>   #define __ASM_PROC_PTRACE_H
>
> +#ifdef CONFIG_ARM64
> +
> +#define PCMASK		0
> +
> +#ifndef __ASSEMBLY__
> +
> +/*
> + * This struct defines the way the registers are stored
> + * on the stack during an exception.
> + */
> +struct pt_regs {
> +	unsigned long elr;
> +	unsigned long regs[31];
> +};
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#else	/* CONFIG_ARM64 */
> +
>   #define USR26_MODE	0x00
>   #define FIQ26_MODE	0x01
>   #define IRQ26_MODE	0x02
> @@ -104,4 +123,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
>
>   #endif	/* __ASSEMBLY__ */
>
> +#endif	/* CONFIG_ARM64 */
> +
>   #endif
> diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h
> index cda8976..693d1f4 100644
> --- a/arch/arm/include/asm/proc-armv/system.h
> +++ b/arch/arm/include/asm/proc-armv/system.h
> @@ -13,6 +13,60 @@
>   /*
>    * Save the current interrupt enable state & disable IRQs
>    */
> +#ifdef CONFIG_ARM64
> +
> +/*
> + * Save the current interrupt enable state
> + * and disable IRQs/FIQs
> + */
> +#define local_irq_save(flags)					\
> +	({							\
> +	asm volatile(						\
> +	"mrs	%0, daif\n"					\
> +	"msr	daifset, #3"					\
> +	: "=r" (flags)						\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +/*
> + * restore saved IRQ & FIQ state
> + */
> +#define local_irq_restore(flags)				\
> +	({							\
> +	asm volatile(						\
> +	"msr	daif, %0"					\
> +	:							\
> +	: "r" (flags)						\
> +	: "memory");						\
> +	})
> +
> +/*
> + * Enable IRQs/FIQs
> + */
> +#define local_irq_enable()					\
> +	({							\
> +	asm volatile(						\
> +	"msr	daifclr, #3"					\
> +	:							\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +/*
> + * Disable IRQs/FIQs
> + */
> +#define local_irq_disable()					\
> +	({							\
> +	asm volatile(						\
> +	"msr	daifset, #3"					\
> +	:							\
> +	:							\
> +	: "memory");						\
> +	})
> +
> +#else	/* CONFIG_ARM64 */
> +
>   #define local_irq_save(x)					\
>   	({							\
>   		unsigned long temp;				\
> @@ -107,7 +161,10 @@
>   	: "r" (x)						\
>   	: "memory")
>
> -#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
> +#endif	/* CONFIG_ARM64 */
> +
> +#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
> +	defined(CONFIG_ARM64)
>   /*
>    * On the StrongARM, "swp" is terminally broken since it bypasses the
>    * cache totally.  This means that the cache becomes inconsistent, and,
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index 760345f..4178f8c 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -1,6 +1,86 @@
>   #ifndef __ASM_ARM_SYSTEM_H
>   #define __ASM_ARM_SYSTEM_H
>
> +#ifdef CONFIG_ARM64
> +
> +/*
> + * SCTLR_EL1/SCTLR_EL2/SCTLR_EL3 bits definitions
> + */
> +#define CR_M		(1 << 0)	/* MMU enable			*/
> +#define CR_A		(1 << 1)	/* Alignment abort enable	*/
> +#define CR_C		(1 << 2)	/* Dcache enable		*/
> +#define CR_SA		(1 << 3)	/* Stack Alignment Check Enable	*/
> +#define CR_I		(1 << 12)	/* Icache enable		*/
> +#define CR_WXN		(1 << 19)	/* Write Permission Imply XN	*/
> +#define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
> +
> +#define PGTABLE_SIZE	(0x10000)
> +
> +#ifndef __ASSEMBLY__
> +
> +#define isb()				\
> +	({asm volatile(			\
> +	"isb" : : : "memory");		\
> +	})
> +
> +#define wfi()				\
> +	({asm volatile(			\
> +	"wfi" : : : "memory");		\
> +	})
> +
> +static inline unsigned int current_el(void)
> +{
> +	unsigned int el;
> +	asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
> +	return el >> 2;
> +}
> +
> +static inline unsigned int get_sctlr(void)
> +{
> +	unsigned int el, val;
> +
> +	el = current_el();
> +	if (el == 1)
> +		asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
> +	else if (el == 2)
> +		asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
> +	else
> +		asm volatile("mrs %0, sctlr_el3" : "=r" (val) : : "cc");
> +
> +	return val;
> +}
> +
> +static inline void set_sctlr(unsigned int val)
> +{
> +	unsigned int el;
> +
> +	el = current_el();
> +	if (el == 1)
> +		asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
> +	else if (el == 2)
> +		asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
> +	else
> +		asm volatile("msr sctlr_el3, %0" : : "r" (val) : "cc");
> +
> +	asm volatile("isb");
> +}
> +
> +void __asm_flush_dcache_all(void);
> +void __asm_flush_dcache_range(u64 start, u64 end);
> +void __asm_invalidate_tlb_all(void);
> +void __asm_invalidate_icache_all(void);
> +
> +void armv8_switch_to_el2(void);
> +void armv8_switch_to_el1(void);
> +void gic_init(void);
> +void gic_send_sgi(unsigned long sgino);
> +void wait_for_wakeup(void);
> +void smp_kick_all_cpus(void);
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#else /* CONFIG_ARM64 */
> +
>   #ifdef __KERNEL__
>
>   #define CPU_ARCH_UNKNOWN	0
> @@ -45,6 +125,8 @@
>   #define CR_AFE	(1 << 29)	/* Access flag enable			*/
>   #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
>
> +#define PGTABLE_SIZE		(4096 * 4)
> +
>   /*
>    * This is used to ensure the compiler did actually allocate the register we
>    * asked it for some inline assembly sequences.  Apparently we can't trust
> @@ -132,4 +214,6 @@ void mmu_page_table_flush(unsigned long start, unsigned long stop);
>
>   #endif /* __KERNEL__ */
>
> +#endif /* CONFIG_ARM64 */
> +
>   #endif
> diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
> index 71dc049..2326420 100644
> --- a/arch/arm/include/asm/types.h
> +++ b/arch/arm/include/asm/types.h
> @@ -39,7 +39,11 @@ typedef unsigned int u32;
>   typedef signed long long s64;
>   typedef unsigned long long u64;
>
> +#ifdef	CONFIG_ARM64
> +#define BITS_PER_LONG 64
> +#else	/* CONFIG_ARM64 */
>   #define BITS_PER_LONG 32
> +#endif	/* CONFIG_ARM64 */
>
>   /* Dma addresses are 32-bits wide.  */
>
> diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
> index 2b5fce8..cb81232 100644
> --- a/arch/arm/include/asm/u-boot.h
> +++ b/arch/arm/include/asm/u-boot.h
> @@ -44,6 +44,10 @@ typedef struct bd_info {
>   #endif /* !CONFIG_SYS_GENERIC_BOARD */
>
>   /* For image.h:image_check_target_arch() */
> +#ifndef CONFIG_ARM64
>   #define IH_ARCH_DEFAULT IH_ARCH_ARM
> +#else
> +#define IH_ARCH_DEFAULT IH_ARCH_ARM64
> +#endif
>
>   #endif	/* _U_BOOT_H_ */
> diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
> index 44593a8..0a228fb 100644
> --- a/arch/arm/include/asm/unaligned.h
> +++ b/arch/arm/include/asm/unaligned.h
> @@ -8,7 +8,7 @@
>   /*
>    * Select endianness
>    */
> -#ifndef __ARMEB__
> +#if __BYTE_ORDER == __LITTLE_ENDIAN
>   #define get_unaligned	__get_unaligned_le
>   #define put_unaligned	__put_unaligned_le
>   #else
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index 679f19a..321997c 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -17,14 +17,22 @@ lib-y	+= _umodsi3.o
>   lib-y	+= div0.o
>   endif
>
> -obj-y += crt0.o
> +ifdef CONFIG_ARM64
> +obj-y	+= crt0_64.o
> +else
> +obj-y	+= crt0.o
> +endif
>
>   ifndef CONFIG_SPL_BUILD
> -obj-y += relocate.o
> +ifdef CONFIG_ARM64
> +obj-y	+= relocate_64.o
> +else
> +obj-y	+= relocate.o
> +endif
>   ifndef CONFIG_SYS_GENERIC_BOARD
>   obj-y	+= board.o
>   endif
> -obj-y += sections.o
> +obj-y	+= sections.o
>
>   obj-$(CONFIG_OF_LIBFDT) += bootm-fdt.o
>   obj-$(CONFIG_CMD_BOOTM) += bootm.o
> @@ -35,11 +43,17 @@ else
>   obj-$(CONFIG_SPL_FRAMEWORK) += spl.o
>   endif
>
> +ifdef CONFIG_ARM64
> +obj-y	+= interrupts_64.o
> +else
>   obj-y	+= interrupts.o
> +endif
>   obj-y	+= reset.o
>
>   obj-y	+= cache.o
> +ifndef CONFIG_ARM64
>   obj-y	+= cache-cp15.o
> +endif
>
>   # For EABI conformant tool chains, provide eabi_compat()
>   ifneq (,$(findstring -mabi=aapcs-linux,$(PLATFORM_CPPFLAGS)))
> diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
> index 34f50b0..c4904b4 100644
> --- a/arch/arm/lib/board.c
> +++ b/arch/arm/lib/board.c
> @@ -344,7 +344,7 @@ void board_init_f(ulong bootflag)
>
>   #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
>   	/* reserve TLB table */
> -	gd->arch.tlb_size = 4096 * 4;
> +	gd->arch.tlb_size = PGTABLE_SIZE;
>   	addr -= gd->arch.tlb_size;
>
>   	/* round down to next 64 kB limit */
> @@ -419,6 +419,7 @@ void board_init_f(ulong bootflag)
>   	}
>   #endif
>
> +#ifndef CONFIG_ARM64
>   	/* setup stackpointer for exeptions */
>   	gd->irq_sp = addr_sp;
>   #ifdef CONFIG_USE_IRQ
> @@ -431,6 +432,10 @@ void board_init_f(ulong bootflag)
>
>   	/* 8-byte alignment for ABI compliance */
>   	addr_sp &= ~0x07;
> +#else	/* CONFIG_ARM64 */
> +	/* 16-byte alignment for ABI compliance */
> +	addr_sp &= ~0x0f;
> +#endif	/* CONFIG_ARM64 */
>   #else
>   	addr_sp += 128;	/* leave 32 words for abort-stack   */
>   	gd->irq_sp = addr_sp;
> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> index f476a89..77f1a5c 100644
> --- a/arch/arm/lib/bootm.c
> +++ b/arch/arm/lib/bootm.c
> @@ -196,6 +196,14 @@ static void do_nonsec_virt_switch(void)
>   		debug("entered non-secure state\n");
>   #endif
>   #endif
> +
> +#ifdef CONFIG_ARM64
> +	smp_kick_all_cpus();
> +	armv8_switch_to_el2();
> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> +	armv8_switch_to_el1();
> +#endif
> +#endif
>   }
>
>   /* Subcommand: PREP */
> @@ -240,6 +248,21 @@ static void boot_prep_linux(bootm_headers_t *images)
>   /* Subcommand: GO */
>   static void boot_jump_linux(bootm_headers_t *images, int flag)
>   {
> +#ifdef CONFIG_ARM64
> +	void (*kernel_entry)(void *fdt_addr);
> +	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
> +
> +	kernel_entry = (void (*)(void *fdt_addr))images->ep;
> +
> +	debug("## Transferring control to Linux (at address %lx)...\n",
> +		(ulong) kernel_entry);
> +	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
> +
> +	announce_and_cleanup(fake);
> +
> +	if (!fake)
> +		kernel_entry(images->ft_addr);
> +#else
>   	unsigned long machid = gd->bd->bi_arch_number;
>   	char *s;
>   	void (*kernel_entry)(int zero, int arch, uint params);
> @@ -266,6 +289,7 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
>
>   	if (!fake)
>   		kernel_entry(0, machid, r2);
> +#endif
>   }
>
>   /* Main Entry point for arm bootm implementation
> diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
> new file mode 100644
> index 0000000..7756396
> --- /dev/null
> +++ b/arch/arm/lib/crt0_64.S
> @@ -0,0 +1,113 @@
> +/*
> + * crt0 - C-runtime startup Code for AArch64 U-Boot
> + *
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * (C) Copyright 2012
> + * Albert ARIBAUD <albert.u.boot at aribaud.net>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <config.h>
> +#include <asm-offsets.h>
> +#include <asm/macro.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * This file handles the target-independent stages of the U-Boot
> + * start-up where a C runtime environment is needed. Its entry point
> + * is _main and is branched into from the target's start.S file.
> + *
> + * _main execution sequence is:
> + *
> + * 1. Set up initial environment for calling board_init_f().
> + *    This environment only provides a stack and a place to store
> + *    the GD ('global data') structure, both located in some readily
> + *    available RAM (SRAM, locked cache...). In this context, VARIABLE
> + *    global data, initialized or not (BSS), are UNAVAILABLE; only
> + *    CONSTANT initialized data are available.
> + *
> + * 2. Call board_init_f(). This function prepares the hardware for
> + *    execution from system RAM (DRAM, DDR...). As system RAM may not
> + *    be available yet, board_init_f() must use the current GD to
> + *    store any data which must be passed on to later stages. These
> + *    data include the relocation destination, the future stack, and
> + *    the future GD location.
> + *
> + * (the following applies only to non-SPL builds)
> + *
> + * 3. Set up intermediate environment where the stack and GD are the
> + *    ones allocated by board_init_f() in system RAM, but BSS and
> + *    initialized non-const data are still not available.
> + *
> + * 4. Call relocate_code(). This function relocates U-Boot from its
> + *    current location into the relocation destination computed by
> + *    board_init_f().
> + *
> + * 5. Set up final environment for calling board_init_r(). This
> + *    environment has BSS (initialized to 0), initialized non-const
> + *    data (initialized to their intended value), and stack in system
> + *    RAM. GD has retained values set by board_init_f(). Some CPUs
> + *    have some work left to do at this point regarding memory, so
> + *    call c_runtime_cpu_setup.
> + *
> + * 6. Branch to board_init_r().
> + */
> +
> +ENTRY(_main)
> +
> +/*
> + * Set up initial C runtime environment and call board_init_f(0).
> + */
> +	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
> +	sub	x0, x0, #GD_SIZE	/* allocate one GD above SP */
> +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> +	mov	x18, sp			/* GD is above SP */
> +	mov	x0, #0
> +	bl	board_init_f
> +
> +/*
> + * Set up intermediate environment (new sp and gd) and call
> + * relocate_code(addr_moni). Trick here is that we'll return
> + * 'here' but relocated.
> + */
> +	ldr	x0, [x18, #GD_START_ADDR_SP]	/* x0 <- gd->start_addr_sp */
> +	bic	sp, x0, #0xf	/* 16-byte alignment for ABI compliance */
> +	ldr	x18, [x18, #GD_BD]		/* x18 <- gd->bd */
> +	sub	x18, x18, #GD_SIZE		/* new GD is below bd */
> +
> +	adr	lr, relocation_return
> +	ldr	x9, [x18, #GD_RELOC_OFF]	/* x9 <- gd->reloc_off */
> +	add	lr, lr, x9	/* new return address after relocation */
> +	ldr	x0, [x18, #GD_RELOCADDR]	/* x0 <- gd->relocaddr */
> +	b	relocate_code
> +
> +relocation_return:
> +
> +/*
> + * Set up final (full) environment
> + */
> +	bl	c_runtime_cpu_setup		/* still call old routine */
> +
> +/*
> + * Clear BSS section
> + */
> +	ldr	x0, =__bss_start		/* this is auto-relocated! */
> +	ldr	x1, =__bss_end			/* this is auto-relocated! */
> +	mov	x2, #0
> +clear_loop:
> +	str	x2, [x0]
> +	add	x0, x0, #8
> +	cmp	x0, x1
> +	b.lo	clear_loop
> +
> +	/* call board_init_r(gd_t *id, ulong dest_addr) */
> +	mov	x0, x18				/* gd_t */
> +	ldr	x1, [x18, #GD_RELOCADDR]	/* dest_addr */
> +	b	board_init_r			/* PC relative jump */
> +
> +	/* NOTREACHED - board_init_r() does not return */
> +
> +ENDPROC(_main)
> diff --git a/arch/arm/lib/interrupts_64.c b/arch/arm/lib/interrupts_64.c
> new file mode 100644
> index 0000000..b476722
> --- /dev/null
> +++ b/arch/arm/lib/interrupts_64.c
> @@ -0,0 +1,120 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <linux/compiler.h>
> +
> +
> +int interrupt_init(void)
> +{
> +	return 0;
> +}
> +
> +void enable_interrupts(void)
> +{
> +	return;
> +}
> +
> +int disable_interrupts(void)
> +{
> +	return 0;
> +}
> +
> +void show_regs(struct pt_regs *regs)
> +{
> +	int i;
> +
> +	printf("ELR:     %lx\n", regs->elr);
> +	printf("LR:      %lx\n", regs->regs[30]);
> +	for (i = 0; i < 29; i += 2)
> +		printf("x%-2d: %016lx x%-2d: %016lx\n",
> +		       i, regs->regs[i], i+1, regs->regs[i+1]);
> +	printf("\n");
> +}
> +
> +/*
> + * do_bad_sync handles the impossible case in the Synchronous Abort vector.
> + */
> +void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_irq handles the impossible case in the Irq vector.
> + */
> +void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_fiq handles the impossible case in the Fiq vector.
> + */
> +void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_bad_error handles the impossible case in the Error vector.
> + */
> +void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_sync handles the Synchronous Abort exception.
> + */
> +void do_sync(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_irq handles the Irq exception.
> + */
> +void do_irq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Irq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_fiq handles the Fiq exception.
> + */
> +void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Fiq\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> +
> +/*
> + * do_error handles the Error exception.
> + * Errors are more likely to be processor specific,
> + * it is defined with weak attribute and can be redefined
> + * in processor specific code.
> + */
> +void __weak do_error(struct pt_regs *pt_regs, unsigned int esr)
> +{
> +	printf("\"Error\" handler, esr 0x%08x\n", esr);
> +	show_regs(pt_regs);
> +	panic("Resetting CPU ...\n");
> +}
> diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S
> new file mode 100644
> index 0000000..7fba9e2
> --- /dev/null
> +++ b/arch/arm/lib/relocate_64.S
> @@ -0,0 +1,58 @@
> +/*
> + * relocate - common relocation function for AArch64 U-Boot
> + *
> + * (C) Copyright 2013
> + * Albert ARIBAUD <albert.u.boot at aribaud.net>
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * SPDX-License-Identifier:	GPL-2.0+
> + */
> +
> +#include <asm-offsets.h>
> +#include <config.h>
> +#include <linux/linkage.h>
> +
> +/*
> + * void relocate_code (addr_moni)
> + *
> + * This function relocates the monitor code.
> + * x0 holds the destination address.
> + */
> +ENTRY(relocate_code)
> +	/*
> +	 * Copy u-boot from flash to RAM
> +	 */
> +	ldr	x1, =__image_copy_start	/* x1 <- SRC &__image_copy_start */
> +	subs	x9, x0, x1		/* x9 <- relocation offset */
> +	b.eq	relocate_done		/* skip relocation */
> +	ldr	x2, =__image_copy_end	/* x2 <- SRC &__image_copy_end */
> +
> +copy_loop:
> +	ldp	x10, x11, [x1], #16	/* copy from source address [x1] */
> +	stp	x10, x11, [x0], #16	/* copy to   target address [x0] */
> +	cmp	x1, x2			/* until source end address [x2] */
> +	b.lo	copy_loop
> +
> +	/*
> +	 * Fix .rela.dyn relocations
> +	 */
> +	ldr	x2, =__rel_dyn_start	/* x2 <- SRC &__rel_dyn_start */
> +	ldr	x3, =__rel_dyn_end	/* x3 <- SRC &__rel_dyn_end */
> +fixloop:
> +	ldp	x0, x1, [x2], #16	/* (x0,x1) <- (SRC location, fixup) */
> +	ldr	x4, [x2], #8		/* x4 <- addend */
> +	and	x1, x1, #0xffffffff
> +	cmp	x1, #1027		/* relative fixup? */
> +	bne	fixnext
> +
> +	/* relative fix: store addend plus offset at dest location */
> +	add	x0, x0, x9
> +	add	x4, x4, x9
> +	str	x4, [x0]
> +fixnext:
> +	cmp	x2, x3
> +	b.lo	fixloop
> +
> +relocate_done:
> +	ret
> +ENDPROC(relocate_code)
> diff --git a/common/image.c b/common/image.c
> index b0ae58f..4145354 100644
> --- a/common/image.c
> +++ b/common/image.c
> @@ -81,6 +81,7 @@ static const table_entry_t uimage_arch[] = {
>   	{	IH_ARCH_NDS32,		"nds32",	"NDS32",	},
>   	{	IH_ARCH_OPENRISC,	"or1k",		"OpenRISC 1000",},
>   	{	IH_ARCH_SANDBOX,	"sandbox",	"Sandbox",	},
> +	{	IH_ARCH_ARM64,		"arm64",	"AArch64",	},
>   	{	-1,			"",		"",		},
>   };
>
> diff --git a/doc/README.arm64 b/doc/README.arm64
> new file mode 100644
> index 0000000..75586db
> --- /dev/null
> +++ b/doc/README.arm64
> @@ -0,0 +1,46 @@
> +U-boot for arm64
> +
> +Summary
> +=======
> +No arm64 hardware platform is available yet, so U-Boot is
> +run on the Foundation Model and Fast Model for ARMv8.
> +
> +Notes
> +=====
> +
> +1. Currently, U-Boot runs at the highest exception level supported
> +   by the processor and jumps to EL2, or optionally EL1, before
> +   entering the OS.
> +
> +2. U-boot for arm64 is compiled with AArch64-gcc. AArch64-gcc uses the
> +   rela relocation format; a tool (tools/relocate-rela) by Scott Wood
> +   is used to encode the initial addends of the rela entries into u-boot.bin.
> +   At runtime, u-boot then relocates itself to the destination address again.
> +
> +3. The fdt should be placed at a 2-megabyte boundary and within the first 512
> +   megabytes from the start of the kernel image, so fdt_high should be
> +   defined accordingly.
> +   Please refer to linux/Documentation/arm64/booting.txt for details.
> +
> +4. A spin-table is used to wake up secondary processors. One location
> +   (or a per-processor location) is defined to hold the kernel entry point
> +   for secondary processors. It must be ensured that the location is
> +   accessible and zero immediately after a secondary processor
> +   enters the slave_cpu branch of execution in start.S. The location address
> +   is encoded in the cpu node of the DTS. The Linux kernel stores the entry
> +   point for the secondary processors there and sends an event to wake them
> +   up.
> +   Please refer to linux/Documentation/arm64/booting.txt for details.
> +
> +5. Generic board is supported.
> +
> +6. CONFIG_ARM64 instead of CONFIG_ARMV8 is used to distinguish aarch64-
> +   and aarch32-specific code.
> +
> +Contributor
> +===========
> +   Tom Rini       <trini at ti.com>
> +   Scott Wood     <scottwood at freescale.com>
> +   York Sun       <yorksun at freescale.com>
> +   Simon Glass    <sjg at chromium.org>
> +   Sharma Bhupesh <bhupesh.sharma at freescale.com>
> +   Rob Herring    <robherring2 at gmail.com>
> diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
> index 8fb1765..fc5d7ef 100644
> --- a/examples/standalone/stubs.c
> +++ b/examples/standalone/stubs.c
> @@ -39,6 +39,20 @@ gd_t *global_data;
>   "	bctr\n"				\
>   	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
>   #elif defined(CONFIG_ARM)
> +#ifdef CONFIG_ARM64
> +/*
> + * x18 holds the pointer to the global_data, x9 is a call-clobbered
> + * register
> + */
> +#define EXPORT_FUNC(x) \
> +	asm volatile (			\
> +"	.globl " #x "\n"		\
> +#x ":\n"				\
> +"	ldr	x9, [x18, %0]\n"		\
> +"	ldr	x9, [x9, %1]\n"		\
> +"	br	x9\n"		\
> +	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
> +#else
>   /*
>    * r8 holds the pointer to the global_data, ip is a call-clobbered
>    * register
> @@ -50,6 +64,7 @@ gd_t *global_data;
>   "	ldr	ip, [r8, %0]\n"		\
>   "	ldr	pc, [ip, %1]\n"		\
>   	: : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
> +#endif
>   #elif defined(CONFIG_MIPS)
>   /*
>    * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
> diff --git a/include/image.h b/include/image.h
> index ee6eb8d..7de2bb2 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -156,6 +156,7 @@ struct lmb;
>   #define IH_ARCH_SANDBOX		19	/* Sandbox architecture (test only) */
>   #define IH_ARCH_NDS32	        20	/* ANDES Technology - NDS32  */
>   #define IH_ARCH_OPENRISC        21	/* OpenRISC 1000  */
> +#define IH_ARCH_ARM64		22	/* ARM64	*/
>
>   /*
>    * Image Types
>

Regards,
Bhupesh


More information about the U-Boot mailing list