[U-Boot] [PATCH v2 1/2] armv8: Support loading 32-bit OS in AArch32 execution state

Thu May 19 10:58:55 CEST 2016

On 05/19/2016 10:26 AM, Alison Wang wrote:
> To support loading a 32-bit OS, the execution state will change from
> AArch64 to AArch32 when jumping to kernel.
>
> The architecture information will be got through checking FIT
> image, then U-Boot will load 32-bit OS or 64-bit OS automatically.
>
> Signed-off-by: Ebony Zhu <ebony.zhu at nxp.com>
> Signed-off-by: Alison Wang <alison.wang at nxp.com>
> Signed-off-by: Chenhui Zhao <chenhui.zhao at nxp.com>
> ---
> Changes in v2:
> - armv8_switch_to_el2_aarch32() is removed. armv8_switch_to_el2_m is used
>    to switch to AArch64 EL2 or AArch32 Hyp.
> - armv8_switch_to_el1_aarch32() is removed. armv8_switch_to_el1_m is used
>    to switch to AArch64 EL1 or AArch32 SVC.
>
>   arch/arm/cpu/armv8/transition.S |  8 ++---
>   arch/arm/include/asm/macro.h    | 80 +++++++++++++++++++++++++++++++++++++----
>   arch/arm/include/asm/system.h   | 25 +++++++++++--
>   arch/arm/lib/bootm.c            | 26 +++++++++++---
>   common/image-fit.c              | 12 ++++++-
>   5 files changed, 133 insertions(+), 18 deletions(-)
>
> diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S
> index 253a39b..9fa3b59 100644
> --- a/arch/arm/cpu/armv8/transition.S
> +++ b/arch/arm/cpu/armv8/transition.S
> @@ -11,13 +11,13 @@
>   #include <asm/macro.h>
>   
>   ENTRY(armv8_switch_to_el2)
> -	switch_el x0, 1f, 0f, 0f
> +	switch_el x4, 1f, 0f, 0f
>   0:	ret
> -1:	armv8_switch_to_el2_m x0
> +1:	armv8_switch_to_el2_m x0, x1, x2, x3, x4, x5, x6
>   ENDPROC(armv8_switch_to_el2)
>   
>   ENTRY(armv8_switch_to_el1)
> -	switch_el x0, 0f, 1f, 0f
> +	switch_el x4, 0f, 1f, 0f
>   0:	ret
> -1:	armv8_switch_to_el1_m x0, x1
> +1:	armv8_switch_to_el1_m x0, x1, x2, x3, x4, x5, x6
>   ENDPROC(armv8_switch_to_el1)
> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> index 9bb0efa..56d77f6 100644
> --- a/arch/arm/include/asm/macro.h
> +++ b/arch/arm/include/asm/macro.h
> @@ -8,6 +8,9 @@
>   
>   #ifndef __ASM_ARM_MACRO_H__
>   #define __ASM_ARM_MACRO_H__
> +
> +#include <asm/system.h>
> +
>   #ifdef __ASSEMBLY__
>   
>   /*
> @@ -135,9 +138,20 @@ lr	.req	x30
>   #endif
>   .endm
>   
> -.macro armv8_switch_to_el2_m, xreg1
> -	/* 64bit EL2 | HCE | SMD | RES1 (Bits[5:4]) | Non-secure EL0/EL1 */
> -	mov	\xreg1, #0x5b1
> +.macro armv8_switch_to_el2_m, xreg1, xreg2, xreg3, xreg4, xreg5, xreg6, xreg7

These arguments should probably get documented :). Also xreg4 doesn't 
seem to actually be a scratch register, but a real parameter. So please 
name it accordingly.

> +	mov	\xreg5, \xreg1
> +	mov	\xreg6, \xreg2
> +	mov	\xreg7, \xreg3
> +
> +	/*
> +	 * If the next lower exception level is AArch64, 64bit EL2 | HCE | SMD |
> +	 * RES1 (Bits[5:4]) | Non-secure EL0/EL1. If the next lower exception
> +	 * level is AArch32, 32bit EL2 | HCE | SMD | RES1 (Bits[5:4]) |
> +	 * Non-secure EL0/EL1.
> +	 */
> +	mov	\xreg1, #0x1b1
> +	lsl	\xreg2, \xreg4, #10
> +	orr	\xreg1, \xreg1, \xreg2

Is there any way you can make this obvious from the code? Basically we 
would want something like

mov \xreg1, #(SCR_EL3_EL2_AA64 | SCR_EL3_HCE | SCR_EL3_SMD ...)

I'm sure there are creative ways to achieve this even with the high bits 
set. Maybe ldr = works?

>   	msr	scr_el3, \xreg1
>   	msr	cptr_el3, xzr		/* Disable coprocessor traps to EL3 */
>   	mov	\xreg1, #0x33ff
> @@ -156,18 +170,46 @@ lr	.req	x30
>   	movk	\xreg1, #0x30C5, lsl #16
>   	msr	sctlr_el2, \xreg1
>   
> -	/* Return to the EL2_SP2 mode from EL3 */
>   	mov	\xreg1, sp
>   	msr	sp_el2, \xreg1		/* Migrate SP */
>   	mrs	\xreg1, vbar_el3
>   	msr	vbar_el2, \xreg1	/* Migrate VBAR */
> +
> +	/* Check switch to AArch64 EL2 or AArch32 Hypervisor mode */
> +	ldr	\xreg1, =ES_TO_AARCH64
> +	cmp	\xreg4, \xreg1
> +	b.eq	1f
> +	b.ne	2f

Just b.ne should be enough, no?

> +
> +1:
> +	/* Return to the EL2_SP2 mode from EL3 */
>   	mov	\xreg1, #0x3c9

Magic values again :). Please convert them to constants getting ORed.

>   	msr	spsr_el3, \xreg1	/* EL2_SP2 | D | A | I | F */
>   	msr	elr_el3, lr
> +
> +	mov	\xreg1, \xreg5
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
> +	eret
> +
> +2:
> +	/* Return to AArch32 Hypervisor mode */
> +	mov     \xreg1, #0x1da
> +	msr	spsr_el3, \xreg1	/* Hyp | D | A | I | F */
> +	msr     elr_el3, \xreg5
> +
> +	mov	\xreg1, #0
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
>   	eret
> +
>   .endm
>   
> -.macro armv8_switch_to_el1_m, xreg1, xreg2
> +.macro armv8_switch_to_el1_m, xreg1, xreg2, xreg3, xreg4, xreg5, xreg6, xreg7
> +	mov	\xreg5, \xreg1
> +	mov	\xreg6, \xreg2
> +	mov	\xreg7, \xreg3
> +
>   	/* Initialize Generic Timers */
>   	mrs	\xreg1, cnthctl_el2
>   	orr	\xreg1, \xreg1, #0x3	/* Enable EL1 access to timers */
> @@ -188,7 +230,7 @@ lr	.req	x30
>   	msr	cpacr_el1, \xreg1	/* Enable FP/SIMD at EL1 */
>   
>   	/* Initialize HCR_EL2 */
> -	mov	\xreg1, #(1 << 31)		/* 64bit EL1 */
> +	lsl	\xreg1, \xreg4, #31

This deserves a comment. Imagine you read this code again in 5 years. 
Would you still understand what's going on?

>   	orr	\xreg1, \xreg1, #(1 << 29)	/* Disable HVC */
>   	msr	hcr_el2, \xreg1
>   
> @@ -203,15 +245,39 @@ lr	.req	x30
>   	movk	\xreg1, #0x30d0, lsl #16
>   	msr	sctlr_el1, \xreg1
>   
> -	/* Return to the EL1_SP1 mode from EL2 */
>   	mov	\xreg1, sp
>   	msr	sp_el1, \xreg1		/* Migrate SP */
>   	mrs	\xreg1, vbar_el2
>   	msr	vbar_el1, \xreg1	/* Migrate VBAR */
> +
> +	/* Check switch to AArch64 EL1 or AArch32 Supervisor mode */
> +	ldr	\xreg1, =ES_TO_AARCH64
> +	cmp	\xreg4, \xreg1
> +	b.eq	1f
> +	b.ne	2f
> +
> +1:
> +	/* Return to the EL1_SP1 mode from EL2 */
>   	mov	\xreg1, #0x3c5
>   	msr	spsr_el2, \xreg1	/* EL1_SP1 | D | A | I | F */
>   	msr	elr_el2, lr
> +
> +	mov	\xreg1, \xreg5
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
> +	eret
> +
> +2:
> +	/* Return to the Supervisor mode from EL2 */
> +	mov     \xreg1, #0x1d3
> +	msr     spsr_el2, \xreg1        /* Supervisor | D | A | I | F */
> +	msr     elr_el2, \xreg5
> +
> +	mov	\xreg1, #0
> +	mov	\xreg2, \xreg6
> +	mov	\xreg3, \xreg7
>   	eret
> +
>   .endm
>   
>   #if defined(CONFIG_GICV3)
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index 9ae890a..fcfe3ae 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -17,6 +17,9 @@
>   #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
>   #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
>   
> +#define ES_TO_AARCH64			1
> +#define ES_TO_AARCH32			0
> +
>   #ifndef __ASSEMBLY__
>   
>   u64 get_page_table_size(void);
> @@ -100,8 +103,26 @@ void __asm_invalidate_icache_all(void);
>   int __asm_flush_l3_cache(void);
>   void __asm_switch_ttbr(u64 new_ttbr);
>   
> -void armv8_switch_to_el2(void);
> -void armv8_switch_to_el1(void);
> +/*
> + * Switch from EL3 to EL2 for ARMv8
> + *
> + * @entry_point: kernel entry point
> + * @mach_nr:     machine nr
> + * @fdt_addr:    fdt address
> + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> + */
> +void armv8_switch_to_el2(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> +			 u64 es_flag);
> +/*
> + * Switch from EL2 to EL1 for ARMv8
> + *
> + * @entry_point: kernel entry point
> + * @mach_nr:     machine nr
> + * @fdt_addr:    fdt address
> + * @es_flag:     execution state flag, ES_TO_AARCH64 or ES_TO_AARCH32
> + */
> +void armv8_switch_to_el1(u64 entry_point, u64 mach_nr, u64 fdt_addr,
> +			 u64 es_flag);
>   void gic_init(void);
>   void gic_send_sgi(unsigned long sgino);
>   void wait_for_wakeup(void);
> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> index 0838d89..f6f685a 100644
> --- a/arch/arm/lib/bootm.c
> +++ b/arch/arm/lib/bootm.c
> @@ -193,9 +193,9 @@ static void do_nonsec_virt_switch(void)
>   {
>   	smp_kick_all_cpus();
>   	dcache_disable();	/* flush cache before swtiching to EL2 */
> -	armv8_switch_to_el2();
> +	armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);

Just rename the existing function to armv8_switch_to_el2_adv() or so and 
provide armv8_switch_to_el2() as static inline function that simply 
invokes armv8_switch_to_el2_adv(0, 0, 0, ES_TO_AARCH64);

>   #ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> -	armv8_switch_to_el1();
> +	armv8_switch_to_el1(0, 0, 0, ES_TO_AARCH64);
>   #endif
>   }
>   #endif
> @@ -286,8 +286,26 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
>   	announce_and_cleanup(fake);
>   
>   	if (!fake) {
> -		do_nonsec_virt_switch();
> -		kernel_entry(images->ft_addr, NULL, NULL, NULL);
> +		if ((IH_ARCH_DEFAULT == IH_ARCH_ARM64) &&
> +		    (images->os.arch == IH_ARCH_ARM)) {
> +			smp_kick_all_cpus();
> +			dcache_disable();

This open codes part of do_nonsec_virt_switch().

Could we just clean up all of that mess a bit more maybe? How about you call

do_nonsec_virt_switch(ES_EL2 | ES_AARCH32, entry, images->ft_addr) or
do_nonsec_virt_switch(ES_EL1 | ES_AARCH64, entry, images->ft_addr)

Then the if() would only need to set ES_AARCH32/64 in a variable and 
everything else goes along the same code.

> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> +			armv8_switch_to_el2(0, 0, 0, ES_TO_AARCH64);
> +			armv8_switch_to_el1((u64)images->ep,
> +					    (u64)gd->bd->bi_arch_number,
> +					    (u64)images->ft_addr,
> +					    ES_TO_AARCH32);
> +#else
> +			armv8_switch_to_el2((u64)images->ep,
> +					    (u64)gd->bd->bi_arch_number,
> +					    (u64)images->ft_addr,
> +					    ES_TO_AARCH32);
> +#endif
> +		} else {
> +			do_nonsec_virt_switch();
> +			kernel_entry(images->ft_addr, NULL, NULL, NULL);
> +		}
>   	}
>   #else
>   	unsigned long machid = gd->bd->bi_arch_number;
> diff --git a/common/image-fit.c b/common/image-fit.c
> index 25f8a11..2986469 100644
> --- a/common/image-fit.c
> +++ b/common/image-fit.c
> @@ -1163,7 +1163,8 @@ int fit_image_check_arch(const void *fit, int noffset, uint8_t arch)
>   	if (fit_image_get_arch(fit, noffset, &image_arch))
>   		return 0;
>   	return (arch == image_arch) ||
> -		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64);
> +		(arch == IH_ARCH_I386 && image_arch == IH_ARCH_X86_64) ||
> +		(arch == IH_ARCH_ARM64 && image_arch == IH_ARCH_ARM);

This should be a Kconfig option. ThunderX doesn't support AArch32 execution.

>   }
>   
>   /**
> @@ -1586,6 +1587,9 @@ int fit_image_load(bootm_headers_t *images, ulong addr,
>   	int type_ok, os_ok;
>   	ulong load, data, len;
>   	uint8_t os;
> +#ifndef USE_HOSTCC
> +	uint8_t os_arch;
> +#endif
>   	const char *prop_name;
>   	int ret;
>   
> @@ -1669,6 +1673,12 @@ int fit_image_load(bootm_headers_t *images, ulong addr,
>   		return -ENOEXEC;
>   	}
>   #endif
> +
> +#ifndef USE_HOSTCC
> +	fit_image_get_arch(fit, noffset, &os_arch);
> +	images->os.arch = os_arch;
> +#endif
> +
>   	if (image_type == IH_TYPE_FLATDT &&
>   	    !fit_image_check_comp(fit, noffset, IH_COMP_NONE)) {
>   		puts("FDT image is compressed");