[U-Boot] [Patch v2 3/5] armv8/fsl-lsch3: Release secondary cores from boot hold off with Boot Page
Mark Rutland
mark.rutland at arm.com
Thu Aug 21 15:47:24 CEST 2014
Hi York,
I have mostly minor comments this time; this is looking pretty good.
On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote:
> Secondary cores need to be released from holdoff by boot release
> registers. With GPP bootrom, they can boot from main memory
> directly. Individual spin table is used for each core. If a single
> release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL
> will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved
> in device tree so OS won't overwrite.
>
> Signed-off-by: York Sun <yorksun at freescale.com>
> Signed-off-by: Arnab Basu <arnab.basu at freescale.com>
> ---
> v2: Removed copying boot page. Use u-boot image as is in memory.
> Added dealing with different size of addr_cell in device tree.
> Added dealing with big- and little-endian.
> Added flushing spin table after cpu_release().
>
> arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 +
> arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++
> arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 +
> arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++
> arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++---
> arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172 +++++++++++++++++++++
> arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++
> arch/arm/cpu/armv8/transition.S | 63 +-------
> arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +-
> arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++
> arch/arm/include/asm/macro.h | 92 +++++++++++
> arch/arm/lib/gic_64.S | 10 +-
> common/board_f.c | 2 +-
> 13 files changed, 525 insertions(+), 88 deletions(-)
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h
>
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
> index 9249537..f920eeb 100644
> --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
> @@ -7,3 +7,5 @@
> obj-y += cpu.o
> obj-y += lowlevel.o
> obj-y += speed.o
> +obj-$(CONFIG_MP) += mp.o
> +obj-$(CONFIG_OF_LIBFDT) += fdt.o
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
> index c129d03..47b947f 100644
> --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
> @@ -11,6 +11,7 @@
> #include <asm/io.h>
> #include <asm/arch-fsl-lsch3/immap_lsch3.h>
> #include "cpu.h"
> +#include "mp.h"
> #include "speed.h"
> #include <fsl_mc.h>
>
> @@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis)
> #endif
> return error;
> }
> +
> +
> +int arch_early_init_r(void)
> +{
> + int rv;
> + rv = fsl_lsch3_wake_seconday_cores();
> +
> + if (rv)
> + printf("Did not wake secondary cores\n");
> +
> + return 0;
> +}
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
> index 28544d7..2e3312b 100644
> --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
> @@ -5,3 +5,4 @@
> */
>
> int fsl_qoriq_core_to_cluster(unsigned int core);
> +u32 cpu_mask(void);
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c
> new file mode 100644
> index 0000000..2dbcdcb
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c
> @@ -0,0 +1,56 @@
> +/*
> + * Copyright 2014 Freescale Semiconductor, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <libfdt.h>
> +#include <fdt_support.h>
> +#include "mp.h"
> +
> +#ifdef CONFIG_MP
> +void ft_fixup_cpu(void *blob)
> +{
> + int off;
> + __maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr();
> + fdt32_t *reg;
> + int addr_cells;
> + u64 val;
> + size_t *boot_code_size = &(__secondary_boot_code_size);
> +
> + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpus", 4);
I didn't think /cpus had device_type = "cpus". I can't see any
instances in any DTs I have to hand. Can we not find /cpus by path?
> + of_bus_default_count_cells(blob, off, &addr_cells, NULL);
> +
> + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", 4);
> + while (off != -FDT_ERR_NOTFOUND) {
> + reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0);
> + if (reg) {
> + val = spin_tbl_addr;
> +#ifndef CONFIG_FSL_SMP_RELEASE_ALL
> + val += id_to_core(of_read_number(reg, addr_cells))
> + * SPIN_TABLE_ELEM_SIZE;
> +#endif
> + val = cpu_to_fdt64(val);
> + fdt_setprop_string(blob, off, "enable-method",
> + "spin-table");
> + fdt_setprop(blob, off, "cpu-release-addr",
> + &val, sizeof(val));
> + } else {
> + puts("cpu NULL\n");
Could we change that to "Warning: found cpu node without reg property"
or something like that which makes the problem clear?
> + }
> + off = fdt_node_offset_by_prop_value(blob, off, "device_type",
> + "cpu", 4);
> + }
> +
> + fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code,
> + *boot_code_size);
> +}
> +#endif
> +
> +void ft_cpu_setup(void *blob, bd_t *bd)
> +{
> +#ifdef CONFIG_MP
> + ft_fixup_cpu(blob);
> +#endif
> +}
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
> index ad32b6c..629e6d4 100644
> --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
> @@ -8,7 +8,9 @@
>
> #include <config.h>
> #include <linux/linkage.h>
> +#include <asm/gic.h>
> #include <asm/macro.h>
> +#include "mp.h"
>
> ENTRY(lowlevel_init)
> mov x29, lr /* Save LR */
> @@ -37,29 +39,119 @@ ENTRY(lowlevel_init)
>
> branch_if_master x0, x1, 1f
>
> + ldr x0, =secondary_boot_func
> + blr x0
> +
> +1:
> +2:
Isn't the '2' label redundant?
> + mov lr, x29 /* Restore LR */
> + ret
> +ENDPROC(lowlevel_init)
> +
> + /* Keep literals not used by the secondary boot code outside it */
> + .ltorg
> +
> + /* Using 64 bit alignment since the spin table is accessed as data */
> + .align 4
> + .global secondary_boot_code
> + /* Secondary Boot Code starts here */
> +secondary_boot_code:
> + .global __spin_table
> +__spin_table:
> + .space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE
> +
> + .align 2
> +ENTRY(secondary_boot_func)
> /*
> - * Slave should wait for master clearing spin table.
> - * This sync prevent salves observing incorrect
> - * value of spin table and jumping to wrong place.
> + * MPIDR_EL1 Fields:
> + * MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1)
> + * MPIDR[7:2] = AFF0_RES
> + * MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3)
> + * MPIDR[23:16] = AFF2_CLUSTERID
> + * MPIDR[24] = MT
> + * MPIDR[29:25] = RES0
> + * MPIDR[30] = U
> + * MPIDR[31] = ME
> + * MPIDR[39:32] = AFF3
> + *
> + * Linear Processor ID (LPID) calculation from MPIDR_EL1:
> + * (We only use AFF0_CPUID and AFF1_CLUSTERID for now
> + * until AFF2_CLUSTERID and AFF3 have non-zero values)
> + *
> + * LPID = MPIDR[15:8] | MPIDR[1:0]
> */
> -#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
> -#ifdef CONFIG_GICV2
> - ldr x0, =GICC_BASE
> + mrs x0, mpidr_el1
> + ubfm x1, x0, #8, #15
> + ubfm x2, x0, #0, #1
> + orr x10, x2, x1, lsl #2 /* x10 has LPID */
> + ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */
> + /*
> + * offset of the spin table element for this core from start of spin
> + * table (each elem is padded to 64 bytes)
> + */
> + lsl x1, x10, #6
> + ldr x0, =__spin_table
> + /* physical address of this cpus spin table element */
> + add x11, x1, x0
> +
> + str x9, [x11, #16] /* LPID */
> + mov x4, #1
> + str x4, [x11, #8] /* STATUS */
> + dsb sy
> +#if defined(CONFIG_GICV3)
> + gic_wait_for_interrupt_m x0
> #endif
> - bl gic_wait_for_interrupt
Why do we only need to wait for GICv3? We previously did so for GICv2.
> +
> + bl secondary_switch_to_el2
> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> + secondary_switch_to_el1
> #endif
Missing bl?
Is there any reason to have the SWITCH_TO_EL1 option other than for
debugging?
EL2 is the preferred EL to boot at for Linux and Xen (it gives far more
flexibility), and if dropping to EL1 is necessary I think it would make
more sense as a run-time option than a compile-time option.
>
> - /*
> - * All processors will enter EL2 and optionally EL1.
> +slave_cpu:
> + wfe
> +#ifdef CONFIG_FSL_SMP_RELEASE_ALL
> + /* All cores are released from the address in the 1st spin table
> + * element
> */
> - bl armv8_switch_to_el2
> -#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> - bl armv8_switch_to_el1
> + ldr x1, =__spin_table
> + ldr x0, [x1]
> +#else
> + ldr x0, [x11]
> +#endif
> + cbz x0, slave_cpu
Similarly is there any reason to have the option of a single release
addr if we can support unique addresses?
[...]
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c b/arch/arm/cpu/armv8/fsl-lsch3/mp.c
> new file mode 100644
> index 0000000..b29eda9
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c
> @@ -0,0 +1,172 @@
> +/*
> + * Copyright 2014 Freescale Semiconductor, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <asm/io.h>
> +#include <asm/system.h>
> +#include <asm/io.h>
> +#include <asm/arch-fsl-lsch3/immap_lsch3.h>
> +#include "mp.h"
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +void *get_spin_tbl_addr(void)
> +{
> + void *ptr = (void *)&__spin_table;
> +
> + return ptr;
> +}
I don't think the cast is necessary here. As far as I can see this
could be just:
void *get_spin_tbl_addr(void)
{
return &__spin_table;
}
[...]
> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
> index f77e4b8..0009c28 100644
> --- a/arch/arm/include/asm/macro.h
> +++ b/arch/arm/include/asm/macro.h
> @@ -105,6 +105,98 @@ lr .req x30
> cbz \xreg1, \master_label
> .endm
>
> +.macro armv8_switch_to_el2_m, xreg1
> + mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */
> + msr scr_el3, \xreg1
When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due
to lack of a handler. Would it make sense to do similarly here and
disable SMC here until we have a user (e.g. PSCI)?
> + msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */
> + mov \xreg1, #0x33ff
> + msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */
> +
> + /* Initialize SCTLR_EL2 */
> + /*
> + * setting RES1 bits (29,28,23,22,18,16,11,5,4) to 1
> + * and RES0 bits (31,30,27,26,24,21,20,17,15-13,10-6) +
> + * EE,WXN,I,SA,C,A,M to 0
> + */
> + mov \xreg1, #0x0830
> + movk \xreg1, #0x30C5, lsl #16
> + msr sctlr_el2, \xreg1
> +
> + /* Return to the EL2_SP2 mode from EL3 */
> + mov \xreg1, sp
> + msr sp_el2, \xreg1 /* Migrate SP */
> + mrs \xreg1, vbar_el3
> + msr vbar_el2, \xreg1 /* Migrate VBAR */
> + mov \xreg1, #0x3c9
> + msr spsr_el3, \xreg1 /* EL2_SP2 | D | A | I | F */
> + msr elr_el3, lr
> + eret
> +.endm
Otherwise this looks fine to me.
> +.macro armv8_switch_to_el1_m, xreg1, xreg2
> + /* Initialize Generic Timers */
> + mrs \xreg1, cnthctl_el2
> + orr \xreg1, \xreg1, #0x3 /* Enable EL1 access to timers */
> + msr cnthctl_el2, \xreg1
> + msr cntvoff_el2, xzr
> +
> + /* Initilize MPID/MPIDR registers */
> + mrs \xreg1, midr_el1
> + mrs \xreg2, mpidr_el1
> + msr vpidr_el2, \xreg1
> + msr vmpidr_el2, \xreg2
> +
> + /* Disable coprocessor traps */
> + mov \xreg1, #0x33ff
> + msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */
> + msr hstr_el2, xzr /* Disable coprocessor traps to EL2 */
> + mov \xreg1, #3 << 20
> + msr cpacr_el1, \xreg1 /* Enable FP/SIMD at EL1 */
> +
> + /* Initialize HCR_EL2 */
> + mov \xreg1, #(1 << 31) /* 64bit EL1 */
> + orr \xreg1, \xreg1, #(1 << 29) /* Disable HVC */
> + msr hcr_el2, \xreg1
> +
> + /* SCTLR_EL1 initialization */
> + /*
> + * setting RES1 bits (29,28,23,22,20,11) to 1
> + * and RES0 bits (31,30,27,21,17,13,10,6) +
> + * UCI,EE,EOE,WXN,nTWE,nTWI,UCT,DZE,I,UMA,SED,ITD,
> + * CP15BEN,SA0,SA,C,A,M to 0
> + */
> + mov \xreg1, #0x0800
> + movk \xreg1, #0x30d0, lsl #16
> + msr sctlr_el1, \xreg1
> +
> + /* Return to the EL1_SP1 mode from EL2 */
> + mov \xreg1, sp
> + msr sp_el1, \xreg1 /* Migrate SP */
> + mrs \xreg1, vbar_el2
> + msr vbar_el1, \xreg1 /* Migrate VBAR */
> + mov \xreg1, #0x3c5
> + msr spsr_el2, \xreg1 /* EL1_SP1 | D | A | I | F */
> + msr elr_el2, lr
> + eret
> +.endm
This looks fine to me.
Thanks,
Mark.
More information about the U-Boot
mailing list