[PATCH 14/17] arm: cpu: Add ACPI parking protocol support
Simon Glass
sjg at chromium.org
Mon Jul 29 17:28:42 CEST 2024
Hi Patrick,
On Sat, 27 Jul 2024 at 01:20, Patrick Rudolph
<patrick.rudolph at 9elements.com> wrote:
>
> On Arm platforms that use ACPI they cannot rely on the "spin-table"
> CPU bringup usually defined in the FDT. Thus implement the
> 'ACPI Multi-processor Startup for ARM Platforms', also referred to as
> 'ACPI parking protocol'.
>
> The ACPI parking protocol works similar to the spin-table mechanism, but
> the specification also covers lots of shortcomings of the spin-table
> implementations.
>
> Every CPU defined in the ACPI MADT table has it's own 4K page where the
> spinloop code and the OS mailbox resides. When selected the U-Boot board
> code must make sure that the secondary CPUs enter u-boot after relocation
U-Boot
> as well, so that they can enter the spinloop code residing in the ACPI
> parking protocol pages.
>
> The OS will then write to the mailbox and generate an IPI to release the
> CPUs from the spinloop code.
>
> For now it's only implemented on ARMv8, but can easily be extended to
> other platforms.
>
> TEST: Boots all CPUs on qemu-system-aarch64 -machine raspi4b
>
> Signed-off-by: Patrick Rudolph <patrick.rudolph at 9elements.com>
> Cc: Simon Glass <sjg at chromium.org>
> Cc: Tom Rini <trini at konsulko.com>
> ---
> arch/arm/cpu/armv8/Makefile | 1 +
> arch/arm/cpu/armv8/parking_protocol_v8.S | 102 +++++++++++++++++++++++
> arch/arm/cpu/armv8/start.S | 4 +
> arch/arm/include/asm/acpi_table.h | 52 ++++++++++++
> arch/arm/lib/acpi_table.c | 87 +++++++++++++++++++
> include/acpi/acpi_table.h | 10 +++
> lib/Kconfig | 15 ++++
> lib/acpi/acpi_table.c | 3 +
> 8 files changed, 274 insertions(+)
> create mode 100644 arch/arm/cpu/armv8/parking_protocol_v8.S
>
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index bba4f570db..9bb07bdf12 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -29,6 +29,7 @@ obj-$(CONFIG_ARM_SMCCC) += smccc-call.o
>
> ifndef CONFIG_SPL_BUILD
> obj-$(CONFIG_ARMV8_SPIN_TABLE) += spin_table.o spin_table_v8.o
> +obj-$(CONFIG_ACPI_PARKING_PROTOCOL) += parking_protocol_v8.o
> else
> obj-$(CONFIG_ARCH_SUNXI) += fel_utils.o
> endif
> diff --git a/arch/arm/cpu/armv8/parking_protocol_v8.S b/arch/arm/cpu/armv8/parking_protocol_v8.S
How about acpi_park_v8.S as it is shorter?
> new file mode 100644
> index 0000000000..b7f6abbd85
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/parking_protocol_v8.S
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * Copyright (C) 2024 9elements GmbH
> + * Author: Patrick Rudolph <patrick.rudolph at 9elements.com>
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/acpi_table.h>
> +
> +__secondary_cpuif_id:
> + .quad 0
> +
> +__secondary_parking_protocol_tables:
> + .quad 0
> +
> +__secondary_parking_protocol_ncpus:
> + .quad 0
> +
> +.global acpi_parking_protocol_code_size
> +acpi_parking_protocol_code_size:
acpi_park_code_size
acpi_parking_protocol is just too long
> + .word __secondary_parking_protocol_code_end - __secondary_parking_protocol_code_start
Is there a 'primary'? Does this refer to the code run by the secondary CPUs?
> +
> +.global acpi_parking_protocol_install
> +ENTRY(acpi_parking_protocol_install)
> + ldr x2, =__secondary_parking_protocol_ncpus
> + str x1, [x2]
> + ldr x2, =__secondary_parking_protocol_tables
> + str x0, [x2]
> + dsb ishst
> + sev
> + ret
> +ENDPROC(acpi_parking_protocol_install)
> +
> +.global acpi_parking_protocol_secondary_jump
> +ENTRY(acpi_parking_protocol_secondary_jump)
> +0:
> + /* Store a unique cpuif id in x0 */
> + ldr x2, =__secondary_cpuif_id
> + ldaxr w0, [x2]
> + add w0, w0, #1 // =1
> + stlxr w1, w0, [x2]
> + cbnz w1, 0b
> +
> +0: /* Check if parking protocol table is ready */
> + ldr x1, =__secondary_parking_protocol_tables
> + ldr x2, [x1]
> + cbnz x2, 0f
> + wfe
> + b 0b
> +
> +0: /* Sanity check cpuif id */
> + ldr x1, =__secondary_parking_protocol_ncpus
> + ldr x3, [x1]
> + cmp w0, w3
> + b.lt 0f
> +
> +hlt: wfi
> + b hlt /* Should never happen. */
> +
> + /* Find spinning code in ACPI parking protocol table */
> +0: mov x1, #ACPI_PP_PAGE_SIZE
> + mul x3, x0, x1
> + add x2, x2, x3
> + mov x0, x2 /* Backup ACPI page ptr for later use */
> +
> + mov x1, #ACPI_PP_CPU_CODE_OFFSET
> + add x2, x2, x1
> +
> + /* Jump to spin code in own parking protocol page */
> + br x2
> +ENDPROC(acpi_parking_protocol_secondary_jump)
> +
> +.align 8
> +__secondary_parking_protocol_code_start:
> +.global acpi_parking_protocol_code_start
> +ENTRY(acpi_parking_protocol_code_start)
> + /* x0 points to the 4K aligned parking protocol page */
> +
> + /* Prepare defines for spinning code */
> + mov w3, #ACPI_PP_CPU_ID_INVALID
> + mov x2, #ACPI_PP_JMP_ADR_INVALID
> +
> +0: wfe
> + ldr w1, [x0, #ACPI_PP_CPU_ID_OFFSET]
> +
> + /* Check CPU ID is not invalid */
> + cmp w1, w3
> + b.eq 0b
> +
> + /* Check jump address not invalid */
> + ldr x1, [x0, #ACPI_PP_CPU_JMP_ADDR_OFFSET]
> + cmp x1, x2
> + b.eq 0b
> +
> + /* Clear jump address before jump */
> + str x2, [x0, #ACPI_PP_CPU_JMP_ADDR_OFFSET]
> + dsb sy
> +
> + br x1
> +ENDPROC(acpi_parking_protocol_code_start)
> + /* Secondary Boot Code ends here */
> +__secondary_parking_protocol_code_end:
> diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
> index 7461280261..32a295c832 100644
> --- a/arch/arm/cpu/armv8/start.S
> +++ b/arch/arm/cpu/armv8/start.S
> @@ -178,6 +178,10 @@ pie_fixup_done:
> branch_if_master x0, master_cpu
> b spin_table_secondary_jump
> /* never return */
> +#elif defined(CONFIG_ACPI_PARKING_PROTOCOL) && !defined(CONFIG_SPL_BUILD)
> + branch_if_master x0, master_cpu
> + b acpi_parking_protocol_secondary_jump
> + /* never return */
> #elif defined(CONFIG_ARMV8_MULTIENTRY)
> branch_if_master x0, master_cpu
>
> diff --git a/arch/arm/include/asm/acpi_table.h b/arch/arm/include/asm/acpi_table.h
> index 8a25e93847..3c6736239d 100644
> --- a/arch/arm/include/asm/acpi_table.h
> +++ b/arch/arm/include/asm/acpi_table.h
> @@ -4,6 +4,7 @@
> #define __ASM_ACPI_TABLE_H__
>
> #ifndef __ACPI__
> +#ifndef __ASSEMBLY__
>
> void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, uint cpu_num,
> uint perf_gsiv, ulong phys_base, ulong gicv,
> @@ -13,6 +14,57 @@ void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, uint cpu_num,
> void acpi_write_madt_gicd(struct acpi_madt_gicd *gicd, uint gic_id,
> ulong phys_base, uint gic_version);
>
> +/* Multi-processor Startup for ARM Platforms */
please add full comments for members
> +struct acpi_parking_protocol_page {
> + u32 cpu_id;
> + u32 reserved;
> + u64 jumping_address;
> + u8 os_reserved[2032];
> + u8 cpu_spinning_code[2048];
> +} __packed;
> +
> +/* Architecture specific functions */
> +/**
> + * acpi_parking_protocol_code_size - Spinloop code size *
> + */
> +extern u32 acpi_parking_protocol_code_size;
u32 but it says .word in the code...isn't that 16 bits?
> +
> +/**
> + * parking_protocol_code_start() - Spinloop code
> + *
> + * Architectural spinloop code to be installed in each parking protocol
> + * page. The spinloop code must be less than 2048 bytes.
> + *
> + * The spinloop code will be entered after calling
> + * acpi_parking_protocol_install().
> + *
> + */
> +void acpi_parking_protocol_code_start(void);
> +
> +/**
> + * acpi_parking_protocol_install() - Installs the parking protocol.
> + *
> + * Installs the reserved memory containing the spinloop code and the
> + * OS mailbox as required by the ACPI Multi-processor Startup for
> + * ARM Platforms specification.
> + *
> + * The secondary CPUs will wait for this function to be called in order
> + * to enter the spinloop code residing in the tables.
> + *
> + * @tables: ACPI parking protocol tables.
> + * @num_cpus: Number of allocated pages.
> + */
> +void acpi_parking_protocol_install(uintptr_t tables, size_t num_cpus);
> +
> +#endif /* !__ASSEMBLY__ */
> #endif /* !__ACPI__ */
>
> +#define ACPI_PP_CPU_ID_INVALID 0xffffffff
> +#define ACPI_PP_JMP_ADR_INVALID 0
> +#define ACPI_PP_PAGE_SIZE 4096
> +#define ACPI_PP_CPU_ID_OFFSET 0
> +#define ACPI_PP_CPU_JMP_ADDR_OFFSET 8
> +#define ACPI_PP_CPU_CODE_OFFSET 2048
> +#define ACPI_PP_VERSION 1
> +
> #endif /* __ASM_ACPI_TABLE_H__ */
> diff --git a/arch/arm/lib/acpi_table.c b/arch/arm/lib/acpi_table.c
> index ea3a6343c9..2fa25ec50e 100644
> --- a/arch/arm/lib/acpi_table.c
> +++ b/arch/arm/lib/acpi_table.c
> @@ -10,7 +10,11 @@
> #include <acpi/acpigen.h>
> #include <acpi/acpi_device.h>
> #include <acpi/acpi_table.h>
> +#include <cpu_func.h>
> +#include <efi_loader.h>
> +#include <malloc.h>
> #include <string.h>
> +#include <tables_csum.h>
>
> void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, uint cpu_num,
> uint perf_gsiv, ulong phys_base, ulong gicv,
> @@ -42,3 +46,86 @@ void acpi_write_madt_gicd(struct acpi_madt_gicd *gicd, uint gic_id,
> gicd->phys_base = phys_base;
> gicd->gic_version = gic_version;
> }
> +
> +void acpi_write_parking_protocol(struct acpi_madt *madt)
> +{
> + struct acpi_parking_protocol_page *page;
> + struct acpi_madt_gicc *gicc;
> + void *reloc_addr;
> + u64 start;
> + int ncpus = 0;
> +
> + /* According to the "Multi-processor Startup for ARM Platforms":
> + * - Every CPU as specified by MADT GICC has it's own 4K page
> + * - Every page is divided into two sections: OS and FW reserved
> + * - Memory occupied by "Parking Protocol" must be marked 'Reserved'
> + * - Spinloop code should reside in FW reserved 2048 bytes
> + * - Spinloop code will check the mailbox in OS reserved area
> + */
> +
> + if (acpi_parking_protocol_code_size > sizeof(page->cpu_spinning_code)) {
> + log_err("Spinning code too big to fit: %d\n",
> + acpi_parking_protocol_code_size);
> + }
> +
> + /* Count all cores including BSP */
Ick, that's a very strange way of counting CPUs. You can use
uclass_id_count(UCLASS_CPU).
> + for (int i = sizeof(struct acpi_madt); i < madt->header.length; ) {
> + gicc = (struct acpi_madt_gicc *)((void *)madt + i);
> + if (gicc->type != ACPI_APIC_GICC) {
> + i += gicc->length;
> + continue;
> + }
> + ncpus++;
> + i += gicc->length;
> + }
> + debug("Found %d GICCs in MADT\n", ncpus);
> +
> + /* Allocate pages linearly due to assembly code requirements */
> + page = memalign(ACPI_PP_PAGE_SIZE, ACPI_PP_PAGE_SIZE * ncpus);
> + start = (u64)(uintptr_t)page;
Again, this should go in the bloblist.
> +
> +#if CONFIG_IS_ENABLED(EFI_LOADER)
> + int ret = efi_add_memory_map(start, ncpus * ACPI_PP_PAGE_SIZE, EFI_RESERVED_MEMORY_TYPE);
> +
> + if (ret != EFI_SUCCESS)
> + log_err("Reserved memory mapping failed addr %llx size %x\n",
> + start, ncpus * ACPI_PP_PAGE_SIZE);
> +#endif
This map will overlap the one that EFI creates for the malloc()
region, won't it?
> +
> + /* Prepare the parking protocol pages */
> + for (int i = sizeof(struct acpi_madt); i < madt->header.length; ) {
please use 'pos' instead of 'i' as this isn't really an index
> + gicc = (struct acpi_madt_gicc *)((void *)madt + i);
Can you put the loop code in a separate function? This is getting a
bit long for one function
> + if (gicc->type != ACPI_APIC_GICC) {
> + i += gicc->length;
> + continue;
> + }
> +
> + /* Update GICC */
> + gicc->parking_proto = ACPI_PP_VERSION;
> + gicc->parked_addr = (uint64_t)(uintptr_t)page;
> +
> + /* Prepare parking protocol page */
> + memset(page, 0, sizeof(struct acpi_parking_protocol_page));
'\0'
> + page->cpu_id = ACPI_PP_CPU_ID_INVALID;
> + page->jumping_address = ACPI_PP_JMP_ADR_INVALID;
> +
> + /* Relocate spinning code */
> + reloc_addr = &page->cpu_spinning_code[0];
> +
> + debug("Relocating spin table from %p to %p (size %x)\n",
> + &acpi_parking_protocol_code_start, reloc_addr,
> + acpi_parking_protocol_code_size);
> + memcpy(reloc_addr, &acpi_parking_protocol_code_start,
> + acpi_parking_protocol_code_size);
> +
> + if (!CONFIG_IS_ENABLED(SYS_DCACHE_OFF))
> + flush_dcache_range((unsigned long)page,
> + (unsigned long)page +
> + sizeof(struct acpi_parking_protocol_page));
> + page++;
> + i += gicc->length;
> + }
> +
> + /* Point secondary CPUs to new spin loop code */
> + acpi_parking_protocol_install(start, ncpus);
> +}
> diff --git a/include/acpi/acpi_table.h b/include/acpi/acpi_table.h
> index 26c5e9f21a..994bfd7f8d 100644
> --- a/include/acpi/acpi_table.h
> +++ b/include/acpi/acpi_table.h
> @@ -951,6 +951,16 @@ void acpi_fill_fadt(struct acpi_fadt *fadt);
> */
> void *acpi_fill_madt(struct acpi_madt *madt, void *current);
>
> +/**
> + * acpi_write_parking_protocol() - Installs the ACPI parking protocol.
> + *
> + * Sets up the ACPI parking protocol and installs the spinning code for
> + * secondary CPUs.
> + *
> + * @madt: The MADT to update
> + */
> +void acpi_write_parking_protocol(struct acpi_madt *madt);
> +
> /**
> * acpi_write_dbg2_pci_uart() - Write out a DBG2 table
> *
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 2059219a12..43e4fa3253 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -314,6 +314,21 @@ config GENERATE_ACPI_TABLE
> by the operating system. It defines platform-independent interfaces
> for configuration and power management monitoring.
>
> +config ACPI_PARKING_PROTOCOL
> + bool "Support ACPI parking protocol method"
> + depends on GENERATE_ACPI_TABLE
> + depends on ARMV8_MULTIENTRY
> + default y if !SEC_FIRMWARE_ARMV8_PSCI && !ARMV8_PSCI
> + help
> + Say Y here to support "ACPI parking protocol" enable method
> + for booting Linux.
> +
> + To use this feature, you must do:
> + - Bring secondary CPUs into U-Boot proper in a board specific
> + manner. This must be done *after* relocation. Otherwise, the
> + secondary CPUs will spin in unprotected memory area because the
> + master CPU protects the relocated spin code.
> +
> config SPL_TINY_MEMSET
> bool "Use a very small memset() in SPL"
> depends on SPL
> diff --git a/lib/acpi/acpi_table.c b/lib/acpi/acpi_table.c
> index eb3c83ab02..56eb4a10d0 100644
> --- a/lib/acpi/acpi_table.c
> +++ b/lib/acpi/acpi_table.c
> @@ -271,6 +271,9 @@ int acpi_write_madt(struct acpi_ctx *ctx, const struct acpi_writer *entry)
> /* (Re)calculate length and checksum */
> header->length = (uintptr_t)current - (uintptr_t)madt;
>
> + if (IS_ENABLED(CONFIG_ACPI_PARKING_PROTOCOL))
> + acpi_write_parking_protocol(madt);
> +
> header->checksum = table_compute_checksum((void *)madt, header->length);
> acpi_add_table(ctx, madt);
> acpi_inc(ctx, madt->header.length);
> --
> 2.45.2
>
Regards,
Simon
More information about the U-Boot
mailing list