[PATCH v3 27/30] arm: cpu: Add ACPI parking protocol support

Simon Glass sjg at chromium.org
Thu Sep 12 02:58:26 CEST 2024


Hi Patrick,

On Wed, 11 Sept 2024 at 00:26, Patrick Rudolph
<patrick.rudolph at 9elements.com> wrote:
>
> On Arm platforms that use ACPI they cannot rely on the "spin-table"
> CPU bringup usually defined in the FDT. Thus implement the
> 'ACPI Multi-processor Startup for ARM Platforms', also referred to as
> 'ACPI parking protocol'.
>
> The ACPI parking protocol works similar to the spin-table mechanism, but
> the specification also covers lots of shortcomings of the spin-table
> implementations.
>
> Every CPU defined in the ACPI MADT table has it's own 4K page where the
> spinloop code and the OS mailbox resides. When selected the U-Boot board
> code must make sure that the secondary CPUs enter u-boot after relocation
> as well, so that they can enter the spinloop code residing in the ACPI
> parking protocol pages.
>
> The OS will then write to the mailbox and generate an IPI to release the
> CPUs from the spinloop code.
>
> For now it's only implemented on ARMv8, but can easily be extended to
> other platforms.
>
> TEST: Boots all CPUs on qemu-system-aarch64 -machine raspi4b
>
> Signed-off-by: Patrick Rudolph <patrick.rudolph at 9elements.com>
> Cc: Simon Glass <sjg at chromium.org>
> Cc: Tom Rini <trini at konsulko.com>
> ---
> Changelog v2:
> - Use shorter function names
> - Drop the use of atomics, they do not work on real hardware
> - Rewrite code and verify on real hardware that CPUs are spinning
>   inside their parking protocol spin-loop code
>
> ---
>  arch/arm/cpu/armv8/Makefile       |   1 +
>  arch/arm/cpu/armv8/acpi_park_v8.S | 107 ++++++++++++++++++++++++++
>  arch/arm/cpu/armv8/start.S        |   4 +
>  arch/arm/include/asm/acpi_table.h |  45 +++++++++++
>  arch/arm/lib/acpi_table.c         | 123 ++++++++++++++++++++++++++++++
>  include/acpi/acpi_table.h         |  10 +++
>  include/bloblist.h                |   1 +
>  lib/Kconfig                       |  15 ++++
>  lib/acpi/acpi_table.c             |   4 +
>  9 files changed, 310 insertions(+)
>  create mode 100644 arch/arm/cpu/armv8/acpi_park_v8.S

Reviewed-by: Simon Glass <sjg at chromium.org>

Just some nits, looks very tidy.

>
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index bba4f570db..4f4368ff8c 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -29,6 +29,7 @@ obj-$(CONFIG_ARM_SMCCC)               += smccc-call.o
>
>  ifndef CONFIG_SPL_BUILD
>  obj-$(CONFIG_ARMV8_SPIN_TABLE) += spin_table.o spin_table_v8.o
> +obj-$(CONFIG_ACPI_PARKING_PROTOCOL) += acpi_park_v8.o
>  else
>  obj-$(CONFIG_ARCH_SUNXI) += fel_utils.o
>  endif
> diff --git a/arch/arm/cpu/armv8/acpi_park_v8.S b/arch/arm/cpu/armv8/acpi_park_v8.S
> new file mode 100644
> index 0000000000..4dc7ff216c
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/acpi_park_v8.S
> @@ -0,0 +1,107 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*

Could you add a comment here about what this file is for?

> + * Copyright (C) 2024 9elements GmbH
> + *   Author: Patrick Rudolph <patrick.rudolph at 9elements.com>
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/acpi_table.h>
> +
> +/* Filled by C code */
> +.global acpi_pp_tables
> +acpi_pp_tables:
> +       .quad 0
> +
> +.global acpi_pp_etables
> +acpi_pp_etables:
> +       .quad 0
> +
> +/* Read by C code */
> +.global acpi_pp_code_size
> +acpi_pp_code_size:
> +       .word __secondary_pp_code_end - __secondary_pp_code_start
> +
> +.global acpi_pp_secondary_jump
> +ENTRY(acpi_pp_secondary_jump)
> +0:
> +       /*
> +        * Cannot use atomic operations since the MMU and D-cache
> +        * might be off. Use the MPIDR instead to find the spintable.
> +        */
> +
> +       /* Check if parking protocol table is ready */
> +       ldr     x1, =acpi_pp_tables
> +       ldr     x0, [x1]
> +       cbnz    x0, 0f
> +       wfe
> +       b       0b
> +
> +0:     /* Get end of page tables in x3 */
> +       ldr     x1, =acpi_pp_etables
> +       ldr     x3, [x1]
> +
> +       /* Get own CPU ID in w2 */
> +       mrs     x2, mpidr_el1
> +       lsr     x9, x2, #32
> +       bfi     x2, x9, #24, #8 /* w2 is aff3:aff2:aff1:aff0 */
> +
> +0:     /* Loop over all parking protocol pages */
> +       cmp     x0, x3
> +       b.ge    hlt
> +
> +       /* Fetch CPU_ID from current page */
> +       ldr     x1, [x0, #ACPI_PP_CPU_ID_OFFSET]
> +       lsr     x9, x1, #32
> +       bfi     x1, x9, #24, #8 /* w1 is aff3:aff2:aff1:aff0 */
> +
> +       /* Compare CPU_IDs */
> +       cmp     w1, w2
> +       b.eq    0f
> +
> +       add     x0, x0, #ACPI_PP_PAGE_SIZE
> +       b       0b
> +
> +hlt:   wfi
> +       b       hlt     /* Should never happen. */
> +
> +0:     /* x0 points to the 4K aligned parking protocol page */
> +       add     x2, x0, #ACPI_PP_CPU_CODE_OFFSET
> +
> +       /* Jump to spin code in own parking protocol page */
> +       br      x2
> +ENDPROC(acpi_pp_secondary_jump)
> +
> +.align 8
> +__secondary_pp_code_start:
> +.global acpi_pp_code_start
> +ENTRY(acpi_pp_code_start)
> +       /* x0 points to the 4K aligned parking protocol page */

"4K-aligned, parking-protocol page"

> +
> +       /* Prepare defines for spinning code */
> +       mov     w3, #ACPI_PP_CPU_ID_INVALID
> +       mov     x2, #ACPI_PP_JMP_ADR_INVALID
> +
> +       /* Mark parking protocol page as ready */
> +       str     w3, [x0, #ACPI_PP_CPU_ID_OFFSET]
> +       dsb     sy
> +
> +0:     wfe
> +       ldr     w1, [x0, #ACPI_PP_CPU_ID_OFFSET]
> +
> +       /* Check CPU ID is not invalid */
> +       cmp     w1, w3
> +       b.eq    0b
> +
> +       /* Check jump address not invalid */

s/not invalid/valid/ ?

> +       ldr     x1, [x0, #ACPI_PP_CPU_JMP_ADDR_OFFSET]
> +       cmp     x1, x2
> +       b.eq    0b
> +
> +       /* Clear jump address before jump */
> +       str     x2, [x0, #ACPI_PP_CPU_JMP_ADDR_OFFSET]
> +       dsb     sy
> +
> +       br      x1
> +ENDPROC(acpi_pp_code_start)
> +       /* Secondary Boot Code ends here */
> +__secondary_pp_code_end:
> diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S

The changes to this file should really go in a separate commit since
this is generic code.

> index 7461280261..f56870768e 100644
> --- a/arch/arm/cpu/armv8/start.S
> +++ b/arch/arm/cpu/armv8/start.S
> @@ -178,6 +178,10 @@ pie_fixup_done:
>         branch_if_master x0, master_cpu
>         b       spin_table_secondary_jump
>         /* never return */
> +#elif defined(CONFIG_ACPI_PARKING_PROTOCOL) && !defined(CONFIG_SPL_BUILD)

A comment about what this is doing would be nice

> +       branch_if_master x0, master_cpu
> +       b       acpi_pp_secondary_jump
> +       /* never return */
>  #elif defined(CONFIG_ARMV8_MULTIENTRY)
>         branch_if_master x0, master_cpu
>
> diff --git a/arch/arm/include/asm/acpi_table.h b/arch/arm/include/asm/acpi_table.h
> index c65eabe837..c9ea7e04b4 100644
> --- a/arch/arm/include/asm/acpi_table.h
> +++ b/arch/arm/include/asm/acpi_table.h
> @@ -4,6 +4,7 @@
>  #define __ASM_ACPI_TABLE_H__
>
>  #ifndef __ACPI__
> +#ifndef __ASSEMBLY__
>
>  #include <acpi/acpi_table.h>
>
> @@ -109,7 +110,51 @@ int acpi_pptt_add_cache(struct acpi_ctx *ctx, const u32 flags,
>                         const u32 sets, const u8 assoc, const u8 attributes,
>                         const u16 line_size);
>
> +/* Multi-processor Startup for ARM Platforms */
> +struct acpi_parking_protocol_page {
> +       u64 cpu_id;

What is the CPI ID, actually? How do you obtain it? Can you add a
comment for this struct?

> +       u64 jumping_address;

ulong entry ?

> +       u8 os_reserved[2032];
> +       u8 cpu_spinning_code[2048];

I don't know of any other kind of spinning code that doesn't run on a
cpu...so how about spin_code or spinning_code?

> +} __packed;
> +
> +/* Architecture specific functions */
> +/**
> + * acpi_pp_code_size - Spinloop code size *
> + */
> +extern u16 acpi_pp_code_size;

Please avoid extern in header files

> +
> +/**
> + * acpi_pp_tables - Start of ACPI PP tables.
> + */
> +extern u64 acpi_pp_tables;

Is this used on 32-bit machines? Judging by the armv8 I would say not,
so can we just use ulong here instead of u64?

> +
> +/**
> + * acpi_pp_etables - End of ACPI PP tables.
> + */
> +extern u64 acpi_pp_etables;
> +
> +/**
> + * acpi_pp_code_start() - Spinloop code
> + *
> + * Architectural spinloop code to be installed in each parking protocol
> + * page. The spinloop code must be less than 2048 bytes.
> + *
> + * The spinloop code will be entered after calling
> + * acpi_parking_protocol_install().
> + *
> + */
> +void acpi_pp_code_start(void);
> +
>  #endif /* !__ASSEMBLY__ */
>  #endif /* !__ACPI__ */
>
> +#define ACPI_PP_CPU_ID_INVALID         0xffffffff
> +#define ACPI_PP_JMP_ADR_INVALID                0
> +#define ACPI_PP_PAGE_SIZE              4096
> +#define ACPI_PP_CPU_ID_OFFSET          0
> +#define ACPI_PP_CPU_JMP_ADDR_OFFSET    8

Why do you need ADDR_OFFSET ? It is normally either an address or an
offset in U-Boot. Would ACPI_PP_CPU_JMP_OFFSET be OK?

> +#define ACPI_PP_CPU_CODE_OFFSET                2048
> +#define ACPI_PP_VERSION                        1
> +
>  #endif /* __ASM_ACPI_TABLE_H__ */
> diff --git a/arch/arm/lib/acpi_table.c b/arch/arm/lib/acpi_table.c
> index 75a0bc5b6e..d648d85049 100644
> --- a/arch/arm/lib/acpi_table.c
> +++ b/arch/arm/lib/acpi_table.c
> @@ -10,8 +10,16 @@
>  #include <acpi/acpigen.h>
>  #include <acpi/acpi_device.h>
>  #include <acpi/acpi_table.h>
> +#include <asm-generic/io.h>
>  #include <dm/acpi.h>
> +#include <bloblist.h>
> +#include <cpu_func.h>
> +#include <efi_loader.h>
> +#include <linux/log2.h>
> +#include <linux/sizes.h>
> +#include <malloc.h>
>  #include <string.h>
> +#include <tables_csum.h>

header order

>
>  void acpi_write_madt_gicc(struct acpi_madt_gicc *gicc, uint cpu_num,
>                           uint perf_gsiv, ulong phys_base, ulong gicv,
> @@ -120,3 +128,118 @@ __weak void *acpi_fill_madt(struct acpi_madt *madt, struct acpi_ctx *ctx)
>         acpi_fill_madt_subtbl(ctx);
>         return ctx->current;
>  }
> +
> +/**
> + * acpi_write_pp_setup_one_page() - Fill out one page used by the PP
> + *
> + * Fill out the struct acpi_parking_protocol_page to contain the spin-loop
> + * code and the mailbox area. After this function the page is ready for
> + * the secondary core's to enter the spin-loop code.
> + *
> + * @page:                 Pointer to current parking protocol page
> + * @gicc:                 Pointer to corresponding GICC sub-table
> + */
> +static void acpi_write_pp_setup_one_page(struct acpi_parking_protocol_page *page,

acpi_parking_protocol_page is way too long

How about acpi_park_page ?

> +                                        struct acpi_madt_gicc *gicc)
> +{
> +       void *reloc_addr;

This is a pointer, not an address. An address is always ulong. So just 'reloc'

> +
> +       /* Update GICC. Mark parking protocol as available. */
> +       gicc->parking_proto = ACPI_PP_VERSION;
> +       gicc->parked_addr = virt_to_phys(page);
> +
> +       /* Prepare parking protocol page */
> +       memset(page, '\0', sizeof(struct acpi_parking_protocol_page));
> +
> +       /* Init mailbox. Set MPIDR so core's will find their page. */
> +       page->cpu_id = gicc->mpidr;
> +       page->jumping_address = ACPI_PP_JMP_ADR_INVALID;
> +
> +       /* Relocate spinning code */
> +       reloc_addr = &page->cpu_spinning_code[0];
> +
> +       debug("Relocating spin table from %p to %p (size %x)\n",
> +             &acpi_pp_code_start, reloc_addr, acpi_pp_code_size);

Can you use %lx to print addresses? It avoids 16 digits which makes
them unreadable. Also we generally show addresses for U-Boot, so you
can use map_to_sysmem() to turn a pointer into an address.

> +       memcpy(reloc_addr, &acpi_pp_code_start, acpi_pp_code_size);
> +
> +       if (!CONFIG_IS_ENABLED(SYS_DCACHE_OFF))
> +               flush_dcache_range((unsigned long)page,
> +                                  (unsigned long)(page + 1));
> +}
> +
> +void acpi_write_parking_protocol(struct acpi_madt *madt)
> +{
> +       struct acpi_parking_protocol_page *start, *page;
> +       struct acpi_madt_gicc *gicc;
> +       int ncpus = 0;
> +
> +       /* According to the "Multi-processor Startup for ARM Platforms":

/*
 * - According...

> +        * - Every CPU as specified by MADT GICC has it's own 4K page
> +        * - Every page is divided into two sections: OS and FW reserved
> +        * - Memory occupied by "Parking Protocol" must be marked 'Reserved'
> +        * - Spinloop code should reside in FW reserved 2048 bytes
> +        * - Spinloop code will check the mailbox in OS reserved area
> +        */
> +
> +       if (acpi_pp_code_size > sizeof(page->cpu_spinning_code)) {
> +               log_err("Spinning code too big to fit: %d\n",
> +                       acpi_pp_code_size);
> +               return;
> +       }
> +
> +       /* Count all MADT GICCs including BSP */
> +       for (int i = sizeof(struct acpi_madt); i < madt->header.length;

We normally declare the var at the top of functions.

> +            i += gicc->length) {
> +               gicc = (struct acpi_madt_gicc *)((void *)madt + i);
> +               if (gicc->type != ACPI_APIC_GICC)
> +                       continue;
> +               ncpus++;
> +       }
> +       debug("Found %d GICCs in MADT\n", ncpus);

%#x  ? There could be alot. Up to you though

> +
> +       /* Allocate pages linearly due to assembly code requirements */
> +       if (IS_ENABLED(CONFIG_BLOBLIST_TABLES)) {
> +               start = bloblist_add(BLOBLISTT_ACPI_PP, ACPI_PP_PAGE_SIZE * ncpus,
> +                                    ilog2(SZ_4K));
> +       } else {
> +               start = memalign(ACPI_PP_PAGE_SIZE, ACPI_PP_PAGE_SIZE * ncpus);

Let's use the bloblist always...

> +       }
> +       if (!start) {
> +               log_err("Failed to allocate memory for ACPI parking protocol pages\n");

ACPI-parking-protocol pages

> +               return;
> +       }
> +       log_debug("Allocated parking protocol at %p\n", start);
> +       page = start;
> +
> +       if (IS_ENABLED(CONFIG_EFI_LOADER)) {
> +               /* Default mapping is 'BOOT CODE'. Mark as reserved instead. */
> +               int ret = efi_add_memory_map((u64)(uintptr_t)start,
> +                                            ncpus * ACPI_PP_PAGE_SIZE,
> +                                            EFI_RESERVED_MEMORY_TYPE);
> +
> +               if (ret != EFI_SUCCESS)

if (ret)

> +                       log_err("Reserved memory mapping failed addr %p size %x\n",
> +                               start, ncpus * ACPI_PP_PAGE_SIZE);
> +       }
> +
> +       /* Prepare the parking protocol pages */
> +       for (int i = sizeof(struct acpi_madt); i < madt->header.length;

decl again

> +            i += gicc->length) {
> +               gicc = (struct acpi_madt_gicc *)((void *)madt + i);
> +               if (gicc->type != ACPI_APIC_GICC)
> +                       continue;
> +
> +               acpi_write_pp_setup_one_page(page++, gicc);
> +       }
> +
> +       acpi_pp_etables = virt_to_phys(start) +
> +                         ACPI_PP_PAGE_SIZE * ncpus;
> +       acpi_pp_tables = virt_to_phys(start);
> +
> +       /* Make sure other cores see written value in memory */
> +       flush_dcache_all();

Is the cache ever disabled?

> +
> +       /* Send an event to wake up the secondary CPU. */
> +       asm("dsb        ishst\n"
> +           "sev");
> +}
> diff --git a/include/acpi/acpi_table.h b/include/acpi/acpi_table.h
> index 3dd24b520d..abf69f16ba 100644
> --- a/include/acpi/acpi_table.h
> +++ b/include/acpi/acpi_table.h
> @@ -1237,6 +1237,16 @@ int acpi_iort_add_smmu_v3(struct acpi_ctx *ctx,
>   */
>  void *acpi_fill_madt(struct acpi_madt *madt, struct acpi_ctx *ctx);
>
> +/**
> + * acpi_write_parking_protocol() - Installs the ACPI parking protocol.
> + *
> + * Sets up the ACPI parking protocol and installs the spinning code for
> + * secondary CPUs.
> + *
> + * @madt: The MADT to update
> + */
> +void acpi_write_parking_protocol(struct acpi_madt *madt);

acpi_write_park() ?

> +
>  /**
>   * acpi_get_rsdp_addr() - get ACPI RSDP table address
>   *
> diff --git a/include/bloblist.h b/include/bloblist.h
> index b0706b5637..ff32d3fecf 100644
> --- a/include/bloblist.h
> +++ b/include/bloblist.h
> @@ -110,6 +110,7 @@ enum bloblist_tag_t {
>         BLOBLISTT_ACPI_TABLES = 4,
>         BLOBLISTT_TPM_EVLOG = 5,
>         BLOBLISTT_TPM_CRB_BASE = 6,
> +       BLOBLISTT_ACPI_PP = 7,
>
>         /* Standard area to allocate blobs used across firmware components */
>         BLOBLISTT_AREA_FIRMWARE = 0x10,
> diff --git a/lib/Kconfig b/lib/Kconfig
> index ea444354eb..a93b494288 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -314,6 +314,21 @@ config GENERATE_ACPI_TABLE
>           by the operating system. It defines platform-independent interfaces
>           for configuration and power management monitoring.
>
> +config ACPI_PARKING_PROTOCOL
> +       bool "Support ACPI parking protocol method"
> +       depends on GENERATE_ACPI_TABLE
> +       depends on ARMV8_MULTIENTRY
> +       default y if !SEC_FIRMWARE_ARMV8_PSCI && !ARMV8_PSCI
> +       help
> +         Say Y here to support "ACPI parking protocol" enable method
> +         for booting Linux.
> +
> +         To use this feature, you must do:
> +           - Bring secondary CPUs into U-Boot proper in a board specific

board-specific

> +             manner.  This must be done *after* relocation.  Otherwise, the
> +             secondary CPUs will spin in unprotected memory area because the

in an unprotected memory-area

> +             master CPU protects the relocated spin code.
> +
>  config SPL_TINY_MEMSET
>         bool "Use a very small memset() in SPL"
>         depends on SPL
> diff --git a/lib/acpi/acpi_table.c b/lib/acpi/acpi_table.c
> index 8aab41212a..7b419144dc 100644
> --- a/lib/acpi/acpi_table.c
> +++ b/lib/acpi/acpi_table.c
> @@ -290,6 +290,10 @@ int acpi_write_madt(struct acpi_ctx *ctx, const struct acpi_writer *entry)
>
>         /* (Re)calculate length and checksum */
>         header->length = (uintptr_t)current - (uintptr_t)madt;

Can you cast to void * instead? We try to avoid casts between pointer
and int and use map_sysmem() / map_to_sysmem() instead.

> +
> +       if (IS_ENABLED(CONFIG_ACPI_PARKING_PROTOCOL))
> +               acpi_write_parking_protocol(madt);
> +
>         header->checksum = table_compute_checksum((void *)madt, header->length);
>         acpi_add_table(ctx, madt);
>         ctx->current = (void *)madt + madt->header.length;
> --
> 2.46.0
>

Regards,
Simon


More information about the U-Boot mailing list