[PATCH v2 10/25] x86: mp: Support APs waiting for instructions
Bin Meng
bmeng.cn at gmail.com
Sun Jun 28 08:25:33 CEST 2020
Hi Simon,
On Mon, Jun 15, 2020 at 1:00 AM Simon Glass <sjg at chromium.org> wrote:
>
> At present the APs (non-boot CPUs) are inited once and then parked ready
> for the OS to use them. However in some cases we want to send new requests
> through, such as to change MTRRs and keep them consistent across CPUs.
>
> Change the last state of the flight plan to go into a wait loop, accepting
> instructions from the main CPU.
>
> Signed-off-by: Simon Glass <sjg at chromium.org>
> ---
>
> Changes in v2:
> - Add more comments
>
> arch/x86/cpu/mp_init.c | 126 +++++++++++++++++++++++++++++++++++---
> arch/x86/include/asm/mp.h | 11 ++++
> 2 files changed, 128 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> index cd4cae559d..673fdc9628 100644
> --- a/arch/x86/cpu/mp_init.c
> +++ b/arch/x86/cpu/mp_init.c
> @@ -43,13 +43,38 @@ struct mp_flight_plan {
> struct mp_flight_record *records;
> };
>
> +/**
> + * struct mp_callback - Callback information for APs
> + *
> + * @func: Function to run
> + * @arg: Argument to pass to the function
> + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a special
> + * value like MP_SELECT_BSP. It tells the AP whether it should process this
> + * callback
> + */
> +struct mp_callback {
> + /**
> + * func() - Function to call on the AP
> + *
> + * @arg: Argument to pass
> + */
> + void (*func)(void *arg);
> + void *arg;
> + int logical_cpu_number;
> +};
> +
> static struct mp_flight_plan mp_info;
>
> -struct cpu_map {
> - struct udevice *dev;
> - int apic_id;
> - int err_code;
> -};
> +/*
> + * ap_callbacks - Callback mailbox array
> + *
> + * Array of callbacks, one entry for each available CPU, indexed by the CPU
> + * number, which is dev->req_seq. The entry for the main CPU is never used.
> + * When this is NULL, there is no pending work for the CPU to run. When
> + * non-NULL it points to the mp_callback structure. This is shared between all
> + * CPUs, so should only be written by the main CPU.
> + */
> +static struct mp_callback **ap_callbacks;
>
> static inline void barrier_wait(atomic_t *b)
> {
> @@ -147,11 +172,9 @@ static void ap_init(unsigned int cpu_index)
> debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
> dev ? dev->name : "(apic_id not found)");
>
> - /* Walk the flight plan */
> + /* Walk the flight plan, which never returns */
> ap_do_flight_plan(dev);
> -
> - /* Park the AP */
> - debug("parking\n");
> + debug("Unexpected return\n");
> done:
> stop_this_cpu();
> }
> @@ -455,6 +478,86 @@ static int get_bsp(struct udevice **devp, int *cpu_countp)
> return dev->req_seq;
> }
>
> +/**
> + * read_callback() - Read the pointer in a callback slot
> + *
> + * This is called by APs to read their callback slot to see if there is a
> + * pointer to new instructions
> + *
> + * @slot: Pointer to the AP's callback slot
> + * @return value of that pointer
> + */
> +static struct mp_callback *read_callback(struct mp_callback **slot)
> +{
> + struct mp_callback *ret;
> +
> + asm volatile ("mov %1, %0\n"
> + : "=r" (ret)
> + : "m" (*slot)
> + : "memory"
> + );
Why do we need inline assembly here? Is it to prevent the compiler from
reordering the code (via the "memory" clobber)? Could we use plain C with
an explicit memory barrier to do this instead?
> + return ret;
> +}
> +
> +/**
> + * store_callback() - Store a pointer to the callback slot
> + *
> + * This is called by APs to write NULL into the callback slot when they have
> + * finished the work requested by the BSP.
> + *
> + * @slot: Pointer to the AP's callback slot
> + * @val: Value to write (e.g. NULL)
> + */
> +static void store_callback(struct mp_callback **slot, struct mp_callback *val)
> +{
> + asm volatile ("mov %1, %0\n"
> + : "=m" (*slot)
> + : "r" (val)
> + : "memory"
> + );
> +}
> +
> +/**
> + * ap_wait_for_instruction() - Wait for and process requests from the main CPU
> + *
> + * This is called by APs (here, everything other than the main boot CPU) to
> + * await instructions. They arrive in the form of a function call and argument,
> + * which is then called. This uses a simple mailbox with atomic read/set
> + *
> + * @cpu: CPU that is waiting
> + * @unused: Optional argument provided by struct mp_flight_record, not used here
> + * @return Does not return
> + */
> +static int ap_wait_for_instruction(struct udevice *cpu, void *unused)
> +{
> + struct mp_callback lcb;
> + struct mp_callback **per_cpu_slot;
> +
> + per_cpu_slot = &ap_callbacks[cpu->req_seq];
> +
> + while (1) {
> + struct mp_callback *cb = read_callback(per_cpu_slot);
> +
> + if (!cb) {
> + asm ("pause");
> + continue;
> + }
> +
> + /* Copy to local variable before using the value */
> + memcpy(&lcb, cb, sizeof(lcb));
> + mfence();
> + if (lcb.logical_cpu_number == MP_SELECT_ALL ||
> + lcb.logical_cpu_number == MP_SELECT_APS ||
> + cpu->req_seq == lcb.logical_cpu_number)
> + lcb.func(lcb.arg);
> +
> + /* Indicate we are finished */
> + store_callback(per_cpu_slot, NULL);
What happens if, at the same time, the BSP puts another job into the
callback slot? There is no protection against that race here.
> + }
> +
> + return 0;
> +}
> +
> static int mp_init_cpu(struct udevice *cpu, void *unused)
> {
> struct cpu_platdata *plat = dev_get_parent_platdata(cpu);
> @@ -467,6 +570,7 @@ static int mp_init_cpu(struct udevice *cpu, void *unused)
>
> static struct mp_flight_record mp_steps[] = {
> MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL),
> + MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL),
> };
>
> int mp_init(void)
> @@ -504,6 +608,10 @@ int mp_init(void)
> if (ret)
> log_warning("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");
>
> + ap_callbacks = calloc(num_cpus, sizeof(struct mp_callback *));
> + if (!ap_callbacks)
> + return -ENOMEM;
> +
> /* Copy needed parameters so that APs have a reference to the plan */
> mp_info.num_records = ARRAY_SIZE(mp_steps);
> mp_info.records = mp_steps;
> diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h
> index 94af819ad9..41b1575f4b 100644
> --- a/arch/x86/include/asm/mp.h
> +++ b/arch/x86/include/asm/mp.h
> @@ -11,6 +11,17 @@
> #include <asm/atomic.h>
> #include <asm/cache.h>
>
> +enum {
> + /* Indicates that the function should run on all CPUs */
> + MP_SELECT_ALL = -1,
> +
> + /* Run on boot CPUs */
> + MP_SELECT_BSP = -2,
> +
> + /* Run on non-boot CPUs */
> + MP_SELECT_APS = -3,
> +};
> +
> typedef int (*mp_callback_t)(struct udevice *cpu, void *arg);
>
> /*
> --
Regards,
Bin
More information about the U-Boot
mailing list