[PATCH v2 10/25] x86: mp: Support APs waiting for instructions

Bin Meng bmeng.cn at gmail.com
Sun Jun 28 08:25:33 CEST 2020


Hi Simon,

On Mon, Jun 15, 2020 at 1:00 AM Simon Glass <sjg at chromium.org> wrote:
>
> At present the APs (non-boot CPUs) are inited once and then parked ready
> for the OS to use them. However in some cases we want to send new requests
> through, such as to change MTRRs and keep them consistent across CPUs.
>
> Change the last state of the flight plan to go into a wait loop, accepting
> instructions from the main CPU.
>
> Signed-off-by: Simon Glass <sjg at chromium.org>
> ---
>
> Changes in v2:
> - Add more comments
>
>  arch/x86/cpu/mp_init.c    | 126 +++++++++++++++++++++++++++++++++++---
>  arch/x86/include/asm/mp.h |  11 ++++
>  2 files changed, 128 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> index cd4cae559d..673fdc9628 100644
> --- a/arch/x86/cpu/mp_init.c
> +++ b/arch/x86/cpu/mp_init.c
> @@ -43,13 +43,38 @@ struct mp_flight_plan {
>         struct mp_flight_record *records;
>  };
>
> +/**
> + * struct mp_callback - Callback information for APs
> + *
> + * @func: Function to run
> + * @arg: Argument to pass to the function
> + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a special
> + *     value like MP_SELECT_BSP. It tells the AP whether it should process this
> + *     callback
> + */
> +struct mp_callback {
> +       /**
> +        * func() - Function to call on the AP
> +        *
> +        * @arg: Argument to pass
> +        */
> +       void (*func)(void *arg);
> +       void *arg;
> +       int logical_cpu_number;
> +};
> +
>  static struct mp_flight_plan mp_info;
>
> -struct cpu_map {
> -       struct udevice *dev;
> -       int apic_id;
> -       int err_code;
> -};
> +/*
> + * ap_callbacks - Callback mailbox array
> + *
> + * Array of callback pointers, one entry per available CPU, indexed by the CPU
> + * number, which is dev->req_seq. The entry for the main CPU is never used.
> + * When this is NULL, there is no pending work for the CPU to run. When
> + * non-NULL it points to the mp_callback structure. This is shared between all
> + * CPUs, so should only be written by the main CPU.
> + */
> +static struct mp_callback **ap_callbacks;
>
>  static inline void barrier_wait(atomic_t *b)
>  {
> @@ -147,11 +172,9 @@ static void ap_init(unsigned int cpu_index)
>         debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
>               dev ? dev->name : "(apic_id not found)");
>
> -       /* Walk the flight plan */
> +       /* Walk the flight plan, which never returns */
>         ap_do_flight_plan(dev);
> -
> -       /* Park the AP */
> -       debug("parking\n");
> +       debug("Unexpected return\n");
>  done:
>         stop_this_cpu();
>  }
> @@ -455,6 +478,86 @@ static int get_bsp(struct udevice **devp, int *cpu_countp)
>         return dev->req_seq;
>  }
>
> +/**
> + * read_callback() - Read the pointer in a callback slot
> + *
> + * This is called by APs to read their callback slot to see if there is a
> + * pointer to new instructions
> + *
> + * @slot: Pointer to the AP's callback slot
> + * @return value of that pointer
> + */
> +static struct mp_callback *read_callback(struct mp_callback **slot)
> +{
> +       struct mp_callback *ret;
> +
> +       asm volatile ("mov      %1, %0\n"
> +               : "=r" (ret)
> +               : "m" (*slot)
> +               : "memory"
> +       );

Why do we need inline assembly here? Is it to prevent the compiler from
reordering the code (hence the "memory" clobber)? Can we use plain C
plus an explicit memory barrier instead?
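
Something like this is what I have in mind - just a sketch, untested,
assuming a volatile access plus mfence() gives the ordering we need:

static struct mp_callback *read_callback(struct mp_callback **slot)
{
	struct mp_callback *ret;

	/* A volatile access forces a real load from the shared slot */
	ret = *(struct mp_callback * volatile *)slot;

	/* Order this load against whatever the caller does next */
	mfence();

	return ret;
}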

> +       return ret;
> +}
> +
> +/**
> + * store_callback() - Store a pointer to the callback slot
> + *
> + * This is called by APs to write NULL into the callback slot when they have
> + * finished the work requested by the BSP.
> + *
> + * @slot: Pointer to the AP's callback slot
> + * @val: Value to write (e.g. NULL)
> + */
> +static void store_callback(struct mp_callback **slot, struct mp_callback *val)
> +{
> +       asm volatile ("mov      %1, %0\n"
> +               : "=m" (*slot)
> +               : "r" (val)
> +               : "memory"
> +       );
> +}
> +
> +/**
> + * ap_wait_for_instruction() - Wait for and process requests from the main CPU
> + *
> + * This is called by APs (here, everything other than the main boot CPU) to
> + * await instructions. They arrive in the form of a function call and argument,
> + * which is then called. This uses a simple mailbox with atomic read/set
> + *
> + * @cpu: CPU that is waiting
> + * @unused: Optional argument provided by struct mp_flight_record, not used here
> + * @return Does not return
> + */
> +static int ap_wait_for_instruction(struct udevice *cpu, void *unused)
> +{
> +       struct mp_callback lcb;
> +       struct mp_callback **per_cpu_slot;
> +
> +       per_cpu_slot = &ap_callbacks[cpu->req_seq];
> +
> +       while (1) {
> +               struct mp_callback *cb = read_callback(per_cpu_slot);
> +
> +               if (!cb) {
> +                       asm ("pause");
> +                       continue;
> +               }
> +
> +               /* Copy to local variable before using the value */
> +               memcpy(&lcb, cb, sizeof(lcb));
> +               mfence();
> +               if (lcb.logical_cpu_number == MP_SELECT_ALL ||
> +                   lcb.logical_cpu_number == MP_SELECT_APS ||
> +                   cpu->req_seq == lcb.logical_cpu_number)
> +                       lcb.func(lcb.arg);
> +
> +               /* Indicate we are finished */
> +               store_callback(per_cpu_slot, NULL);

What happens if the BSP puts another job into the callback slot at the
same time? There is no protection here.
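
I would expect the BSP side to wait for the slot to return to NULL
before posting new work. Something like this hypothetical helper (not
part of this patch, just to illustrate the protection I mean):

static void bsp_post_callback(int cpu, struct mp_callback *cb)
{
	/* Wait until the AP has cleared its previous request */
	while (read_callback(&ap_callbacks[cpu]))
		asm("pause");

	/* Make sure *cb is fully written before publishing it */
	mfence();
	store_callback(&ap_callbacks[cpu], cb);
}

If that is handled elsewhere in the series, a comment here would help.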

> +       }
> +
> +       return 0;
> +}
> +
>  static int mp_init_cpu(struct udevice *cpu, void *unused)
>  {
>         struct cpu_platdata *plat = dev_get_parent_platdata(cpu);
> @@ -467,6 +570,7 @@ static int mp_init_cpu(struct udevice *cpu, void *unused)
>
>  static struct mp_flight_record mp_steps[] = {
>         MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL),
> +       MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL),
>  };
>
>  int mp_init(void)
> @@ -504,6 +608,10 @@ int mp_init(void)
>         if (ret)
>                 log_warning("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");
>
> +       ap_callbacks = calloc(num_cpus, sizeof(struct mp_callback *));
> +       if (!ap_callbacks)
> +               return -ENOMEM;
> +
>         /* Copy needed parameters so that APs have a reference to the plan */
>         mp_info.num_records = ARRAY_SIZE(mp_steps);
>         mp_info.records = mp_steps;
> diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h
> index 94af819ad9..41b1575f4b 100644
> --- a/arch/x86/include/asm/mp.h
> +++ b/arch/x86/include/asm/mp.h
> @@ -11,6 +11,17 @@
>  #include <asm/atomic.h>
>  #include <asm/cache.h>
>
> +enum {
> +       /* Indicates that the function should run on all CPUs */
> +       MP_SELECT_ALL   = -1,
> +
> +       /* Run only on the boot CPU (BSP) */
> +       MP_SELECT_BSP   = -2,
> +
> +       /* Run on non-boot CPUs */
> +       MP_SELECT_APS   = -3,
> +};
> +
>  typedef int (*mp_callback_t)(struct udevice *cpu, void *arg);
>
>  /*
> --

Regards,
Bin

