[PATCH v2 10/25] x86: mp: Support APs waiting for instructions

Mon Jul 6 07:26:21 CEST 2020

Hi Bin,

On Sun, 28 Jun 2020 at 00:25, Bin Meng <bmeng.cn at gmail.com> wrote:
>
> Hi Simon,
>
> On Mon, Jun 15, 2020 at 1:00 AM Simon Glass <sjg at chromium.org> wrote:
> >
> > At present the APs (non-boot CPUs) are inited once and then parked ready
> > for the OS to use them. However in some cases we want to send new
requests
> > through, such as to change MTRRs and keep them consistent across CPUs.
> >
> > Change the last state of the flight plan to go into a wait loop,
accepting
> > instructions from the main CPU.
> >
> > Signed-off-by: Simon Glass <sjg at chromium.org>
> > ---
> >
> > Changes in v2:
> > - Add more comments
> >
> >  arch/x86/cpu/mp_init.c    | 126 +++++++++++++++++++++++++++++++++++---
> >  arch/x86/include/asm/mp.h |  11 ++++
> >  2 files changed, 128 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> > index cd4cae559d..673fdc9628 100644
> > --- a/arch/x86/cpu/mp_init.c
> > +++ b/arch/x86/cpu/mp_init.c
> > @@ -43,13 +43,38 @@ struct mp_flight_plan {
> >         struct mp_flight_record *records;
> >  };
> >
> > +/**
> > + * struct mp_callback - Callback information for APs
> > + *
> > + * @func: Function to run
> > + * @arg: Argument to pass to the function
> > + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a
special
> > + *     value like MP_SELECT_BSP. It tells the AP whether it should
process this
> > + *     callback
> > + */
> > +struct mp_callback {
> > +       /**
> > +        * func() - Function to call on the AP
> > +        *
> > +        * @arg: Argument to pass
> > +        */
> > +       void (*func)(void *arg);
> > +       void *arg;
> > +       int logical_cpu_number;
> > +};
> > +
> >  static struct mp_flight_plan mp_info;
> >
> > -struct cpu_map {
> > -       struct udevice *dev;
> > -       int apic_id;
> > -       int err_code;
> > -};
> > +/*
> > + * ap_callbacks - Callback mailbox array
> > + *
> > + * Array of callback, one entry for each available CPU, indexed by the
CPU
> > + * number, which is dev->req_seq. The entry for the main CPU is never
used.
> > + * When this is NULL, there is no pending work for the CPU to run. When
> > + * non-NULL it points to the mp_callback structure. This is shared
between all
> > + * CPUs, so should only be written by the main CPU.
> > + */
> > +static struct mp_callback **ap_callbacks;
> >
> >  static inline void barrier_wait(atomic_t *b)
> >  {
> > @@ -147,11 +172,9 @@ static void ap_init(unsigned int cpu_index)
> >         debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
> >               dev ? dev->name : "(apic_id not found)");
> >
> > -       /* Walk the flight plan */
> > +       /* Walk the flight plan, which never returns */
> >         ap_do_flight_plan(dev);
> > -
> > -       /* Park the AP */
> > -       debug("parking\n");
> > +       debug("Unexpected return\n");
> >  done:
> >         stop_this_cpu();
> >  }
> > @@ -455,6 +478,86 @@ static int get_bsp(struct udevice **devp, int
*cpu_countp)
> >         return dev->req_seq;
> >  }
> >
> > +/**
> > + * read_callback() - Read the pointer in a callback slot
> > + *
> > + * This is called by APs to read their callback slow to see if there
is a
> > + * pointer to new instructions
> > + *
> > + * @slot: Pointer to the AP's callback slot
> > + * @return value of that pointer
> > + */
> > +static struct mp_callback *read_callback(struct mp_callback **slot)
> > +{
> > +       struct mp_callback *ret;
> > +
> > +       asm volatile ("mov      %1, %0\n"
> > +               : "=r" (ret)
> > +               : "m" (*slot)
> > +               : "memory"
> > +       );
>
> Why do we need inline assembly here? Prevent compiler reordering the
> codes ("memory")? Can we use C and additional memory barrier to do
> this?

Yes that should work. Will fix.

>
> > +       return ret;
> > +}
> > +
> > +/**
> > + * store_callback() - Store a pointer to the callback slot
> > + *
> > + * This is called by APs to write NULL into the callback slot when
they have
> > + * finished the work requested by the BSP.
> > + *
> > + * @slot: Pointer to the AP's callback slot
> > + * @val: Value to write (e.g. NULL)
> > + */
> > +static void store_callback(struct mp_callback **slot, struct
mp_callback *val)
> > +{
> > +       asm volatile ("mov      %1, %0\n"
> > +               : "=m" (*slot)
> > +               : "r" (val)
> > +               : "memory"
> > +       );
> > +}
> > +
> > +/**
> > + * ap_wait_for_instruction() - Wait for and process requests from the
main CPU
> > + *
> > + * This is called by APs (here, everything other than the main boot
CPU) to
> > + * await instructions. They arrive in the form of a function call and
argument,
> > + * which is then called. This uses a simple mailbox with atomic
read/set
> > + *
> > + * @cpu: CPU that is waiting
> > + * @unused: Optional argument provided by struct mp_flight_record, not
used here
> > + * @return Does not return
> > + */
> > +static int ap_wait_for_instruction(struct udevice *cpu, void *unused)
> > +{
> > +       struct mp_callback lcb;
> > +       struct mp_callback **per_cpu_slot;
> > +
> > +       per_cpu_slot = &ap_callbacks[cpu->req_seq];
> > +
> > +       while (1) {
> > +               struct mp_callback *cb = read_callback(per_cpu_slot);
> > +
> > +               if (!cb) {
> > +                       asm ("pause");
> > +                       continue;
> > +               }
> > +
> > +               /* Copy to local variable before using the value */
> > +               memcpy(&lcb, cb, sizeof(lcb));
> > +               mfence();
> > +               if (lcb.logical_cpu_number == MP_SELECT_ALL ||
> > +                   lcb.logical_cpu_number == MP_SELECT_APS ||
> > +                   cpu->req_seq == lcb.logical_cpu_number)
> > +                       lcb.func(lcb.arg);
> > +
> > +               /* Indicate we are finished */
> > +               store_callback(per_cpu_slot, NULL);
>
> What happens if at the same time BSP put another job to the callback
> slot? There is no protection here.

The BSP is in control of sending jobs to the APs and does not return from
that function (in a later patch) until all APs have finished.

So at present it is not possible for the BSP to start another request on
the same AP.

Regards,
Simon