[PATCH v2 10/25] x86: mp: Support APs waiting for instructions

Simon Glass sjg at chromium.org
Mon Jul 6 07:26:21 CEST 2020


Hi Bin,

On Sun, 28 Jun 2020 at 00:25, Bin Meng <bmeng.cn at gmail.com> wrote:
>
> Hi Simon,
>
> On Mon, Jun 15, 2020 at 1:00 AM Simon Glass <sjg at chromium.org> wrote:
> >
> > At present the APs (non-boot CPUs) are inited once and then parked ready
> > for the OS to use them. However in some cases we want to send new
requests
> > through, such as to change MTRRs and keep them consistent across CPUs.
> >
> > Change the last state of the flight plan to go into a wait loop,
accepting
> > instructions from the main CPU.
> >
> > Signed-off-by: Simon Glass <sjg at chromium.org>
> > ---
> >
> > Changes in v2:
> > - Add more comments
> >
> >  arch/x86/cpu/mp_init.c    | 126 +++++++++++++++++++++++++++++++++++---
> >  arch/x86/include/asm/mp.h |  11 ++++
> >  2 files changed, 128 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
> > index cd4cae559d..673fdc9628 100644
> > --- a/arch/x86/cpu/mp_init.c
> > +++ b/arch/x86/cpu/mp_init.c
> > @@ -43,13 +43,38 @@ struct mp_flight_plan {
> >         struct mp_flight_record *records;
> >  };
> >
> > +/**
> > + * struct mp_callback - Callback information for APs
> > + *
> > + * @func: Function to run
> > + * @arg: Argument to pass to the function
> > + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a
special
> > + *     value like MP_SELECT_BSP. It tells the AP whether it should
process this
> > + *     callback
> > + */
> > +struct mp_callback {
> > +       /**
> > +        * func() - Function to call on the AP
> > +        *
> > +        * @arg: Argument to pass
> > +        */
> > +       void (*func)(void *arg);
> > +       void *arg;
> > +       int logical_cpu_number;
> > +};
> > +
> >  static struct mp_flight_plan mp_info;
> >
> > -struct cpu_map {
> > -       struct udevice *dev;
> > -       int apic_id;
> > -       int err_code;
> > -};
> > +/*
> > + * ap_callbacks - Callback mailbox array
> > + *
> > + * Array of callback, one entry for each available CPU, indexed by the
CPU
> > + * number, which is dev->req_seq. The entry for the main CPU is never
used.
> > + * When this is NULL, there is no pending work for the CPU to run. When
> > + * non-NULL it points to the mp_callback structure. This is shared
between all
> > + * CPUs, so should only be written by the main CPU.
> > + */
> > +static struct mp_callback **ap_callbacks;
> >
> >  static inline void barrier_wait(atomic_t *b)
> >  {
> > @@ -147,11 +172,9 @@ static void ap_init(unsigned int cpu_index)
> >         debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
> >               dev ? dev->name : "(apic_id not found)");
> >
> > -       /* Walk the flight plan */
> > +       /* Walk the flight plan, which never returns */
> >         ap_do_flight_plan(dev);
> > -
> > -       /* Park the AP */
> > -       debug("parking\n");
> > +       debug("Unexpected return\n");
> >  done:
> >         stop_this_cpu();
> >  }
> > @@ -455,6 +478,86 @@ static int get_bsp(struct udevice **devp, int
*cpu_countp)
> >         return dev->req_seq;
> >  }
> >
> > +/**
> > + * read_callback() - Read the pointer in a callback slot
> > + *
> > + * This is called by APs to read their callback slow to see if there
is a
> > + * pointer to new instructions
> > + *
> > + * @slot: Pointer to the AP's callback slot
> > + * @return value of that pointer
> > + */
> > +static struct mp_callback *read_callback(struct mp_callback **slot)
> > +{
> > +       struct mp_callback *ret;
> > +
> > +       asm volatile ("mov      %1, %0\n"
> > +               : "=r" (ret)
> > +               : "m" (*slot)
> > +               : "memory"
> > +       );
>
> Why do we need inline assembly here? Prevent compiler reordering the
> codes ("memory")? Can we use C and additional memory barrier to do
> this?

Yes that should work. Will fix.

>
> > +       return ret;
> > +}
> > +
> > +/**
> > + * store_callback() - Store a pointer to the callback slot
> > + *
> > + * This is called by APs to write NULL into the callback slot when
they have
> > + * finished the work requested by the BSP.
> > + *
> > + * @slot: Pointer to the AP's callback slot
> > + * @val: Value to write (e.g. NULL)
> > + */
> > +static void store_callback(struct mp_callback **slot, struct
mp_callback *val)
> > +{
> > +       asm volatile ("mov      %1, %0\n"
> > +               : "=m" (*slot)
> > +               : "r" (val)
> > +               : "memory"
> > +       );
> > +}
> > +
> > +/**
> > + * ap_wait_for_instruction() - Wait for and process requests from the
main CPU
> > + *
> > + * This is called by APs (here, everything other than the main boot
CPU) to
> > + * await instructions. They arrive in the form of a function call and
argument,
> > + * which is then called. This uses a simple mailbox with atomic
read/set
> > + *
> > + * @cpu: CPU that is waiting
> > + * @unused: Optional argument provided by struct mp_flight_record, not
used here
> > + * @return Does not return
> > + */
> > +static int ap_wait_for_instruction(struct udevice *cpu, void *unused)
> > +{
> > +       struct mp_callback lcb;
> > +       struct mp_callback **per_cpu_slot;
> > +
> > +       per_cpu_slot = &ap_callbacks[cpu->req_seq];
> > +
> > +       while (1) {
> > +               struct mp_callback *cb = read_callback(per_cpu_slot);
> > +
> > +               if (!cb) {
> > +                       asm ("pause");
> > +                       continue;
> > +               }
> > +
> > +               /* Copy to local variable before using the value */
> > +               memcpy(&lcb, cb, sizeof(lcb));
> > +               mfence();
> > +               if (lcb.logical_cpu_number == MP_SELECT_ALL ||
> > +                   lcb.logical_cpu_number == MP_SELECT_APS ||
> > +                   cpu->req_seq == lcb.logical_cpu_number)
> > +                       lcb.func(lcb.arg);
> > +
> > +               /* Indicate we are finished */
> > +               store_callback(per_cpu_slot, NULL);
>
> What happens if at the same time BSP put another job to the callback
> slot? There is no protection here.

The BSP is in control of sending jobs to the APs and does not return from
that function (in a later patch) until all APs have finished.

So at present it is not possible for the BSP to start another request on
the same AP.

Regards,
Simon


More information about the U-Boot mailing list