[PATCH v5 27/33] riscv: Fix race conditions when initializing IPI
Sean Anderson
seanga2 at gmail.com
Tue Mar 3 00:43:18 CET 2020
On 3/2/20 6:17 PM, Lukas Auer wrote:
> On Fri, 2020-02-28 at 16:05 -0500, Sean Anderson wrote:
>
>> The IPI code could have race conditions in several places.
>> * Several harts could race on the value of gd->arch->clint/plic
>> * Non-boot harts could race with the main hart on the DM subsystem In
>> addition, if an IPI was pending when U-Boot started, it would cause the
>> IPI handler to jump to address 0.
>>
>> To address these problems, a new function riscv_init_ipi is introduced. It
>> is called once during arch_cpu_init_dm. Before this point, no riscv_*_ipi
>> functions may be called. Access is synchronized by gd->arch->ipi_ready.
>>
>> Signed-off-by: Sean Anderson <seanga2 at gmail.com>
>> ---
>>
>> Changes in v5:
>> - New
>>
>> arch/riscv/cpu/cpu.c | 9 ++++
>> arch/riscv/include/asm/global_data.h | 1 +
>> arch/riscv/include/asm/smp.h | 43 ++++++++++++++++++
>> arch/riscv/lib/andes_plic.c | 34 +++++---------
>> arch/riscv/lib/sbi_ipi.c | 5 ++
>> arch/riscv/lib/sifive_clint.c | 33 +++++---------
>> arch/riscv/lib/smp.c | 68 ++++++++--------------------
>> 7 files changed, 101 insertions(+), 92 deletions(-)
>>
>> diff --git a/arch/riscv/cpu/cpu.c b/arch/riscv/cpu/cpu.c
>> index e457f6acbf..a971ec8694 100644
>> --- a/arch/riscv/cpu/cpu.c
>> +++ b/arch/riscv/cpu/cpu.c
>> @@ -96,6 +96,15 @@ int arch_cpu_init_dm(void)
>> csr_write(CSR_SATP, 0);
>> }
>>
>> +#ifdef CONFIG_SMP
>> + ret = riscv_init_ipi();
>> + if (ret)
>> + return ret;
>> +
>> + /* Atomically set a flag enabling IPI handling */
>> + WRITE_ONCE(gd->arch.ipi_ready, 1);
>> +#endif
>> +
>> return 0;
>> }
>>
>> diff --git a/arch/riscv/include/asm/global_data.h b/arch/riscv/include/asm/global_data.h
>> index 7276d9763f..b24f8fd2a7 100644
>> --- a/arch/riscv/include/asm/global_data.h
>> +++ b/arch/riscv/include/asm/global_data.h
>> @@ -28,6 +28,7 @@ struct arch_global_data {
>> #endif
>> #ifdef CONFIG_SMP
>> struct ipi_data ipi[CONFIG_NR_CPUS];
>> + long ipi_ready; /* Set after riscv_init_ipi is called */
>> #endif
>> #ifndef CONFIG_XIP
>> ulong available_harts;
>> diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
>> index 74de92ed13..1b428856b2 100644
>> --- a/arch/riscv/include/asm/smp.h
>> +++ b/arch/riscv/include/asm/smp.h
>> @@ -51,4 +51,47 @@ void handle_ipi(ulong hart);
>> */
>> int smp_call_function(ulong addr, ulong arg0, ulong arg1, int wait);
>>
>> +/**
>> + * riscv_init_ipi() - Initialize inter-process interrupt (IPI) driver
>> + *
>> + * Platform code must provide this function. This function is called once after
>> + * the cpu driver is initialized. No other riscv_*_ipi() calls will be made
>> + * before this function is called.
>> + *
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_init_ipi(void);
>> +
>> +/**
>> + * riscv_send_ipi() - Send inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of receiving hart
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_send_ipi(int hart);
>> +
>> +/**
>> + * riscv_clear_ipi() - Clear inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of hart to be cleared
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_clear_ipi(int hart);
>> +
>> +/**
>> + * riscv_get_ipi() - Get status of inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of hart to be checked
>> + * @pending: Pointer to variable with result of the check,
>> + * 1 if IPI is pending, 0 otherwise
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_get_ipi(int hart, int *pending);
>> +
>> #endif
>> diff --git a/arch/riscv/lib/andes_plic.c b/arch/riscv/lib/andes_plic.c
>> index 20529ab3eb..8484f76386 100644
>> --- a/arch/riscv/lib/andes_plic.c
>> +++ b/arch/riscv/lib/andes_plic.c
>> @@ -30,20 +30,6 @@
>> #define SEND_IPI_TO_HART(hart) (0x80 >> (hart))
>>
>> DECLARE_GLOBAL_DATA_PTR;
>> -static int init_plic(void);
>> -
>> -#define PLIC_BASE_GET(void) \
>> - do { \
>> - long *ret; \
>> - \
>> - if (!gd->arch.plic) { \
>> - ret = syscon_get_first_range(RISCV_SYSCON_PLIC); \
>> - if (IS_ERR(ret)) \
>> - return PTR_ERR(ret); \
>> - gd->arch.plic = ret; \
>> - init_plic(); \
>> - } \
>> - } while (0)
>>
>> static int enable_ipi(int hart)
>> {
>> @@ -93,13 +79,21 @@ static int init_plic(void)
>> return -ENODEV;
>> }
>>
>> +int riscv_init_ipi(void)
>> +{
>> + int ret = syscon_get_first_range(RISCV_SYSCON_PLIC);
>> +
>> + if (IS_ERR(ret))
>> + return PTR_ERR(ret);
>> + gd->arch.plic = ret;
>> +
>> + return init_plic();
>> +}
>> +
>> int riscv_send_ipi(int hart)
>> {
>> - unsigned int ipi;
>> + unsigned int ipi = (SEND_IPI_TO_HART(hart) << (8 * gd->arch.boot_hart));
>>
>> - PLIC_BASE_GET();
>> -
>> - ipi = (SEND_IPI_TO_HART(hart) << (8 * gd->arch.boot_hart));
>> writel(ipi, (void __iomem *)PENDING_REG(gd->arch.plic,
>> gd->arch.boot_hart));
>>
>> @@ -110,8 +104,6 @@ int riscv_clear_ipi(int hart)
>> {
>> u32 source_id;
>>
>> - PLIC_BASE_GET();
>> -
>> source_id = readl((void __iomem *)CLAIM_REG(gd->arch.plic, hart));
>> writel(source_id, (void __iomem *)CLAIM_REG(gd->arch.plic, hart));
>>
>> @@ -120,8 +112,6 @@ int riscv_clear_ipi(int hart)
>>
>> int riscv_get_ipi(int hart, int *pending)
>> {
>> - PLIC_BASE_GET();
>> -
>> *pending = readl((void __iomem *)PENDING_REG(gd->arch.plic,
>> gd->arch.boot_hart));
>> *pending = !!(*pending & SEND_IPI_TO_HART(hart));
>> diff --git a/arch/riscv/lib/sbi_ipi.c b/arch/riscv/lib/sbi_ipi.c
>> index 9a698ce74e..310d1bd2a4 100644
>> --- a/arch/riscv/lib/sbi_ipi.c
>> +++ b/arch/riscv/lib/sbi_ipi.c
>> @@ -7,6 +7,11 @@
>> #include <common.h>
>> #include <asm/sbi.h>
>>
>> +int riscv_init_ipi(void)
>> +{
>> + return 0;
>> +}
>> +
>> int riscv_send_ipi(int hart)
>> {
>> ulong mask;
>> diff --git a/arch/riscv/lib/sifive_clint.c b/arch/riscv/lib/sifive_clint.c
>> index 5e0d25720b..62c1b2b0ef 100644
>> --- a/arch/riscv/lib/sifive_clint.c
>> +++ b/arch/riscv/lib/sifive_clint.c
>> @@ -24,22 +24,8 @@
>>
>> DECLARE_GLOBAL_DATA_PTR;
>>
>> -#define CLINT_BASE_GET(void) \
>> - do { \
>> - long *ret; \
>> - \
>> - if (!gd->arch.clint) { \
>> - ret = syscon_get_first_range(RISCV_SYSCON_CLINT); \
>> - if (IS_ERR(ret)) \
>> - return PTR_ERR(ret); \
>> - gd->arch.clint = ret; \
>> - } \
>> - } while (0)
>> -
>> int riscv_get_time(u64 *time)
>> {
>> - CLINT_BASE_GET();
>> -
>> *time = readq((void __iomem *)MTIME_REG(gd->arch.clint));
>>
>> return 0;
>> @@ -47,17 +33,24 @@ int riscv_get_time(u64 *time)
>>
>> int riscv_set_timecmp(int hart, u64 cmp)
>> {
>> - CLINT_BASE_GET();
>> -
>> writeq(cmp, (void __iomem *)MTIMECMP_REG(gd->arch.clint, hart));
>>
>> return 0;
>> }
>>
>> +int riscv_init_ipi(void)
>> +{
>> + long *ret = syscon_get_first_range(RISCV_SYSCON_CLINT);
>> +
>> + if (IS_ERR(ret))
>> + return PTR_ERR(ret);
>> + gd->arch.clint = ret;
>> +
>> + return 0;
>> +}
>> +
>
> Please fix the indentation here.
>
Ok.
>> int riscv_send_ipi(int hart)
>> {
>> - CLINT_BASE_GET();
>> -
>> writel(1, (void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>
>> return 0;
>> @@ -65,8 +58,6 @@ int riscv_send_ipi(int hart)
>>
>> int riscv_clear_ipi(int hart)
>> {
>> - CLINT_BASE_GET();
>> -
>> writel(0, (void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>
>> return 0;
>> @@ -74,8 +65,6 @@ int riscv_clear_ipi(int hart)
>>
>> int riscv_get_ipi(int hart, int *pending)
>> {
>> - CLINT_BASE_GET();
>> -
>> *pending = readl((void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>
>> return 0;
>> diff --git a/arch/riscv/lib/smp.c b/arch/riscv/lib/smp.c
>> index 17adb35730..3b1e52e9b2 100644
>> --- a/arch/riscv/lib/smp.c
>> +++ b/arch/riscv/lib/smp.c
>> @@ -12,38 +12,6 @@
>>
>> DECLARE_GLOBAL_DATA_PTR;
>>
>> -/**
>> - * riscv_send_ipi() - Send inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of receiving hart
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_send_ipi(int hart);
>> -
>> -/**
>> - * riscv_clear_ipi() - Clear inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of hart to be cleared
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_clear_ipi(int hart);
>> -
>> -/**
>> - * riscv_get_ipi() - Get status of inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of hart to be checked
>> - * @pending: Pointer to variable with result of the check,
>> - * 1 if IPI is pending, 0 otherwise
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_get_ipi(int hart, int *pending);
>> -
>> static int send_ipi_many(struct ipi_data *ipi, int wait)
>> {
>> ofnode node, cpus;
>> @@ -110,37 +78,41 @@ void handle_ipi(ulong hart)
>> int ret;
>> void (*smp_function)(ulong hart, ulong arg0, ulong arg1);
>>
>> - if (hart >= CONFIG_NR_CPUS)
>> + if (hart >= CONFIG_NR_CPUS || !READ_ONCE(gd->arch.ipi_ready))
>> return;
>>
>> - __smp_mb();
>> -
>> - smp_function = (void (*)(ulong, ulong, ulong))gd->arch.ipi[hart].addr;
>> - invalidate_icache_all();
>> -
>
> Don't move this. It is intended to be run before the IPI is cleared.
Hm, ok. I think I moved it to after because of the 'if (!smp_function)'
check, but those two don't really need to be done together.
>> /*
>> * Clear the IPI to acknowledge the request before jumping to the
>> * requested function.
>> */
>> ret = riscv_clear_ipi(hart);
>> if (ret) {
>> - pr_err("Cannot clear IPI of hart %ld\n", hart);
>> + pr_err("Cannot clear IPI of hart %ld (error %d)\n", hart, ret);
>> return;
>> }
>>
>> + __smp_mb();
>> +
>> + smp_function = (void (*)(ulong, ulong, ulong))gd->arch.ipi[hart].addr;
>> + /*
>> + * There may be an IPI raised before u-boot begins execution, so check
>> + * to ensure we actually have a function to call.
>> + */
>> + if (!smp_function)
>> + return;
>> + log_debug("hart = %lu func = %p\n", hart, smp_function);
>
> The log messages might be corrupted if multiple harts are calling the
> log function here. I have not looked into the details so this might not
> be an issue. In that case it is fine to keep, otherwise please remove
> it.
I ran into this problem a lot when debugging. I ended up implementing a
spinlock around puts/putc. I agree it's probably better to remove this,
but I worry that concurrency bugs will become much harder to track down
without some kind of feedback. (This same criticism applies to the log
message above as well).
>
> Thanks,
> Lukas
>
>> + invalidate_icache_all();
>> +
>> smp_function(hart, gd->arch.ipi[hart].arg0, gd->arch.ipi[hart].arg1);
>> }
>>
>> int smp_call_function(ulong addr, ulong arg0, ulong arg1, int wait)
>> {
>> - int ret = 0;
>> - struct ipi_data ipi;
>> + struct ipi_data ipi = {
>> + .addr = addr,
>> + .arg0 = arg0,
>> + .arg1 = arg1,
>> + };
>>
>> - ipi.addr = addr;
>> - ipi.arg0 = arg0;
>> - ipi.arg1 = arg1;
>> -
>> - ret = send_ipi_many(&ipi, wait);
>> -
>> - return ret;
>> + return send_ipi_many(&ipi, wait);
>> }
More information about the U-Boot
mailing list