[PATCH v5 27/33] riscv: Fix race conditions when initializing IPI

Sean Anderson seanga2 at gmail.com
Tue Mar 3 00:43:18 CET 2020


On 3/2/20 6:17 PM, Lukas Auer wrote:
> On Fri, 2020-02-28 at 16:05 -0500, Sean Anderson wrote:
> 
>> The IPI code could have race conditions in several places.
>> * Several harts could race on the value of gd->arch.clint/plic
>> * Non-boot harts could race with the main hart on the DM subsystem
>>
>> In addition, if an IPI was pending when U-Boot started, it would cause the
>> IPI handler to jump to address 0.
>>
>> To address these problems, a new function riscv_init_ipi is introduced. It
>> is called once during arch_cpu_init_dm. Before this point, no riscv_*_ipi
>> functions may be called. Access is synchronized by gd->arch.ipi_ready.
>>
>> Signed-off-by: Sean Anderson <seanga2 at gmail.com>
>> ---
>>
>> Changes in v5:
>> - New
>>
>>  arch/riscv/cpu/cpu.c                 |  9 ++++
>>  arch/riscv/include/asm/global_data.h |  1 +
>>  arch/riscv/include/asm/smp.h         | 43 ++++++++++++++++++
>>  arch/riscv/lib/andes_plic.c          | 34 +++++---------
>>  arch/riscv/lib/sbi_ipi.c             |  5 ++
>>  arch/riscv/lib/sifive_clint.c        | 33 +++++---------
>>  arch/riscv/lib/smp.c                 | 68 ++++++++--------------------
>>  7 files changed, 101 insertions(+), 92 deletions(-)
>>
>> diff --git a/arch/riscv/cpu/cpu.c b/arch/riscv/cpu/cpu.c
>> index e457f6acbf..a971ec8694 100644
>> --- a/arch/riscv/cpu/cpu.c
>> +++ b/arch/riscv/cpu/cpu.c
>> @@ -96,6 +96,15 @@ int arch_cpu_init_dm(void)
>>  			csr_write(CSR_SATP, 0);
>>  	}
>>  
>> +#ifdef CONFIG_SMP
>> +	ret = riscv_init_ipi();
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* Atomically set a flag enabling IPI handling */
>> +	WRITE_ONCE(gd->arch.ipi_ready, 1);
>> +#endif
>> +
>>  	return 0;
>>  }
>>  
>> diff --git a/arch/riscv/include/asm/global_data.h b/arch/riscv/include/asm/global_data.h
>> index 7276d9763f..b24f8fd2a7 100644
>> --- a/arch/riscv/include/asm/global_data.h
>> +++ b/arch/riscv/include/asm/global_data.h
>> @@ -28,6 +28,7 @@ struct arch_global_data {
>>  #endif
>>  #ifdef CONFIG_SMP
>>  	struct ipi_data ipi[CONFIG_NR_CPUS];
>> +	long ipi_ready; /* Set after riscv_init_ipi is called */
>>  #endif
>>  #ifndef CONFIG_XIP
>>  	ulong available_harts;
>> diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
>> index 74de92ed13..1b428856b2 100644
>> --- a/arch/riscv/include/asm/smp.h
>> +++ b/arch/riscv/include/asm/smp.h
>> @@ -51,4 +51,47 @@ void handle_ipi(ulong hart);
>>   */
>>  int smp_call_function(ulong addr, ulong arg0, ulong arg1, int wait);
>>  
>> +/**
>> + * riscv_init_ipi() - Initialize inter-processor interrupt (IPI) driver
>> + *
>> + * Platform code must provide this function. This function is called once after
>> + * the cpu driver is initialized. No other riscv_*_ipi() calls will be made
>> + * before this function is called.
>> + *
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_init_ipi(void);
>> +
>> +/**
>> + * riscv_send_ipi() - Send inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of receiving hart
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_send_ipi(int hart);
>> +
>> +/**
>> + * riscv_clear_ipi() - Clear inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of hart to be cleared
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_clear_ipi(int hart);
>> +
>> +/**
>> + * riscv_get_ipi() - Get status of inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of hart to be checked
>> + * @pending: Pointer to variable with result of the check,
>> + *           1 if IPI is pending, 0 otherwise
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_get_ipi(int hart, int *pending);
>> +
>>  #endif
>> diff --git a/arch/riscv/lib/andes_plic.c b/arch/riscv/lib/andes_plic.c
>> index 20529ab3eb..8484f76386 100644
>> --- a/arch/riscv/lib/andes_plic.c
>> +++ b/arch/riscv/lib/andes_plic.c
>> @@ -30,20 +30,6 @@
>>  #define SEND_IPI_TO_HART(hart)  (0x80 >> (hart))
>>  
>>  DECLARE_GLOBAL_DATA_PTR;
>> -static int init_plic(void);
>> -
>> -#define PLIC_BASE_GET(void)						\
>> -	do {								\
>> -		long *ret;						\
>> -									\
>> -		if (!gd->arch.plic) {					\
>> -			ret = syscon_get_first_range(RISCV_SYSCON_PLIC); \
>> -			if (IS_ERR(ret))				\
>> -				return PTR_ERR(ret);			\
>> -			gd->arch.plic = ret;				\
>> -			init_plic();					\
>> -		}							\
>> -	} while (0)
>>  
>>  static int enable_ipi(int hart)
>>  {
>> @@ -93,13 +79,21 @@ static int init_plic(void)
>>  	return -ENODEV;
>>  }
>>  
>> +int riscv_init_ipi(void)
>> +{
>> +	int ret = syscon_get_first_range(RISCV_SYSCON_PLIC);
>> +
>> +	if (IS_ERR(ret))
>> +		return PTR_ERR(ret);
>> +	gd->arch.plic = ret;
>> +
>> +	return init_plic();
>> +}
>> +
>>  int riscv_send_ipi(int hart)
>>  {
>> -	unsigned int ipi;
>> +	unsigned int ipi = (SEND_IPI_TO_HART(hart) << (8 * gd->arch.boot_hart));
>>  
>> -	PLIC_BASE_GET();
>> -
>> -	ipi = (SEND_IPI_TO_HART(hart) << (8 * gd->arch.boot_hart));
>>  	writel(ipi, (void __iomem *)PENDING_REG(gd->arch.plic,
>>  				gd->arch.boot_hart));
>>  
>> @@ -110,8 +104,6 @@ int riscv_clear_ipi(int hart)
>>  {
>>  	u32 source_id;
>>  
>> -	PLIC_BASE_GET();
>> -
>>  	source_id = readl((void __iomem *)CLAIM_REG(gd->arch.plic, hart));
>>  	writel(source_id, (void __iomem *)CLAIM_REG(gd->arch.plic, hart));
>>  
>> @@ -120,8 +112,6 @@ int riscv_clear_ipi(int hart)
>>  
>>  int riscv_get_ipi(int hart, int *pending)
>>  {
>> -	PLIC_BASE_GET();
>> -
>>  	*pending = readl((void __iomem *)PENDING_REG(gd->arch.plic,
>>  						     gd->arch.boot_hart));
>>  	*pending = !!(*pending & SEND_IPI_TO_HART(hart));
>> diff --git a/arch/riscv/lib/sbi_ipi.c b/arch/riscv/lib/sbi_ipi.c
>> index 9a698ce74e..310d1bd2a4 100644
>> --- a/arch/riscv/lib/sbi_ipi.c
>> +++ b/arch/riscv/lib/sbi_ipi.c
>> @@ -7,6 +7,11 @@
>>  #include <common.h>
>>  #include <asm/sbi.h>
>>  
>> +int riscv_init_ipi(void)
>> +{
>> +	return 0;
>> +}
>> +
>>  int riscv_send_ipi(int hart)
>>  {
>>  	ulong mask;
>> diff --git a/arch/riscv/lib/sifive_clint.c b/arch/riscv/lib/sifive_clint.c
>> index 5e0d25720b..62c1b2b0ef 100644
>> --- a/arch/riscv/lib/sifive_clint.c
>> +++ b/arch/riscv/lib/sifive_clint.c
>> @@ -24,22 +24,8 @@
>>  
>>  DECLARE_GLOBAL_DATA_PTR;
>>  
>> -#define CLINT_BASE_GET(void)						\
>> -	do {								\
>> -		long *ret;						\
>> -									\
>> -		if (!gd->arch.clint) {					\
>> -			ret = syscon_get_first_range(RISCV_SYSCON_CLINT); \
>> -			if (IS_ERR(ret))				\
>> -				return PTR_ERR(ret);			\
>> -			gd->arch.clint = ret;				\
>> -		}							\
>> -	} while (0)
>> -
>>  int riscv_get_time(u64 *time)
>>  {
>> -	CLINT_BASE_GET();
>> -
>>  	*time = readq((void __iomem *)MTIME_REG(gd->arch.clint));
>>  
>>  	return 0;
>> @@ -47,17 +33,24 @@ int riscv_get_time(u64 *time)
>>  
>>  int riscv_set_timecmp(int hart, u64 cmp)
>>  {
>> -	CLINT_BASE_GET();
>> -
>>  	writeq(cmp, (void __iomem *)MTIMECMP_REG(gd->arch.clint, hart));
>>  
>>  	return 0;
>>  }
>>  
>> +int riscv_init_ipi(void)
>> +{
>> +		long *ret = syscon_get_first_range(RISCV_SYSCON_CLINT);
>> +
>> +		if (IS_ERR(ret))
>> +			return PTR_ERR(ret);
>> +		gd->arch.clint = ret;
>> +
>> +		return 0;
>> +}
>> +
> 
> Please fix the indentation here.
>

Ok.

>>  int riscv_send_ipi(int hart)
>>  {
>> -	CLINT_BASE_GET();
>> -
>>  	writel(1, (void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>  
>>  	return 0;
>> @@ -65,8 +58,6 @@ int riscv_send_ipi(int hart)
>>  
>>  int riscv_clear_ipi(int hart)
>>  {
>> -	CLINT_BASE_GET();
>> -
>>  	writel(0, (void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>  
>>  	return 0;
>> @@ -74,8 +65,6 @@ int riscv_clear_ipi(int hart)
>>  
>>  int riscv_get_ipi(int hart, int *pending)
>>  {
>> -	CLINT_BASE_GET();
>> -
>>  	*pending = readl((void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>  
>>  	return 0;
>> diff --git a/arch/riscv/lib/smp.c b/arch/riscv/lib/smp.c
>> index 17adb35730..3b1e52e9b2 100644
>> --- a/arch/riscv/lib/smp.c
>> +++ b/arch/riscv/lib/smp.c
>> @@ -12,38 +12,6 @@
>>  
>>  DECLARE_GLOBAL_DATA_PTR;
>>  
>> -/**
>> - * riscv_send_ipi() - Send inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of receiving hart
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_send_ipi(int hart);
>> -
>> -/**
>> - * riscv_clear_ipi() - Clear inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of hart to be cleared
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_clear_ipi(int hart);
>> -
>> -/**
>> - * riscv_get_ipi() - Get status of inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of hart to be checked
>> - * @pending: Pointer to variable with result of the check,
>> - *           1 if IPI is pending, 0 otherwise
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_get_ipi(int hart, int *pending);
>> -
>>  static int send_ipi_many(struct ipi_data *ipi, int wait)
>>  {
>>  	ofnode node, cpus;
>> @@ -110,37 +78,41 @@ void handle_ipi(ulong hart)
>>  	int ret;
>>  	void (*smp_function)(ulong hart, ulong arg0, ulong arg1);
>>  
>> -	if (hart >= CONFIG_NR_CPUS)
>> +	if (hart >= CONFIG_NR_CPUS || !READ_ONCE(gd->arch.ipi_ready))
>>  		return;
>>  
>> -	__smp_mb();
>> -
>> -	smp_function = (void (*)(ulong, ulong, ulong))gd->arch.ipi[hart].addr;
>> -	invalidate_icache_all();
>> -
> 
> Don't move this. It is intended to be run before the IPI is cleared.

Hm, ok. I think I moved it after the IPI clear because of the
'if (!smp_function)' check, but those two don't really need to be done
together.

>>  	/*
>>  	 * Clear the IPI to acknowledge the request before jumping to the
>>  	 * requested function.
>>  	 */
>>  	ret = riscv_clear_ipi(hart);
>>  	if (ret) {
>> -		pr_err("Cannot clear IPI of hart %ld\n", hart);
>> +		pr_err("Cannot clear IPI of hart %ld (error %d)\n", hart, ret);
>>  		return;
>>  	}
>>  
>> +	__smp_mb();
>> +
>> +	smp_function = (void (*)(ulong, ulong, ulong))gd->arch.ipi[hart].addr;
>> +	/*
>> +	 * There may be an IPI raised before u-boot begins execution, so check
>> +	 * to ensure we actually have a function to call.
>> +	 */
>> +	if (!smp_function)
>> +		return;
>> +	log_debug("hart = %lu func = %p\n", hart, smp_function);
> 
> The log messages might be corrupted if multiple harts are calling the
> log function here. I have not looked into the details so this might not
> be an issue. In that case it is fine to keep, otherwise please remove
> it.

I ran into this problem a lot when debugging. I ended up implementing a
spinlock around puts/putc. I agree it's probably better to remove this,
but I worry that concurrency bugs will become much harder to track down
without some kind of feedback. (This same criticism applies to the log
message above as well).

> 
> Thanks,
> Lukas
> 
>> +	invalidate_icache_all();
>> +
>>  	smp_function(hart, gd->arch.ipi[hart].arg0, gd->arch.ipi[hart].arg1);
>>  }
>>  
>>  int smp_call_function(ulong addr, ulong arg0, ulong arg1, int wait)
>>  {
>> -	int ret = 0;
>> -	struct ipi_data ipi;
>> +	struct ipi_data ipi = {
>> +		.addr = addr,
>> +		.arg0 = arg0,
>> +		.arg1 = arg1,
>> +	};
>>  
>> -	ipi.addr = addr;
>> -	ipi.arg0 = arg0;
>> -	ipi.arg1 = arg1;
>> -
>> -	ret = send_ipi_many(&ipi, wait);
>> -
>> -	return ret;
>> +	return send_ipi_many(&ipi, wait);
>>  }




More information about the U-Boot mailing list