[U-Boot] [PATCH 1/8] MIPS: avoid .set ISA for cache operations

Wed Jan 28 22:18:02 CET 2015

Am 28.01.2015 um 22:08 schrieb Paul Burton:
> On Wed, Jan 28, 2015 at 09:43:25PM +0100, Daniel Schwierzeck wrote:
>>
>>
>> Am 26.01.2015 um 16:02 schrieb Paul Burton:
>>> As a step towards unifying the cache maintenance code for mips32 &
>>> mips64 CPUs, stop using ".set <ISA>" directives in the more developed
>>> mips32 version of the code. Instead, when present make use of the GCC
>>> builtin for emitting a cache instruction. When not present, simply don't
>>> bother with the .set directives since U-boot always builds with
>>> -march=mips32 or higher anyway.
>>>
>>> Signed-off-by: Paul Burton <paul.burton at imgtec.com>
>>> Cc: Daniel Schwierzeck <daniel.schwierzeck at gmail.com>
>>> ---
>>>  arch/mips/cpu/mips32/cache.S     | 18 +++++-------------
>>>  arch/mips/cpu/mips32/cpu.c       | 40 +++++++++++++++-------------------------
>>>  arch/mips/include/asm/cacheops.h |  7 +++++++
>>>  3 files changed, 27 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/arch/mips/cpu/mips32/cache.S b/arch/mips/cpu/mips32/cache.S
>>> index 22bd844..fb1d84b 100644
>>> --- a/arch/mips/cpu/mips32/cache.S
>>> +++ b/arch/mips/cpu/mips32/cache.S
>>> @@ -22,14 +22,6 @@
>>>  
>>>  #define INDEX_BASE	CKSEG0
>>>  
>>> -	.macro	cache_op op addr
>>> -	.set	push
>>> -	.set	noreorder
>>> -	.set	mips3
>>> -	cache	\op, 0(\addr)
>>> -	.set	pop
>>> -	.endm
>>> -
>>>  	.macro	f_fill64 dst, offset, val
>>>  	LONG_S	\val, (\offset +  0 * LONGSIZE)(\dst)
>>>  	LONG_S	\val, (\offset +  1 * LONGSIZE)(\dst)
>>> @@ -60,17 +52,17 @@ LEAF(mips_init_icache)
>>>  	/* clear tag to invalidate */
>>>  	PTR_LI		t0, INDEX_BASE
>>>  	PTR_ADDU	t1, t0, a1
>>> -1:	cache_op	INDEX_STORE_TAG_I t0
>>> +1:	cache		INDEX_STORE_TAG_I, 0(t0)
>>>  	PTR_ADDU	t0, a2
>>>  	bne		t0, t1, 1b
>>>  	/* fill once, so data field parity is correct */
>>>  	PTR_LI		t0, INDEX_BASE
>>> -2:	cache_op	FILL t0
>>> +2:	cache		FILL, 0(t0)
>>>  	PTR_ADDU	t0, a2
>>>  	bne		t0, t1, 2b
>>>  	/* invalidate again - prudent but not strictly neccessary */
>>>  	PTR_LI		t0, INDEX_BASE
>>> -1:	cache_op	INDEX_STORE_TAG_I t0
>>> +1:	cache		INDEX_STORE_TAG_I, 0(t0)
>>>  	PTR_ADDU	t0, a2
>>>  	bne		t0, t1, 1b
>>>  9:	jr		ra
>>> @@ -85,7 +77,7 @@ LEAF(mips_init_dcache)
>>>  	/* clear all tags */
>>>  	PTR_LI		t0, INDEX_BASE
>>>  	PTR_ADDU	t1, t0, a1
>>> -1:	cache_op	INDEX_STORE_TAG_D t0
>>> +1:	cache		INDEX_STORE_TAG_D, 0(t0)
>>>  	PTR_ADDU	t0, a2
>>>  	bne		t0, t1, 1b
>>>  	/* load from each line (in cached space) */
>>> @@ -95,7 +87,7 @@ LEAF(mips_init_dcache)
>>>  	bne		t0, t1, 2b
>>>  	/* clear all tags */
>>>  	PTR_LI		t0, INDEX_BASE
>>> -1:	cache_op	INDEX_STORE_TAG_D t0
>>> +1:	cache		INDEX_STORE_TAG_D, 0(t0)
>>>  	PTR_ADDU	t0, a2
>>>  	bne		t0, t1, 1b
>>>  9:	jr		ra
>>> diff --git a/arch/mips/cpu/mips32/cpu.c b/arch/mips/cpu/mips32/cpu.c
>>> index 278865b..3f247fb 100644
>>> --- a/arch/mips/cpu/mips32/cpu.c
>>> +++ b/arch/mips/cpu/mips32/cpu.c
>>> @@ -12,16 +12,6 @@
>>>  #include <asm/cacheops.h>
>>>  #include <asm/reboot.h>
>>>  
>>> -#define cache_op(op,addr)						\
>>> -	__asm__ __volatile__(						\
>>> -	"	.set	push					\n"	\
>>> -	"	.set	noreorder				\n"	\
>>> -	"	.set	mips3\n\t				\n"	\
>>> -	"	cache	%0, %1					\n"	\
>>> -	"	.set	pop					\n"	\
>>> -	:								\
>>> -	: "i" (op), "R" (*(unsigned char *)(addr)))
>>> -
>>>  void __attribute__((weak)) _machine_restart(void)
>>>  {
>>>  }
>>> @@ -74,20 +64,20 @@ void flush_cache(ulong start_addr, ulong size)
>>>  {
>>>  	unsigned long ilsize = icache_line_size();
>>>  	unsigned long dlsize = dcache_line_size();
>>> -	unsigned long addr, aend;
>>> +	const volatile void *addr, *aend;
>>
>> why do you need volatile?
>>
> 
> I was just reflecting the type of the argument to __mips_builtin_cache:
> 
>   https://gcc.gnu.org/onlinedocs/gcc/Other-MIPS-Built-in-Functions.html
> 

ok, but then it's sufficient if __mips_builtin_cache adds the modifier.
No need to add it to the variables.

>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (size == 0)
>>>  		return;
>>>  
>>> -	addr = start_addr & ~(dlsize - 1);
>>> -	aend = (start_addr + size - 1) & ~(dlsize - 1);
>>> +	addr = (void *)(start_addr & ~(dlsize - 1));
>>> +	aend = (void *)((start_addr + size - 1) & ~(dlsize - 1));
>>
>> shouldn't that be (const void *) ?
>>
> 
> I don't think it really matters since the assignment is to a more
> restricted type rather than from one, but I can change it if you like.

sure but I think it's more consistent and corresponds to coding style

> 
>>>  
>>>  	if (ilsize == dlsize) {
>>>  		/* flush I-cache & D-cache simultaneously */
>>>  		while (1) {
>>> -			cache_op(HIT_WRITEBACK_INV_D, addr);
>>> -			cache_op(HIT_INVALIDATE_I, addr);
>>> +			mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> +			mips_cache(HIT_INVALIDATE_I, addr);
>>>  			if (addr == aend)
>>>  				break;
>>>  			addr += dlsize;
>>> @@ -97,17 +87,17 @@ void flush_cache(ulong start_addr, ulong size)
>>>  
>>>  	/* flush D-cache */
>>>  	while (1) {
>>> -		cache_op(HIT_WRITEBACK_INV_D, addr);
>>> +		mips_cache(HIT_WRITEBACK_INV_D, addr);
>>>  		if (addr == aend)
>>>  			break;
>>>  		addr += dlsize;
>>>  	}
>>>  
>>>  	/* flush I-cache */
>>> -	addr = start_addr & ~(ilsize - 1);
>>> -	aend = (start_addr + size - 1) & ~(ilsize - 1);
>>> +	addr = (void *)(start_addr & ~(ilsize - 1));
>>> +	aend = (void *)((start_addr + size - 1) & ~(ilsize - 1));
>>>  	while (1) {
>>> -		cache_op(HIT_INVALIDATE_I, addr);
>>> +		mips_cache(HIT_INVALIDATE_I, addr);
>>>  		if (addr == aend)
>>>  			break;
>>>  		addr += ilsize;
>>> @@ -117,11 +107,11 @@ void flush_cache(ulong start_addr, ulong size)
>>>  void flush_dcache_range(ulong start_addr, ulong stop)
>>>  {
>>>  	unsigned long lsize = dcache_line_size();
>>> -	unsigned long addr = start_addr & ~(lsize - 1);
>>> -	unsigned long aend = (stop - 1) & ~(lsize - 1);
>>> +	const volatile void *addr = (void *)(start_addr & ~(lsize - 1));
>>> +	const volatile void *aend = (void *)((stop - 1) & ~(lsize - 1));
>>>  
>>>  	while (1) {
>>> -		cache_op(HIT_WRITEBACK_INV_D, addr);
>>> +		mips_cache(HIT_WRITEBACK_INV_D, addr);
>>>  		if (addr == aend)
>>>  			break;
>>>  		addr += lsize;
>>> @@ -131,11 +121,11 @@ void flush_dcache_range(ulong start_addr, ulong stop)
>>>  void invalidate_dcache_range(ulong start_addr, ulong stop)
>>>  {
>>>  	unsigned long lsize = dcache_line_size();
>>> -	unsigned long addr = start_addr & ~(lsize - 1);
>>> -	unsigned long aend = (stop - 1) & ~(lsize - 1);
>>> +	const volatile void *addr = (void *)(start_addr & ~(lsize - 1));
>>> +	const volatile void *aend = (void *)((stop - 1) & ~(lsize - 1));
>>>  
>>>  	while (1) {
>>> -		cache_op(HIT_INVALIDATE_D, addr);
>>> +		mips_cache(HIT_INVALIDATE_D, addr);
>>>  		if (addr == aend)
>>>  			break;
>>>  		addr += lsize;
>>> diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h
>>> index 6464250..809c966 100644
>>> --- a/arch/mips/include/asm/cacheops.h
>>> +++ b/arch/mips/include/asm/cacheops.h
>>> @@ -11,6 +11,13 @@
>>>  #ifndef	__ASM_CACHEOPS_H
>>>  #define	__ASM_CACHEOPS_H
>>>  
>>> +#ifdef __GCC_HAVE_BUILTIN_MIPS_CACHE
>>> +# define mips_cache __builtin_mips_cache
>>> +#else
>>> +# define mips_cache(op,addr) \
>>
>> space after op,
>>
> 
> Right you are! In my defence it was copied from the original
> implementation in cpu.c ;)

to match the __builtin_mips_cache prototype, maybe we should use

static inline void mips_cache(int op, const volatile void *addr)
{
        __asm__ __volatile__("cache %0, %1" : : "i"(op), "R"(addr));
}

> 
> Paul
> 
>>> +	__asm__ __volatile__("cache	%0, %1" : : "i"(op), "R"(addr))
>>> +#endif
>>> +
>>>  /*
>>>   * Cache Operations available on all MIPS processors with R4000-style caches
>>>   */
>>>
>>
>> -- 
>> - Daniel

-- 
- Daniel