[U-Boot] [PATCH 3/3] MIPS: Abstract cache op loops with a macro

Fri May 27 16:36:21 CEST 2016

On 05/27/2016 12:30 PM, Paul Burton wrote:
> On Thu, May 26, 2016 at 06:13:17PM +0200, Marek Vasut wrote:
>> On 05/26/2016 05:58 PM, Paul Burton wrote:
>>> The various cache maintenance routines perform a number of loops over
>>> cache lines. Rather than duplicate the code for performing such loops,
>>> abstract it out into a new cache_loop macro which performs an arbitrary
>>> number of cache ops on a range of addresses. This reduces duplication in
>>> the existing L1 cache maintenance code & will allow for not adding
>>> further duplication when introducing L2 cache support.
>>>
>>> Signed-off-by: Paul Burton <paul.burton at imgtec.com>
>>>
>>> ---
>>>
>>>  arch/mips/lib/cache.c | 59 ++++++++++++++++-----------------------------------
>>>  1 file changed, 18 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/arch/mips/lib/cache.c b/arch/mips/lib/cache.c
>>> index 2bb91c6..320335c 100644
>>> --- a/arch/mips/lib/cache.c
>>> +++ b/arch/mips/lib/cache.c
>>> @@ -37,82 +37,59 @@ static inline unsigned long dcache_line_size(void)
>>>  	return 2 << dl;
>>>  }
>>>  
>>> +#define cache_loop(start, end, lsize, ops...) do {			\
>>> +	const void *addr = (const void *)(start & ~(lsize - 1));	\
>>> +	const void *aend = (const void *)((end - 1) & ~(lsize - 1));	\
>>> +	const unsigned int cache_ops[] = { ops };			\
>>
>> Can't you turn this into a function instead and pass some flags to
>> select the ops instead ?
> 
> Hi Marek,

Hi!

> The problem is that then both that function & its callers would need to
> know about the types of cache ops, and there'd need to be some mapping
> from flags to the actual cache op values (of which there'll be a couple
> more once L2 support is added).

But the callers already know which ops they must invoke, right ?
You can just have enum {} for the bit definitions. It's all local
to this file, so I don't see a problem.

> I think it's much simpler & cleaner to
> just go with the macro in this case - it keeps knowledge of which cache
> ops to use with the callers, and it ends up generating the same code as
> before (ie. the inner loop gets unrolled to just the relevant cache
> instructions).

I would expect GCC to take care of inlining the code, but you can always
use the always_inline directive if that's your concern. Even cleaner way
might be to use CONFIG_CC_OPTIMIZE_LIBS_FOR_SPEED and compile the
cache file with -O2 .

> Thanks,
>     Paul
> 
>>
>>> +	unsigned int i;							\
>>> +									\
>>> +	for (; addr <= aend; addr += lsize) {				\
>>> +		for (i = 0; i < ARRAY_SIZE(cache_ops); i++)		\
>>> +			mips_cache(cache_ops[i], addr);			\
>>> +	}								\
>>> +} while (0)
>>> +
>>>  void flush_cache(ulong start_addr, ulong size)
>>>  {
>>>  	unsigned long ilsize = icache_line_size();
>>>  	unsigned long dlsize = dcache_line_size();
>>> -	const void *addr, *aend;
>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (size == 0)
>>>  		return;
>>>  
>>> -	addr = (const void *)(start_addr & ~(dlsize - 1));
>>> -	aend = (const void *)((start_addr + size - 1) & ~(dlsize - 1));
>>> -
>>>  	if (ilsize == dlsize) {
>>>  		/* flush I-cache & D-cache simultaneously */
>>> -		while (1) {
>>> -			mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> -			mips_cache(HIT_INVALIDATE_I, addr);
>>> -			if (addr == aend)
>>> -				break;
>>> -			addr += dlsize;
>>> -		}
>>> +		cache_loop(start_addr, start_addr + size, ilsize,
>>> +			   HIT_WRITEBACK_INV_D, HIT_INVALIDATE_I);
>>>  		return;
>>>  	}
>>>  
>>>  	/* flush D-cache */
>>> -	while (1) {
>>> -		mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += dlsize;
>>> -	}
>>> +	cache_loop(start_addr, start_addr + size, dlsize, HIT_WRITEBACK_INV_D);
>>>  
>>>  	/* flush I-cache */
>>> -	addr = (const void *)(start_addr & ~(ilsize - 1));
>>> -	aend = (const void *)((start_addr + size - 1) & ~(ilsize - 1));
>>> -	while (1) {
>>> -		mips_cache(HIT_INVALIDATE_I, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += ilsize;
>>> -	}
>>> +	cache_loop(start_addr, start_addr + size, ilsize, HIT_INVALIDATE_I);
>>>  }
>>>  
>>>  void flush_dcache_range(ulong start_addr, ulong stop)
>>>  {
>>>  	unsigned long lsize = dcache_line_size();
>>> -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
>>> -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (start_addr == stop)
>>>  		return;
>>>  
>>> -	while (1) {
>>> -		mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += lsize;
>>> -	}
>>> +	cache_loop(start_addr, stop, lsize, HIT_WRITEBACK_INV_D);
>>>  }
>>>  
>>>  void invalidate_dcache_range(ulong start_addr, ulong stop)
>>>  {
>>>  	unsigned long lsize = dcache_line_size();
>>> -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
>>> -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (start_addr == stop)
>>>  		return;
>>>  
>>> -	while (1) {
>>> -		mips_cache(HIT_INVALIDATE_D, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += lsize;
>>> -	}
>>> +	cache_loop(start_addr, stop, lsize, HIT_INVALIDATE_I);
>>>  }
>>>
>>
>>
>> -- 
>> Best regards,
>> Marek Vasut

-- 
Best regards,
Marek Vasut