[U-Boot] [PATCH 2/8] armv7: cache maintenance operations for armv7

Albert ARIBAUD albert.aribaud at free.fr
Sat Jan 8 07:36:44 CET 2011


Hi Aneesh,

Le 22/12/2010 12:54, Aneesh V a écrit :
> - Add a framework for layered cache maintenance
> 	- separate out SOC specific outer cache maintenance from
> 	  maintenance of caches known to CPU
>
> - Add generic ARMv7 cache maintenance operations that affect all
>    caches known to ARMv7 CPUs. For instance in Cortex-A8 these
>    operations will affect both L1 and L2 caches. In Cortex-A9
>    these will affect only L1 cache
>
> - D-cache operations supported:
> 	- Invalidate entire D-cache
> 	- Invalidate D-cache range
> 	- Flush (clean & invalidate) entire D-cache
> 	- Flush D-cache range
> - I-cache operations supported:
> 	- Invalidate entire I-cache
>
> - Add maintenance functions for TLB, branch predictor array etc.
>
> - Enable -march=armv7-a so that armv7 assembly instructions can be
>    used
>
> Signed-off-by: Aneesh V<aneesh at ti.com>
> ---
>   arch/arm/cpu/armv7/Makefile   |    2 +-
>   arch/arm/cpu/armv7/cache_v7.c |  359 +++++++++++++++++++++++++++++++++++++++++
>   arch/arm/cpu/armv7/config.mk  |    2 +-
>   arch/arm/include/asm/armv7.h  |   63 +++++++
>   include/common.h              |    5 +-
>   5 files changed, 428 insertions(+), 3 deletions(-)
>   create mode 100644 arch/arm/cpu/armv7/cache_v7.c
>   create mode 100644 arch/arm/include/asm/armv7.h
>
> diff --git a/arch/arm/cpu/armv7/Makefile b/arch/arm/cpu/armv7/Makefile
> index 8c0e915..299792a 100644
> --- a/arch/arm/cpu/armv7/Makefile
> +++ b/arch/arm/cpu/armv7/Makefile
> @@ -26,7 +26,7 @@ include $(TOPDIR)/config.mk
>   LIB	= $(obj)lib$(CPU).o
>
>   START	:= start.o
> -COBJS	:= cpu.o
> +COBJS	:= cpu.o cache_v7.o
>   COBJS  += syslib.o
>
>   SRCS	:= $(START:.o=.S) $(COBJS:.o=.c)
> diff --git a/arch/arm/cpu/armv7/cache_v7.c b/arch/arm/cpu/armv7/cache_v7.c
> new file mode 100644
> index 0000000..0521d66
> --- /dev/null
> +++ b/arch/arm/cpu/armv7/cache_v7.c
> @@ -0,0 +1,359 @@
> +/*
> + * (C) Copyright 2010
> + * Texas Instruments Incorporated - http://www.ti.com/
> + * Aneesh V<aneesh at ti.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +#include<linux/types.h>
> +#include<common.h>
> +#include<asm/armv7.h>
> +
> +#define ARMV7_DCACHE_INVAL_ALL		1
> +#define ARMV7_DCACHE_CLEAN_INVAL_ALL	2
> +#define ARMV7_DCACHE_INVAL_RANGE	3
> +#define ARMV7_DCACHE_CLEAN_INVAL_RANGE	4
> +
> +struct v7_outer_cache_ops v7_outer_cache;
> +
> +#ifndef CONFIG_SYS_NO_DCACHE
> +/*
> + * Write the level and type you want to Cache Size Selection Register(CSSELR)
> + * to get size details from Current Cache Size ID Register(CCSIDR)
> + */
> +static void set_csselr(u32 level, u32 type)
> +{	u32 csselr = level<<  1 | type;
> +	/* Write to Cache Size Selection Register(CSSELR) */
> +	asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr));
> +}
> +
> +static u32 get_ccsidr(void)
> +{
> +	u32 ccsidr;
> +	/* Read current CP15 Cache Size ID Register */
> +	asm volatile ("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr));
> +	return ccsidr;
> +}
> +
> +static u32 get_clidr(void)
> +{
> +	u32 clidr;
> +	/* Read current CP15 Cache Level ID Register */
> +	asm volatile ("mrc p15,1,%0,c0,c0,1" : "=r" (clidr));
> +	return clidr;
> +}
> +
> +/* round up the input number to a power of 2 and get the log2 */
> +static inline u32 log_2_round_up(u32 num)
> +{
> +	/* count leading zeros */
> +	asm volatile ("CLZ %0, %0" : "+r" (num));
> +
> +	/* position of most significant 1 */
> +	num = 31 - num;
> +
> +	return num;
> +}
> +
> +static void v7_inval_dcache_level_setway(u32 level, u32 num_sets,
> +					 u32 num_ways, u32 way_shift,
> +					 u32 log2_line_len)
> +{
> +	int way, set, setway;
> +	/*
> +	 * For optimal assembly code:
> +	 *	a. count down
> +	 *	b. have bigger loop inside
> +	 */

Out of curiosity, can you elaborate on why the compiler would optimize 
better in these cases?

> +	for (way = num_ways - 1; way>= 0 ; way--)
> +		for (set = num_sets - 1; set>= 0; set--) {

Please fix the whitespace around operators. The best way to "catch 'em
all" is to run Linux's checkpatch.pl (I do this with option --no-tree)
on all patches that you submit to U-Boot, fix all warnings and errors,
and, if some are left that you think should not be fixed, mention them
and explain why they're wrongly emitted.

> +			setway = (level<<  1) | (set<<  log2_line_len) |
> +				 (way<<  way_shift);
> +			/* Invalidate data/unified cache line by set/way */
> +			asm volatile ("	mcr p15, 0, %0, c7, c6, 2"
> +					: : "r" (setway));
> +		}
> +	/* Make sure the operation is complete */
> +	asm volatile ("DMB");
> +}
> +
> +static void v7_clean_inval_dcache_level_setway(u32 level, u32 num_sets,
> +					       u32 num_ways, u32 way_shift,
> +					       u32 log2_line_len)
> +{
> +	int way, set, setway;
> +	/*
> +	 * For optimal assembly code:
> +	 *	a. count down
> +	 *	b. have bigger loop inside
> +	 */
> +	for (way = num_ways - 1; way>= 0 ; way--)
> +		for (set = num_sets - 1; set>= 0; set--) {
> +			setway = (level<<  1) | (set<<  log2_line_len) |
> +				 (way<<  way_shift);
> +			/*
> +			 * Clean&  Invalidate data/unified
> +			 * cache line by set/way
> +			 */
> +			asm volatile ("	mcr p15, 0, %0, c7, c14, 2"
> +					: : "r" (setway));
> +		}
> +	/* Make sure the operation is complete */
> +	asm volatile ("DMB");
> +}
> +
> +static void v7_maint_dcache_level_setway(u32 level, u32 operation)
> +{
> +	u32 ccsidr;
> +	u32 num_sets, num_ways, log2_line_len, log2_num_ways;
> +	u32 way_shift;
> +	set_csselr(level, ARMV7_CSSELR_IND_DATA_UNIFIED);
> +
> +	ccsidr = get_ccsidr();
> +
> +	log2_line_len = mask_n_get(ccsidr, 0, 2) + 2;
> +	/* Converting from words to bytes */
> +	log2_line_len += 2;
> +
> +	num_ways  = mask_n_get(ccsidr, 3, 12) + 1;
> +	num_sets  = mask_n_get(ccsidr, 13, 27) + 1;
> +	/*
> +	 * According to ARMv7 ARM number of sets and number of ways need
> +	 * not be a power of 2
> +	 */
> +	log2_num_ways = log_2_round_up(num_ways);
> +
> +	way_shift = (32 - log2_num_ways);
> +	if (operation == ARMV7_DCACHE_INVAL_ALL)
> +		v7_inval_dcache_level_setway(level, num_sets, num_ways,
> +				      way_shift, log2_line_len);
> +	else if (operation == ARMV7_DCACHE_CLEAN_INVAL_ALL)
> +		v7_clean_inval_dcache_level_setway(level, num_sets, num_ways,
> +						   way_shift, log2_line_len);
> +}
> +
> +static void v7_maint_dcache_all(u32 operation)
> +{
> +	u32 level, cache_type, level_start_bit = 0;
> +
> +	u32 clidr = get_clidr();
> +
> +	for (level = 0; level<  7; level++) {
> +		cache_type = mask_n_get(clidr, level_start_bit,
> +					level_start_bit + 2);
> +		if ((cache_type == ARMV7_CLIDR_CTYPE_DATA_ONLY) ||
> +		    (cache_type == ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA) ||
> +		    (cache_type == ARMV7_CLIDR_CTYPE_UNIFIED))
> +			v7_maint_dcache_level_setway(level, operation);
> +		level_start_bit += 3;
> +	}
> +}
> +
> +static void v7_dcache_clean_inval_range(u32 start,
> +					u32 stop, u32 line_len)
> +{
> +	u32 mva;
> +	/* Align start to cache line boundary */
> +	start&= ~(line_len - 1);
> +	for (mva = start; mva<  stop; mva = mva + line_len)
> +		/* DCCIMVAC - Clean&  Invalidate data cache by MVA to PoC */
> +		asm volatile ("mcr p15, 0, %0, c7, c14, 1" : : "r" (mva));
> +}
> +
> +static void v7_dcache_inval_range(u32 start, u32 stop, u32 line_len)
> +{
> +	u32 mva;
> +
> +	/*
> +	 * If start address is not aligned to cache-line flush the first
> +	 * line to prevent affecting somebody else's buffer
> +	 */
> +	if (start&  (line_len - 1)) {
> +		v7_dcache_clean_inval_range(start, start + 1, line_len);
> +		/* move to next cache line */
> +		start = (start + line_len - 1)&  ~(line_len - 1);
> +	}
> +
> +	/*
> +	 * If stop address is not aligned to cache-line flush the last
> +	 * line to prevent affecting somebody else's buffer
> +	 */
> +	if (stop&  (line_len - 1)) {
> +		v7_dcache_clean_inval_range(stop, stop + 1, line_len);
> +		/* align to the beginning of this cache line */
> +		stop&= ~(line_len - 1);
> +	}
> +
> +	for (mva = start; mva<  stop; mva = mva + line_len)
> +		/* DCIMVAC - Invalidate data cache by MVA to PoC */
> +		asm volatile ("mcr p15, 0, %0, c7, c6, 1" : : "r" (mva));
> +}
> +
> +static void v7_dcache_maint_range(u32 start, u32 stop, u32 range_op)
> +{
> +	u32 line_len, ccsidr;
> +	ccsidr = get_ccsidr();
> +	line_len = mask_n_get(ccsidr, 0, 2) + 2;
> +	/* Converting from words to bytes */
> +	line_len += 2;
> +	/* converting from log2(linelen) to linelen */
> +	line_len = 1<<  line_len;
> +
> +	switch (range_op) {
> +	case ARMV7_DCACHE_CLEAN_INVAL_RANGE:
> +		v7_dcache_clean_inval_range(start, stop, line_len);
> +		break;
> +	case ARMV7_DCACHE_INVAL_RANGE:
> +		v7_dcache_inval_range(start, stop, line_len);
> +		break;
> +	}
> +
> +	/* Make sure the operation is complete */
> +	asm volatile ("DMB");
> +}
> +
> +/* Invalidate TLB */
> +static void v7_inval_tlb(void)
> +{
> +	/* Invalidate entire unified TLB */
> +	asm volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (0));
> +	/* Invalidate entire data TLB */
> +	asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0));
> +	/* Invalidate entire instruction TLB */
> +	asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0));
> +	/* Full system DSB - make sure that the invalidation is complete */
> +	asm volatile ("DSB");
> +	/* Full system ISB - make sure the instruction stream sees it */
> +	asm volatile ("ISB");
> +}
> +
> +void invalidate_dcache_all(void)
> +{
> +	v7_maint_dcache_all(ARMV7_DCACHE_INVAL_ALL);
> +	if (v7_outer_cache.inval_all)
> +		v7_outer_cache.inval_all();

Why use pointers here rather than weak functions?

> +}
> +
> +/*
> + * Performs a clean&  invalidation of the entire data cache
> + * at all levels
> + */
> +void flush_dcache_all(void)
> +{
> +	v7_maint_dcache_all(ARMV7_DCACHE_CLEAN_INVAL_ALL);
> +	if (v7_outer_cache.flush_all)
> +		v7_outer_cache.flush_all();
> +}
> +
> +/*
> + * Invalidates range in all levels of D-cache/unified cache used:
> + * Affects the range [start, stop - 1]
> + */
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +
> +	v7_dcache_maint_range(start, stop, ARMV7_DCACHE_INVAL_RANGE);
> +	if (v7_outer_cache.inval_range)
> +		/* physical address is same as virtual address */
> +		v7_outer_cache.inval_range(start, stop);
> +}
> +
> +/*
> + * Flush range(clean&  invalidate) from all levels of D-cache/unified
> + * cache used:
> + * Affects the range [start, stop - 1]
> + */
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +	v7_dcache_maint_range(start, stop, ARMV7_DCACHE_CLEAN_INVAL_RANGE);
> +	if (v7_outer_cache.flush_range)
> +		/* physical address is same as virtual address */
> +		v7_outer_cache.flush_range(start, stop);
> +}
> +static void __v7_setup_outer_cache_ops(void)
> +{
> +	puts("v7_setup_outer_cache_ops: dummy implementation! "
> +	     "real implementation not available!!\n");
> +}
> +
> +void v7_setup_outer_cache_ops(void)
> +	__attribute__((weak, alias("__v7_setup_outer_cache_ops")));
> +
> +void arm_init_before_mmu(void)
> +{
> +	v7_setup_outer_cache_ops();
> +	if (v7_outer_cache.enable)
> +		v7_outer_cache.enable();
> +	invalidate_dcache_all();
> +	v7_inval_tlb();
> +}
> +#else
> +void invalidate_dcache_all(void)
> +{
> +}
> +
> +void flush_dcache_all(void)
> +{
> +}
> +
> +void invalidate_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void flush_dcache_range(unsigned long start, unsigned long stop)
> +{
> +}
> +
> +void arm_init_before_mmu(void)
> +{
> +}
> +#endif
> +
> +#ifndef CONFIG_SYS_NO_ICACHE
> +/* Invalidate entire I-cache and branch predictor array */
> +void invalidate_icache_all(void)
> +{
> +	/*
> +	 * Invalidate all instruction caches to PoU.
> +	 * Also flushes branch target cache.
> +	 */
> +	asm volatile ("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
> +
> +	/* Invalidate entire branch predictor array */
> +	asm volatile ("mcr p15, 0, %0, c7, c5, 6" : : "r" (0));
> +
> +	/* Full system DSB - make sure that the invalidation is complete */
> +	asm volatile ("DSB");
> +	/* Full system ISB - make sure the instruction stream sees it */
> +	asm volatile ("ISB");
> +}
> +#else
> +void invalidate_icache_all(void)
> +{
> +}
> +#endif
> +
> +/*
> + * Flush range from all levels of d-cache/unified-cache used:
> + * Affects the range [start, start + size - 1]
> + */
> +void  flush_cache(unsigned long start, unsigned long size)
> +{
> +	flush_dcache_range(start, start + size);
> +}

This function is the only one that is defined as something non-empty
when CONFIG_SYS_NO_DCACHE is defined. Why is it not in the big #ifndef
for dcache above?

> diff --git a/arch/arm/cpu/armv7/config.mk b/arch/arm/cpu/armv7/config.mk
> index 49ac9c7..7f9b171 100644
> --- a/arch/arm/cpu/armv7/config.mk
> +++ b/arch/arm/cpu/armv7/config.mk
> @@ -23,7 +23,7 @@
>   PLATFORM_RELFLAGS += -fno-common -ffixed-r8 -msoft-float
>
>   # Make ARMv5 to allow more compilers to work, even though its v7a.
> -PLATFORM_CPPFLAGS += -march=armv5
> +PLATFORM_CPPFLAGS += -march=armv7-a

Did you check that this does not break any board using armv7?

>   # =========================================================================
>   #
>   # Supply options according to compiler version
> diff --git a/arch/arm/include/asm/armv7.h b/arch/arm/include/asm/armv7.h
> new file mode 100644
> index 0000000..57409b6
> --- /dev/null
> +++ b/arch/arm/include/asm/armv7.h
> @@ -0,0 +1,63 @@
> +/*
> + * (C) Copyright 2010
> + * Texas Instruments Incorporated - http://www.ti.com/
> + *
> + * Aneesh V<aneesh at ti.com>
> + *
> + * See file CREDITS for list of people who contributed to this
> + * project.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 of
> + * the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
> + * MA 02111-1307 USA
> + */
> +#ifndef ARMV7_H
> +#define ARMV7_H
> +
> +#include<linux/types.h>
> +
> +/*
> + * Values for InD field in CSSELR
> + * Selects the type of cache
> + */
> +#define ARMV7_CSSELR_IND_DATA_UNIFIED	0
> +#define ARMV7_CSSELR_IND_INSTRUCTION	1
> +
> +/* Values for Ctype fields in CLIDR */
> +#define ARMV7_CLIDR_CTYPE_NO_CACHE		0
> +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_ONLY	1
> +#define ARMV7_CLIDR_CTYPE_DATA_ONLY		2
> +#define ARMV7_CLIDR_CTYPE_INSTRUCTION_DATA	3
> +#define ARMV7_CLIDR_CTYPE_UNIFIED		4
> +
> +/* some utility macros */
> +#define mask(start, end) \
> +	(((1<<  ((end) - (start) + 1)) - 1)<<  (start))
> +
> +#define mask_n_get(reg, start, end) \
> +	(((reg)&  mask(start, end))>>  (start))

Seeing as these macros are only used in the ARMv7 cache C file, they
should be moved there.

> +struct v7_outer_cache_ops {
> +	void (*enable)(void);
> +	void (*disable)(void);
> +	void (*flush_all)(void);
> +	void (*inval_all)(void);
> +	void (*flush_range)(u32 start, u32 end);
> +	void (*inval_range)(u32 start, u32 end);
> +};
> +
> +extern struct v7_outer_cache_ops v7_outer_cache;
> +
> +void v7_setup_outer_cache_ops(void);
> +#endif
> diff --git a/include/common.h b/include/common.h
> index 189ad81..d750ff9 100644
> --- a/include/common.h
> +++ b/include/common.h
> @@ -411,6 +411,7 @@ void	icache_disable(void);
>   int	dcache_status (void);
>   void	dcache_enable (void);
>   void	dcache_disable(void);
> +void	mmu_disable(void);
>   void	relocate_code (ulong, gd_t *, ulong) __attribute__ ((noreturn));
>   ulong	get_endaddr   (void);
>   void	trap_init     (ulong);
> @@ -603,9 +604,11 @@ ulong	video_setmem (ulong);
>
>   /* arch/$(ARCH)/lib/cache.c */
>   void	flush_cache   (unsigned long, unsigned long);
> +void	flush_dcache_all(void);
>   void	flush_dcache_range(unsigned long start, unsigned long stop);
>   void	invalidate_dcache_range(unsigned long start, unsigned long stop);
> -
> +void	invalidate_dcache_all(void);
> +void	invalidate_icache_all(void);
>
>   /* arch/$(ARCH)/lib/ticks.S */
>   unsigned long long get_ticks(void);

Amicalement,
-- 
Albert.


More information about the U-Boot mailing list