[U-Boot] [PATCH 3/9] zymqmp: Replace home grown mmu code with generic table approach

Michal Simek michal.simek at xilinx.com
Tue Feb 23 14:07:32 CET 2016


Hi,

before I comment the rest. You need to also fix gem driver because it is
using 1MB mapping.

diff --git a/drivers/net/zynq_gem.c b/drivers/net/zynq_gem.c
index b3821c31a91d..cf1376ce1bd7 100644
--- a/drivers/net/zynq_gem.c
+++ b/drivers/net/zynq_gem.c
@@ -150,10 +150,10 @@ struct emac_bd {
 };

 #define RX_BUF 32
-/* Page table entries are set to 1MB, or multiples of 1MB
- * (not < 1MB). driver uses less bd's so use 1MB bdspace.
+/* Page table entries are set to 2MB, or multiples of 2MB
+ * (not < 2MB). driver uses less bd's so use 2MB bdspace.
  */
-#define BD_SPACE       0x100000
+#define BD_SPACE       0x200000
 /* BD separation space */
 #define BD_SEPRN_SPACE (RX_BUF * sizeof(struct emac_bd))




On 23.2.2016 12:33, Alexander Graf wrote:
> 
> 
> On 23.02.16 12:04, Michal Simek wrote:
>> On 22.2.2016 02:57, Alexander Graf wrote:
>>> Now that we have nice table driven page table creating code that gives
>>> us everything we need, move to that.
>>>
>>> Signed-off-by: Alexander Graf <agraf at suse.de>
>>> ---
>>>  arch/arm/cpu/armv8/zynqmp/cpu.c | 169 ----------------------------------------
>>>  include/configs/xilinx_zynqmp.h |  44 +++++++++++
>>>  2 files changed, 44 insertions(+), 169 deletions(-)
>>>
>>> diff --git a/arch/arm/cpu/armv8/zynqmp/cpu.c b/arch/arm/cpu/armv8/zynqmp/cpu.c
>>> index c71f291..3f661a9 100644
>>> --- a/arch/arm/cpu/armv8/zynqmp/cpu.c
>>> +++ b/arch/arm/cpu/armv8/zynqmp/cpu.c
>>> @@ -44,172 +44,3 @@ unsigned int zynqmp_get_silicon_version(void)
>>>  
>>>  	return ZYNQMP_CSU_VERSION_SILICON;
>>>  }
>>> -
>>> -#ifndef CONFIG_SYS_DCACHE_OFF
>>> -#include <asm/armv8/mmu.h>
>>> -
>>> -#define SECTION_SHIFT_L1	30UL
>>> -#define SECTION_SHIFT_L2	21UL
>>> -#define BLOCK_SIZE_L0		0x8000000000UL
>>> -#define BLOCK_SIZE_L1		(1 << SECTION_SHIFT_L1)
>>> -#define BLOCK_SIZE_L2		(1 << SECTION_SHIFT_L2)
>>> -
>>> -#define TCR_TG1_4K		(1 << 31)
>>> -#define TCR_EPD1_DISABLE	(1 << 23)
>>> -#define ZYNQMO_VA_BITS		40
>>> -#define ZYNQMP_TCR		TCR_TG1_4K | \
>>> -				TCR_EPD1_DISABLE | \
>>> -				TCR_SHARED_OUTER | \
>>> -				TCR_SHARED_INNER | \
>>> -				TCR_IRGN_WBWA | \
>>> -				TCR_ORGN_WBWA | \
>>> -				TCR_T0SZ(ZYNQMO_VA_BITS)
>>> -
>>> -#define MEMORY_ATTR	PMD_SECT_AF | PMD_SECT_INNER_SHARE |	\
>>> -			PMD_ATTRINDX(MT_NORMAL) |	\
>>> -			PMD_TYPE_SECT
>>> -#define DEVICE_ATTR	PMD_SECT_AF | PMD_SECT_PXN |	\
>>> -			PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_NGNRNE) |	\
>>> -			PMD_TYPE_SECT
>>> -
>>> -/* 4K size is required to place 512 entries in each level */
>>> -#define TLB_TABLE_SIZE	0x1000
>>> -
>>> -struct attr_tbl {
>>> -	u32 num;
>>> -	u64 attr;
>>> -};
>>> -
>>> -static struct attr_tbl attr_tbll1t0[4] = { {16, 0x0},
>>> -					   {8, DEVICE_ATTR},
>>> -					   {32, MEMORY_ATTR},
>>> -					   {456, DEVICE_ATTR}
>>> -					 };
>>> -static struct attr_tbl attr_tbll2t3[4] = { {0x180, DEVICE_ATTR},
>>> -					   {0x40, 0x0},
>>> -					   {0x3F, DEVICE_ATTR},
>>> -					   {0x1, MEMORY_ATTR}
>>> -					 };
>>> -
>>> -/*
>>> - * This mmu table looks as below
>>> - * Level 0 table contains two entries to 512GB sizes. One is Level1 Table 0
>>> - * and other Level1 Table1.
>>> - * Level1 Table0 contains entries for each 1GB from 0 to 511GB.
>>> - * Level1 Table1 contains entries for each 1GB from 512GB to 1TB.
>>> - * Level2 Table0, Level2 Table1, Level2 Table2 and Level2 Table3 contains
>>> - * entries for each 2MB starting from 0GB, 1GB, 2GB and 3GB respectively.
>>> - */
>>> -static void zynqmp_mmu_setup(void)
>>> -{
>>> -	int el;
>>> -	u32 index_attr;
>>> -	u64 i, section_l1t0, section_l1t1;
>>> -	u64 section_l2t0, section_l2t1, section_l2t2, section_l2t3;
>>> -	u64 *level0_table = (u64 *)gd->arch.tlb_addr;
>>> -	u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + TLB_TABLE_SIZE);
>>> -	u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + (2 * TLB_TABLE_SIZE));
>>> -	u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + (3 * TLB_TABLE_SIZE));
>>> -	u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + (4 * TLB_TABLE_SIZE));
>>> -	u64 *level2_table_2 = (u64 *)(gd->arch.tlb_addr + (5 * TLB_TABLE_SIZE));
>>> -	u64 *level2_table_3 = (u64 *)(gd->arch.tlb_addr + (6 * TLB_TABLE_SIZE));
>>> -
>>> -	level0_table[0] =
>>> -		(u64)level1_table_0 | PMD_TYPE_TABLE;
>>> -	level0_table[1] =
>>> -		(u64)level1_table_1 | PMD_TYPE_TABLE;
>>> -
>>> -	/*
>>> -	 * set level 1 table 0, covering 0 to 512GB
>>> -	 * set level 1 table 1, covering 512GB to 1TB
>>> -	 */
>>> -	section_l1t0 = 0;
>>> -	section_l1t1 = BLOCK_SIZE_L0;
>>> -
>>> -	index_attr = 0;
>>> -	for (i = 0; i < 512; i++) {
>>> -		level1_table_0[i] = section_l1t0;
>>> -		level1_table_0[i] |= attr_tbll1t0[index_attr].attr;
>>> -		attr_tbll1t0[index_attr].num--;
>>> -		if (attr_tbll1t0[index_attr].num == 0)
>>> -			index_attr++;
>>> -		level1_table_1[i] = section_l1t1;
>>> -		level1_table_1[i] |= DEVICE_ATTR;
>>> -		section_l1t0 += BLOCK_SIZE_L1;
>>> -		section_l1t1 += BLOCK_SIZE_L1;
>>> -	}
>>> -
>>> -	level1_table_0[0] =
>>> -		(u64)level2_table_0 | PMD_TYPE_TABLE;
>>> -	level1_table_0[1] =
>>> -		(u64)level2_table_1 | PMD_TYPE_TABLE;
>>> -	level1_table_0[2] =
>>> -		(u64)level2_table_2 | PMD_TYPE_TABLE;
>>> -	level1_table_0[3] =
>>> -		(u64)level2_table_3 | PMD_TYPE_TABLE;
>>> -
>>> -	section_l2t0 = 0;
>>> -	section_l2t1 = section_l2t0 + BLOCK_SIZE_L1; /* 1GB */
>>> -	section_l2t2 = section_l2t1 + BLOCK_SIZE_L1; /* 2GB */
>>> -	section_l2t3 = section_l2t2 + BLOCK_SIZE_L1; /* 3GB */
>>> -
>>> -	index_attr = 0;
>>> -
>>> -	for (i = 0; i < 512; i++) {
>>> -		level2_table_0[i] = section_l2t0 | MEMORY_ATTR;
>>> -		level2_table_1[i] = section_l2t1 | MEMORY_ATTR;
>>> -		level2_table_2[i] = section_l2t2 | DEVICE_ATTR;
>>> -		level2_table_3[i] = section_l2t3 |
>>> -				    attr_tbll2t3[index_attr].attr;
>>> -		attr_tbll2t3[index_attr].num--;
>>> -		if (attr_tbll2t3[index_attr].num == 0)
>>> -			index_attr++;
>>> -		section_l2t0 += BLOCK_SIZE_L2;
>>> -		section_l2t1 += BLOCK_SIZE_L2;
>>> -		section_l2t2 += BLOCK_SIZE_L2;
>>> -		section_l2t3 += BLOCK_SIZE_L2;
>>> -	}
>>> -
>>> -	/* flush new MMU table */
>>> -	flush_dcache_range(gd->arch.tlb_addr,
>>> -			   gd->arch.tlb_addr + gd->arch.tlb_size);
>>> -
>>> -	/* point TTBR to the new table */
>>> -	el = current_el();
>>> -	set_ttbr_tcr_mair(el, gd->arch.tlb_addr,
>>> -			  ZYNQMP_TCR, MEMORY_ATTRIBUTES);
>>> -
>>> -	set_sctlr(get_sctlr() | CR_M);
>>> -}
>>> -
>>> -int arch_cpu_init(void)
>>> -{
>>> -	icache_enable();
>>> -	__asm_invalidate_dcache_all();
>>> -	__asm_invalidate_tlb_all();
>>> -	return 0;
>>> -}
>>> -
>>> -/*
>>> - * This function is called from lib/board.c.
>>> - * It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
>>> - * There is no need to disable d-cache for this operation.
>>> - */
>>> -void enable_caches(void)
>>> -{
>>> -	/* The data cache is not active unless the mmu is enabled */
>>> -	if (!(get_sctlr() & CR_M)) {
>>> -		invalidate_dcache_all();
>>> -		__asm_invalidate_tlb_all();
>>> -		zynqmp_mmu_setup();
>>> -	}
>>> -	puts("Enabling Caches...\n");
>>> -
>>> -	set_sctlr(get_sctlr() | CR_C);
>>> -}
>>> -
>>> -u64 *arch_get_page_table(void)
>>> -{
>>> -	return (u64 *)(gd->arch.tlb_addr + 0x3000);
>>> -}
>>> -#endif
>>> diff --git a/include/configs/xilinx_zynqmp.h b/include/configs/xilinx_zynqmp.h
>>> index 28622de..439f063 100644
>>> --- a/include/configs/xilinx_zynqmp.h
>>> +++ b/include/configs/xilinx_zynqmp.h
>>> @@ -29,6 +29,50 @@
>>>  #define CONFIG_SYS_MEMTEST_START	CONFIG_SYS_SDRAM_BASE
>>>  #define CONFIG_SYS_MEMTEST_END		CONFIG_SYS_SDRAM_SIZE
>>>  
>>> +#define CONFIG_SYS_FULL_VA
>>> +#define CONFIG_SYS_MEM_MAP {						\
>>> +	{								\
>>> +		.base = 0x0UL,						\
>>> +		.size = 0x80000000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |			\
>>> +			 PTE_BLOCK_INNER_SHARE				\
>>> +	}, {								\
>>> +		.base = 0x80000000UL,					\
>>> +		.size = 0x70000000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |		\
>>> +			 PTE_BLOCK_NON_SHARE |				\
>>> +			 PTE_BLOCK_PXN | PTE_BLOCK_UXN			\
>>> +	}, {								\
>>> +		.base = 0xf8000000UL,					\
>>> +		.size = 0x07e00000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |		\
>>> +			 PTE_BLOCK_NON_SHARE |				\
>>> +			 PTE_BLOCK_PXN | PTE_BLOCK_UXN			\
>>> +	}, {								\
>>> +		.base = 0xffe00000UL,					\
>>> +		.size = 0x00200000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |			\
>>> +			 PTE_BLOCK_INNER_SHARE				\
>>> +	}, {								\
>>> +		.base = 0x400000000UL,					\
>>> +		.size = 0x200000000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |		\
>>> +			 PTE_BLOCK_NON_SHARE |				\
>>> +			 PTE_BLOCK_PXN | PTE_BLOCK_UXN			\
>>> +	}, {								\
>>> +		.base = 0x600000000UL,					\
>>> +		.size = 0x800000000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |			\
>>> +			 PTE_BLOCK_INNER_SHARE				\
>>> +	}, {								\
>>> +		.base = 0xe00000000UL,					\
>>> +		.size = 0xf200000000UL,					\
>>> +		.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |		\
>>> +			 PTE_BLOCK_NON_SHARE |				\
>>> +			 PTE_BLOCK_PXN | PTE_BLOCK_UXN			\
>>> +	},								\
>>> +	}
>>> +
>>>  /* Have release address at the end of 256MB for now */
>>>  #define CPU_RELEASE_ADDR	0xFFFFFF0
>>>  
>>
>> No problem with this default map. I didn't check every entry. Siva will
>> look at it.
>> My problem is that this is static. You should enable option option to
>> generate this table dynamically. The reason is that systems can run just
> 
> The way the code works today, I could easily just use a 0-size element
> as array terminator and use an external pointer rather than the local
> memory map array.
> 
> However, that contradicts with a runtime memory contrained approach to
> determine the page table size. The only good path I could come up with
> here is to generate the page table during build time and then save how
> big it was. We couldn't do that with runtime changing tables.

0-size element is not enough. The problem are base addresses too.
ZynqMP memory map is just this

1. 0-2GB PS memory up to 2GB
2. 2GB-3GB PL part
3. 3GB-4GB PCIE + device + small memories OCM/TCM
4. 4GB-16GB reserved
5. 16-24GB PL part
6. 24-32GB PCIe
7. 32-64GB DDR
8. 64-512GB PL
9. 512-768 PCIe
10. 768-1TB DDR
11. 1TB-16TB PL

PL regions 2, 5, 11 can also contain memories but you are able to find
them via DT for particular HW design.
And start addresses can be in this range.
In Xilinx tree I have patches for reading memory configuration from DT
and saving them to gd->bd->bi_dram based on CONFIG_NR_DRAM_BANKS setting
and I think this should be wired.
It means you should setup just memory attributes for memories which
u-boot is aware of. For the rest of address space it is not that sensitive.

>> with PL DDR not PS and you can't map memory which is not there which can
>> end up in system lock.
>> We want to change current code to setup MMU table for memories at run
>> time based on DT.
> 
> So in your use case, the regions would still be there, just their size
> changes. That means we could still have static tables that then get
> modified by runtime code later on to just span less?

Not entirely. Definitely the part of table can be static but I tend to
have especially memory mapping more dynamic.

> Of course, with a full device tree, we could just generate the mmu
> tables completely from dt. But then we'd need a good answer to how we
> determine the page table pool size...

I don't think it is required but at least memory mapping should be more
dynamic. As is visible we are moving to u-boot which is fully configured
from DT that's why none will want to changes in MMU table that's why
this should be more flexible.

Thanks,
Michal


More information about the U-Boot mailing list