[U-Boot] [PATCH v3 1/7] armv8: New MMU setup code allowing to use 48+ bits PA/VA

Sergey Temerkhanov s.temerkhanov at gmail.com
Thu Aug 13 17:14:56 CEST 2015


This patch adds code which sets up 2-level page tables on ARM64, thus
extending the available VA space. CPUs implementing the 64k translation
granule are able to use direct PA-VA mapping of the whole 48 bit
address space.
It also adds the ability to reset the SCTLR register at the very beginning
of execution to avoid interference from stale mappings set up by early
firmware/loaders/etc.

Signed-off-by: Sergey Temerkhanov <s.temerkhanov at gmail.com>
Signed-off-by: Radha Mohan Chintakuntla <rchintakuntla at cavium.com>

---

Changes in v3:
- Reduced code duplication
- Renamed CONFIG_SYS_PTL1_BITS to CONFIG_SYS_PTL2_BITS
- Moved 'reset_sctrl' call to the 'reset' label
- Rebased to the current upstream tree
- Documented newly added config options

Changes in v2:
- Changed code licensing
- Completed the patchset

 arch/arm/cpu/armv8/cache_v8.c      | 80 +++++++++++++++++++++++++++++++++++++-
 arch/arm/cpu/armv8/start.S         | 36 +++++++++++++++++
 arch/arm/include/asm/armv8/mmu.h   | 79 ++++++++++++++++++++++++++++++++++---
 arch/arm/include/asm/global_data.h |  1 +
 arch/arm/include/asm/system.h      |  7 ++++
 arch/arm/lib/board.c               |  6 ++-
 doc/README.arm64                   | 35 ++++++++++++++---
 7 files changed, 229 insertions(+), 15 deletions(-)

diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index c22f7b6..00778f2 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -12,6 +12,69 @@
 DECLARE_GLOBAL_DATA_PTR;
 
 #ifndef CONFIG_SYS_DCACHE_OFF
+
+#ifdef CONFIG_SYS_FULL_VA
+static void set_ptl1_entry(u64 index, u64 ptl2_entry)
+{
+	u64 *pgd = (u64 *)gd->arch.tlb_addr;
+	u64 value;
+
+	value = ptl2_entry | PTL1_TYPE_TABLE;
+	pgd[index] = value;
+}
+
+static void set_ptl2_block(u64 ptl1, u64 bfn, u64 address, u64 memory_type)
+{
+	u64 *pmd = (u64 *)ptl1;
+	u64 value;
+
+	value = address | PTL2_TYPE_BLOCK | PTL2_BLOCK_AF;
+	value |= PMD_ATTRINDX(memory_type);
+	pmd[bfn] = value;
+}
+
+static struct mm_region mem_map[] = CONFIG_SYS_MEM_MAP;
+
+#define PTL1_ENTRIES CONFIG_SYS_PTL1_ENTRIES
+#define PTL2_ENTRIES CONFIG_SYS_PTL2_ENTRIES
+
+static void setup_pgtables(void)
+{
+	int l1_e, l2_e;
+	unsigned long pmd = 0;
+	unsigned long address;
+
+	/* Setup the PMD pointers */
+	for (l1_e = 0; l1_e < CONFIG_SYS_MEM_MAP_SIZE; l1_e++) {
+		gd->arch.pmd_addr[l1_e] = gd->arch.tlb_addr +
+						PTL1_ENTRIES * sizeof(u64);
+		gd->arch.pmd_addr[l1_e] += PTL2_ENTRIES * sizeof(u64) * l1_e;
+		gd->arch.pmd_addr[l1_e] += 0xffffUL;
+		gd->arch.pmd_addr[l1_e] &= ~0xffffUL;
+	}
+
+	/* Setup the page tables */
+	for (l1_e = 0; l1_e < PTL1_ENTRIES; l1_e++) {
+		if (mem_map[pmd].base ==
+			(uintptr_t)l1_e << PTL2_BITS) {
+			set_ptl1_entry(l1_e, gd->arch.pmd_addr[pmd]);
+
+			for (l2_e = 0; l2_e < PTL2_ENTRIES; l2_e++) {
+				address = mem_map[pmd].base
+					+ (uintptr_t)l2_e * BLOCK_SIZE;
+				set_ptl2_block(gd->arch.pmd_addr[pmd], l2_e,
+					       address, mem_map[pmd].attrs);
+			}
+
+			pmd++;
+		} else {
+			set_ptl1_entry(l1_e, 0);
+		}
+	}
+}
+
+#else
+
 void set_pgtable_section(u64 *page_table, u64 index, u64 section,
 			 u64 memory_type)
 {
@@ -22,13 +85,25 @@ void set_pgtable_section(u64 *page_table, u64 index, u64 section,
 	page_table[index] = value;
 }
 
+#endif
+
+
 /* to activate the MMU we need to set up virtual memory */
 static void mmu_setup(void)
 {
+#ifndef CONFIG_SYS_FULL_VA
 	bd_t *bd = gd->bd;
 	u64 *page_table = (u64 *)gd->arch.tlb_addr, i, j;
+#endif
 	int el;
 
+#ifdef CONFIG_SYS_FULL_VA
+	unsigned long coreid = read_mpidr() & CONFIG_COREID_MASK;
+
+	/* Set up page tables only on BSP */
+	if (coreid == BSP_COREID)
+		setup_pgtables();
+#else
 	/* Setup an identity-mapping for all spaces */
 	for (i = 0; i < (PGTABLE_SIZE >> 3); i++) {
 		set_pgtable_section(page_table, i, i << SECTION_SHIFT,
@@ -45,7 +120,7 @@ static void mmu_setup(void)
 					    MT_NORMAL);
 		}
 	}
-
+#endif
 	/* load TTBR0 */
 	el = current_el();
 	if (el == 1) {
@@ -145,6 +220,7 @@ u64 *__weak arch_get_page_table(void) {
 	return NULL;
 }
 
+#ifndef CONFIG_SYS_FULL_VA
 void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
 				     enum dcache_option option)
 {
@@ -170,6 +246,8 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
 	flush_dcache_range(start, end);
 	asm volatile("dsb sy");
 }
+#endif
+
 #else	/* CONFIG_SYS_DCACHE_OFF */
 
 void invalidate_dcache_all(void)
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
index e70bed4..a7dfe23 100644
--- a/arch/arm/cpu/armv8/start.S
+++ b/arch/arm/cpu/armv8/start.S
@@ -43,6 +43,9 @@ _bss_end_ofs:
 	.quad	__bss_end - _start
 
 reset:
+#ifdef CONFIG_SYS_RESET_SCTRL
+	bl reset_sctrl
+#endif
 	/*
 	 * Could be EL3/EL2/EL1, Initial State:
 	 * Little Endian, MMU Disabled, i/dCache Disabled
@@ -97,6 +100,39 @@ master_cpu:
 
 	bl	_main
 
+#ifdef CONFIG_SYS_RESET_SCTRL
+reset_sctrl:
+	switch_el x1, 3f, 2f, 1f
+3:
+	mrs	x0, sctlr_el3
+	b	0f
+2:
+	mrs	x0, sctlr_el2
+	b	0f
+1:
+	mrs	x0, sctlr_el1
+
+0:
+	ldr	x1, =0xfdfffffa
+	and	x0, x0, x1
+
+	switch_el x1, 6f, 5f, 4f
+6:
+	msr	sctlr_el3, x0
+	b	7f
+5:
+	msr	sctlr_el2, x0
+	b	7f
+4:
+	msr	sctlr_el1, x0
+
+7:
+	dsb	sy
+	isb
+	b	__asm_invalidate_tlb_all
+	ret
+#endif
+
 /*-----------------------------------------------------------------------*/
 
 WEAK(apply_core_errata)
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
index 04fa0be..a5176ba 100644
--- a/arch/arm/include/asm/armv8/mmu.h
+++ b/arch/arm/include/asm/armv8/mmu.h
@@ -21,7 +21,13 @@
  * The following definitions are related each other, shoud be
  * calculated specifically.
  */
+
+#ifndef CONFIG_SYS_FULL_VA
 #define VA_BITS			(42)	/* 42 bits virtual address */
+#else
+#define VA_BITS			CONFIG_SYS_VA_BITS
+#define PTL1_BITS		CONFIG_SYS_PTL1_BITS
+#endif
 
 /* PAGE_SHIFT determines the page size */
 #undef  PAGE_SIZE
@@ -30,11 +36,18 @@
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
 /*
- * section address mask and size definitions.
+ * block/section address mask and size definitions.
  */
+#ifndef CONFIG_SYS_FULL_VA
 #define SECTION_SHIFT		29
 #define SECTION_SIZE		(UL(1) << SECTION_SHIFT)
 #define SECTION_MASK		(~(SECTION_SIZE-1))
+#else
+#define BLOCK_SHIFT		CONFIG_SYS_BLOCK_SHIFT
+#define BLOCK_SIZE		(UL(1) << BLOCK_SHIFT)
+#define BLOCK_MASK		(~(BLOCK_SIZE-1))
+#endif
+
 /***************************************************************/
 
 /*
@@ -46,15 +59,51 @@
 #define MT_NORMAL_NC		3
 #define MT_NORMAL		4
 
-#define MEMORY_ATTRIBUTES	((0x00 << (MT_DEVICE_NGNRNE*8)) |	\
-				(0x04 << (MT_DEVICE_NGNRE*8)) |		\
-				(0x0c << (MT_DEVICE_GRE*8)) |		\
-				(0x44 << (MT_NORMAL_NC*8)) |		\
-				(UL(0xff) << (MT_NORMAL*8)))
+#define MEMORY_ATTRIBUTES	((0x00 << (MT_DEVICE_NGNRNE * 8)) |	\
+				(0x04 << (MT_DEVICE_NGNRE * 8))   |	\
+				(0x0c << (MT_DEVICE_GRE * 8))     |	\
+				(0x44 << (MT_NORMAL_NC * 8))      |	\
+				(UL(0xff) << (MT_NORMAL * 8)))
 
 /*
  * Hardware page table definitions.
  *
+ */
+
+#ifdef CONFIG_SYS_FULL_VA
+/*
+ * Level 1 descriptor (PGD).
+ */
+
+#define PTL1_TYPE_MASK		(3 << 0)
+#define PTL1_TYPE_TABLE		(3 << 0)
+
+#define PTL1_TABLE_PXN		(1UL << 59)
+#define PTL1_TABLE_XN		(1UL << 60)
+#define PTL1_TABLE_AP		(1UL << 61)
+#define PTL1_TABLE_NS		(1UL << 63)
+
+
+/*
+ * Level 2 descriptor (PMD).
+ */
+
+#define PTL2_TYPE_MASK		(3 << 0)
+#define PTL2_TYPE_FAULT		(0 << 0)
+#define PTL2_TYPE_TABLE		(3 << 0)
+#define PTL2_TYPE_BLOCK		(1 << 0)
+
+/*
+ * Block
+ */
+#define PTL2_BLOCK_S		(3 << 8)
+#define PTL2_BLOCK_AF		(1 << 10)
+#define PTL2_BLOCK_NG		(1 << 11)
+#define PTL2_BLOCK_PXN		(UL(1) << 53)
+#define PTL2_BLOCK_UXN		(UL(1) << 54)
+
+#else
+/*
  * Level 2 descriptor (PMD).
  */
 #define PMD_TYPE_MASK		(3 << 0)
@@ -72,6 +121,8 @@
 #define PMD_SECT_PXN		(UL(1) << 53)
 #define PMD_SECT_UXN		(UL(1) << 54)
 
+#endif
+
 /*
  * AttrIndx[2:0]
  */
@@ -98,9 +149,16 @@
 #define TCR_TG0_4K		(0 << 14)
 #define TCR_TG0_64K		(1 << 14)
 #define TCR_TG0_16K		(2 << 14)
+
+#ifndef CONFIG_SYS_FULL_VA
 #define TCR_EL1_IPS_BITS	(UL(3) << 32)	/* 42 bits physical address */
 #define TCR_EL2_IPS_BITS	(3 << 16)	/* 42 bits physical address */
 #define TCR_EL3_IPS_BITS	(3 << 16)	/* 42 bits physical address */
+#else
+#define TCR_EL1_IPS_BITS	CONFIG_SYS_TCR_EL1_IPS_BITS
+#define TCR_EL2_IPS_BITS	CONFIG_SYS_TCR_EL2_IPS_BITS
+#define TCR_EL3_IPS_BITS	CONFIG_SYS_TCR_EL3_IPS_BITS
+#endif
 
 /* PTWs cacheable, inner/outer WBWA and non-shareable */
 #define TCR_FLAGS		(TCR_TG0_64K |		\
@@ -110,8 +168,10 @@
 				TCR_T0SZ(VA_BITS))
 
 #ifndef __ASSEMBLY__
+#ifndef CONFIG_SYS_FULL_VA
 void set_pgtable_section(u64 *page_table, u64 index,
 			 u64 section, u64 memory_type);
+#endif
 static inline void set_ttbr_tcr_mair(int el, u64 table, u64 tcr, u64 attr)
 {
 	asm volatile("dsb sy");
@@ -132,5 +192,12 @@ static inline void set_ttbr_tcr_mair(int el, u64 table, u64 tcr, u64 attr)
 	}
 	asm volatile("isb");
 }
+
+struct mm_region {
+	u64 base;
+	u64 size;
+	u64 attrs;
+};
 #endif
+
 #endif /* _ASM_ARMV8_MMU_H_ */
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index 4e3ea55..723539c 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -38,6 +38,7 @@ struct arch_global_data {
 	unsigned long long timer_reset_value;
 #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
 	unsigned long tlb_addr;
+	unsigned long pmd_addr[CONFIG_SYS_PTL1_ENTRIES];
 	unsigned long tlb_size;
 #endif
 
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 868ea54..9803686 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -14,7 +14,12 @@
 #define CR_WXN		(1 << 19)	/* Write Permision Imply XN	*/
 #define CR_EE		(1 << 25)	/* Exception (Big) Endian	*/
 
+#ifndef CONFIG_SYS_FULL_VA
 #define PGTABLE_SIZE	(0x10000)
+#else
+#define PGTABLE_SIZE	CONFIG_SYS_PGTABLE_SIZE
+#endif
+
 /* 2MB granularity */
 #define MMU_SECTION_SHIFT	21
 
@@ -136,7 +141,9 @@ void flush_l3_cache(void);
 #define CR_AFE	(1 << 29)	/* Access flag enable			*/
 #define CR_TE	(1 << 30)	/* Thumb exception enable		*/
 
+#ifndef PGTABLE_SIZE
 #define PGTABLE_SIZE		(4096 * 4)
+#endif
 
 /*
  * This is used to ensure the compiler did actually allocate the register we
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 37ea6e9..0c9c66a 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -327,10 +327,12 @@ void board_init_f(ulong bootflag)
 #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
 	/* reserve TLB table */
 	gd->arch.tlb_size = PGTABLE_SIZE;
-	addr -= gd->arch.tlb_size;
+	gd->arch.tlb_size += 0xffff;
+	gd->arch.tlb_size &= ~(0x10000 - 1);
 
-	/* round down to next 64 kB limit */
+	addr -= gd->arch.tlb_size;
 	addr &= ~(0x10000 - 1);
+	/* round down to next 64 kB limit */
 
 	gd->arch.tlb_addr = addr;
 	debug("TLB table from %08lx to %08lx\n", addr, addr + gd->arch.tlb_size);
diff --git a/doc/README.arm64 b/doc/README.arm64
index 75586db..f32108f 100644
--- a/doc/README.arm64
+++ b/doc/README.arm64
@@ -36,11 +36,34 @@ Notes
 6. CONFIG_ARM64 instead of CONFIG_ARMV8 is used to distinguish aarch64 and
    aarch32 specific codes.
 
+7. CONFIG_SYS_FULL_VA is used to enable 2-level page tables. For cores
+   supporting 64k pages, it allows use of the full 48+ bit VA/PA space.
+
+   Enabling this option requires the following ones to be defined:
+       - CONFIG_SYS_MEM_MAP - an array of 'struct mm_region' describing the
+         system memory map (start, length, attributes)
+       - CONFIG_SYS_MEM_MAP_SIZE - number of entries in CONFIG_SYS_MEM_MAP
+       - CONFIG_SYS_PTL1_ENTRIES - number of 1st level page table entries
+       - CONFIG_SYS_PTL2_ENTRIES - number of 2nd level page table entries
+         for the largest CONFIG_SYS_MEM_MAP entry
+       - CONFIG_COREID_MASK - the mask value used to get the core ID from
+         the MPIDR_EL1 register
+       - CONFIG_SYS_PTL2_BITS - number of bits addressed by the 2nd level
+         page tables
+       - CONFIG_SYS_BLOCK_SHIFT - number of bits addressed by a single block
+         entry from L2 page tables
+       - CONFIG_SYS_PGTABLE_SIZE - total size of the page table
+       - CONFIG_SYS_TCR_EL{1,2,3}_IPS_BITS - the IPS field of the TCR_EL{1,2,3}
+
+
+
+
 Contributor
 ===========
-   Tom Rini       <trini at ti.com>
-   Scott Wood     <scottwood at freescale.com>
-   York Sun       <yorksun at freescale.com>
-   Simon Glass    <sjg at chromium.org>
-   Sharma Bhupesh <bhupesh.sharma at freescale.com>
-   Rob Herring    <robherring2 at gmail.com>
+   Tom Rini            <trini at ti.com>
+   Scott Wood          <scottwood at freescale.com>
+   York Sun            <yorksun at freescale.com>
+   Simon Glass         <sjg at chromium.org>
+   Sharma Bhupesh      <bhupesh.sharma at freescale.com>
+   Rob Herring         <robherring2 at gmail.com>
+   Sergey Temerkhanov  <s.temerkhanov at gmail.com>
-- 
2.2.0



More information about the U-Boot mailing list