[U-Boot] [PATCH 2/9] arm64: Make full va map code more dynamic
Simon Glass
sjg at chromium.org
Tue Feb 23 14:17:56 CET 2016
Hi Alex,
On 21 February 2016 at 18:57, Alexander Graf <agraf at suse.de> wrote:
> The idea to generate our pages tables from an array of memory ranges
> is very sound. However, instead of hard coding the code to create up
> to 2 levels of 64k granule page tables, we really should just create
> normal 4k page tables that allow us to set caching attributes on 2M
> or 4k level later on.
>
> So this patch moves the full_va mapping code to 4k page size and
> makes it fully flexible to dynamically create as many levels as
> necessary for a map (including dynamic 1G/2M pages). It also adds
> support to dynamically split a large map into smaller ones when
> some code wants to set dcache attributes.
>
> With all this in place, there is very little reason to create your
> own page tables in board specific files.
>
> Signed-off-by: Alexander Graf <agraf at suse.de>
> ---
> arch/arm/cpu/armv8/cache_v8.c | 346 +++++++++++++++++++++++++++++++------
> arch/arm/include/asm/armv8/mmu.h | 68 ++++----
> arch/arm/include/asm/global_data.h | 4 +-
> arch/arm/include/asm/system.h | 3 +-
> include/configs/thunderx_88xx.h | 14 +-
> 5 files changed, 332 insertions(+), 103 deletions(-)
>
Should the change to the thunderx file go in a separate patch?
> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> index 9229532..4369a83 100644
> --- a/arch/arm/cpu/armv8/cache_v8.c
> +++ b/arch/arm/cpu/armv8/cache_v8.c
> @@ -2,6 +2,9 @@
> * (C) Copyright 2013
> * David Feng <fenghua at phytium.com.cn>
> *
> + * (C) Copyright 2016
> + * Alexander Graf <agraf at suse.de>
> + *
> * SPDX-License-Identifier: GPL-2.0+
> */
>
> @@ -9,35 +12,40 @@
> #include <asm/system.h>
> #include <asm/armv8/mmu.h>
>
> -DECLARE_GLOBAL_DATA_PTR;
> -
> -#ifndef CONFIG_SYS_DCACHE_OFF
> +/* #define DEBUG_MMU */
>
> -#ifdef CONFIG_SYS_FULL_VA
> -static void set_ptl1_entry(u64 index, u64 ptl2_entry)
> -{
> - u64 *pgd = (u64 *)gd->arch.tlb_addr;
> - u64 value;
> +#ifdef DEBUG_MMU
> +#define DPRINTF(a, ...) printf("%s:%d: " a, __func__, __LINE__, __VA_ARGS__)
> +#else
> +#define DPRINTF(a, ...) do { } while(0)
> +#endif
Can you use the normal DEBUG and debug()?
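For reference, the stock pattern would be something like this (just a
sketch; show_pte() is a made-up function for illustration):

	/*
	 * With the normal mechanism, defining DEBUG before the
	 * includes enables the output; without it, debug() compiles
	 * away to nothing.
	 */
	#define DEBUG
	#include <common.h>

	static void show_pte(u64 *pte, int level)
	{
		debug("%s: PTE %p at level %d: %llx\n",
		      __func__, pte, level, *pte);
	}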
>
> - value = ptl2_entry | PTL1_TYPE_TABLE;
> - pgd[index] = value;
> -}
> +DECLARE_GLOBAL_DATA_PTR;
>
> -static void set_ptl2_block(u64 ptl1, u64 bfn, u64 address, u64 memory_attrs)
> -{
> - u64 *pmd = (u64 *)ptl1;
> - u64 value;
> +#ifndef CONFIG_SYS_DCACHE_OFF
>
> - value = address | PTL2_TYPE_BLOCK | PTL2_BLOCK_AF;
> - value |= memory_attrs;
> - pmd[bfn] = value;
> -}
> +/*
> + * With 4k page granule, a virtual address is split into 4 lookup parts
> + * spanning 9 bits each:
> + *
> + * _______________________________________________
> + * | | | | | | |
> + * | 0 | Lv0 | Lv1 | Lv2 | Lv3 | off |
> + * |_______|_______|_______|_______|_______|_______|
> + * 63-48 47-39 38-30 29-21 20-12 11-00
> + *
> + * mask page size
> + *
> + * Lv0: FF8000000000 --
> + * Lv1: 7FC0000000 1G
> + * Lv2: 3FE00000 2M
> + * Lv3: 1FF000 4K
> + * off: FFF
> + */
>
> +#ifdef CONFIG_SYS_FULL_VA
> static struct mm_region mem_map[] = CONFIG_SYS_MEM_MAP;
I am not keen on the idea of using a big #define table on these boards.
Is there not a device-tree binding for this that we can use? It is
just a data table, and we are moving to Kconfig and eventually want to
drop the config files.
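For context, the table is pure data, something along these lines (a
hypothetical example; the attribute macro names are from my reading of
the armv8 mmu headers, and the real thunderx entries differ):

	#define CONFIG_SYS_MEM_MAP	{				\
		{							\
			/* DRAM: normal memory, cacheable */		\
			.base = 0x000000000000UL,			\
			.size = 0x000040000000UL,			\
			.attrs = PTE_BLOCK_MEMTYPE(MT_NORMAL) |		\
				 PTE_BLOCK_INNER_SHARE,			\
		}, {							\
			/* Device I/O: strongly ordered, uncached */	\
			.base = 0x800000000000UL,			\
			.size = 0x000040000000UL,			\
			.attrs = PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE) |	\
				 PTE_BLOCK_NON_SHARE,			\
		},							\
	}

That really is the sort of thing that could live in a device tree (or
at least Kconfig) rather than a config header.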
>
> -#define PTL1_ENTRIES CONFIG_SYS_PTL1_ENTRIES
> -#define PTL2_ENTRIES CONFIG_SYS_PTL2_ENTRIES
> -
> static u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
> {
> u64 max_addr = 0;
> @@ -79,8 +87,8 @@ static u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
> }
>
> /* PTWs cacheable, inner/outer WBWA and inner shareable */
> - tcr |= TCR_TG0_64K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
> - tcr |= TCR_T0SZ(VA_BITS);
> + tcr |= TCR_TG0_4K | TCR_SHARED_INNER | TCR_ORGN_WBWA | TCR_IRGN_WBWA;
> + tcr |= TCR_T0SZ(va_bits);
>
> if (pips)
> *pips = ips;
> @@ -90,39 +98,196 @@ static u64 get_tcr(int el, u64 *pips, u64 *pva_bits)
> return tcr;
> }
>
> -static void setup_pgtables(void)
> +#define MAX_PTE_ENTRIES 512
> +
> +static int pte_type(u64 *pte)
> +{
> + return *pte & PTE_TYPE_MASK;
> +}
> +
> +/* Returns the LSB number for a PTE on level <level> */
> +static int level2shift(int level)
> {
> - int l1_e, l2_e;
> - unsigned long pmd = 0;
> - unsigned long address;
> -
> - /* Setup the PMD pointers */
> - for (l1_e = 0; l1_e < CONFIG_SYS_MEM_MAP_SIZE; l1_e++) {
> - gd->arch.pmd_addr[l1_e] = gd->arch.tlb_addr +
> - PTL1_ENTRIES * sizeof(u64);
> - gd->arch.pmd_addr[l1_e] += PTL2_ENTRIES * sizeof(u64) * l1_e;
> - gd->arch.pmd_addr[l1_e] = ALIGN(gd->arch.pmd_addr[l1_e],
> - 0x10000UL);
> + /* Page is 12 bits wide, every level translates 9 bits */
> + return (12 + 9 * (3 - level));
> +}
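As a sanity check of the formula (a throwaway sketch, not part of the
patch):

	#include <stdio.h>

	/* same formula as above: 12-bit page offset, 9 bits per level */
	static int level2shift(int level)
	{
		return 12 + 9 * (3 - level);
	}

	int main(void)
	{
		int level;

		/* prints 39/30/21/12, i.e. 512G/1G/2M/4K per entry */
		for (level = 0; level < 4; level++)
			printf("Lv%d: shift %d\n", level, level2shift(level));
		return 0;
	}

This matches the mask table in the comment block above.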
> +
> +static u64 *find_pte(u64 addr, int level)
> +{
> + int start_level = 0;
> + u64 *pte;
> + u64 idx;
> + u64 va_bits;
> + int i;
> +
> + DPRINTF("addr=%llx level=%d\n", addr, level);
> +
> + get_tcr(0, NULL, &va_bits);
> + if (va_bits < 39)
> + start_level = 1;
> +
> + if (level < start_level)
> + return NULL;
> +
> + /* Walk through all page table levels to find our PTE */
> + pte = (u64*)gd->arch.tlb_addr;
> + for (i = start_level; i < 4; i++) {
> + idx = (addr >> level2shift(i)) & 0x1FF;
> + pte += idx;
> + DPRINTF("idx=%llx PTE %p at level %d: %llx\n", idx, pte, i, *pte);
> +
> + /* Found it */
> + if (i == level)
> + return pte;
> + /* PTE is no table (either invalid or block), can't traverse */
> + if (pte_type(pte) != PTE_TYPE_TABLE)
> + return NULL;
> + /* Off to the next level */
> + pte = (u64*)(*pte & 0x0000fffffffff000ULL);
> }
>
> - /* Setup the page tables */
> - for (l1_e = 0; l1_e < PTL1_ENTRIES; l1_e++) {
> - if (mem_map[pmd].base ==
> - (uintptr_t)l1_e << PTL2_BITS) {
> - set_ptl1_entry(l1_e, gd->arch.pmd_addr[pmd]);
> -
> - for (l2_e = 0; l2_e < PTL2_ENTRIES; l2_e++) {
> - address = mem_map[pmd].base
> - + (uintptr_t)l2_e * BLOCK_SIZE;
> - set_ptl2_block(gd->arch.pmd_addr[pmd], l2_e,
> - address, mem_map[pmd].attrs);
> - }
> + /* Should never reach here */
> + return NULL;
> +}
> +
> +/* Creates a new full table (512 entries) and sets *pte to refer to it */
> +static u64 *create_table(void)
> +{
> + u64 *new_table = (u64*)gd->arch.tlb_fillptr;
> + u64 pt_len = MAX_PTE_ENTRIES * sizeof(u64);
> +
> + /* Allocate MAX_PTE_ENTRIES pte entries */
> + gd->arch.tlb_fillptr += pt_len;
> +
> + if (gd->arch.tlb_fillptr - gd->arch.tlb_addr > gd->arch.tlb_size)
> + panic("Insufficient RAM for page table: 0x%lx > 0x%lx",
> + gd->arch.tlb_fillptr - gd->arch.tlb_addr,
> + gd->arch.tlb_size);
For each of these panic() calls, can you please add a comment as to
what the user should do? It needs to be very clear what action should
be taken to resolve the problem.
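Something like this sketch, for example (exact wording up to you):

	if (gd->arch.tlb_fillptr - gd->arch.tlb_addr > gd->arch.tlb_size)
		/* Tell the user which knob to turn */
		panic("Insufficient RAM for page table: 0x%lx > 0x%lx. "
		      "Please increase the size returned by "
		      "get_page_table_size()",
		      gd->arch.tlb_fillptr - gd->arch.tlb_addr,
		      gd->arch.tlb_size);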
> +
> + /* Mark all entries as invalid */
> + memset(new_table, 0, pt_len);
>
> - pmd++;
> - } else {
> - set_ptl1_entry(l1_e, 0);
> + return new_table;
> +}
> +
> +static void set_pte_table(u64 *pte, u64 *table)
> +{
> + /* Point *pte to the new table */
> + DPRINTF("Setting %p to addr=%p\n", pte, table);
> + *pte = PTE_TYPE_TABLE | (ulong)table;
> +}
> +
> +/* Add one mm_region map entry to the page tables */
> +static void add_map(struct mm_region *map)
> +{
> + u64 *pte;
> + u64 addr = map->base;
> + u64 size = map->size;
> + u64 attrs = map->attrs | PTE_TYPE_BLOCK | PTE_BLOCK_AF;
> + u64 blocksize;
> + int level;
> + u64 *new_table;
> +
> + while (size) {
> + pte = find_pte(addr, 0);
> + if (pte && (pte_type(pte) == PTE_TYPE_FAULT)) {
> + DPRINTF("Creating table for addr 0x%llx\n", addr);
> + new_table = create_table();
> + set_pte_table(pte, new_table);
> }
> +
> + for (level = 1; level < 4; level++) {
> + pte = find_pte(addr, level);
> + blocksize = 1ULL << level2shift(level);
> + DPRINTF("Checking if pte fits for addr=%llx size=%llx "
> + "blocksize=%llx\n", addr, size, blocksize);
> + if (size >= blocksize && !(addr & (blocksize - 1))) {
> + /* Page fits, create block PTE */
> + DPRINTF("Setting PTE %p to block addr=%llx\n",
> + pte, addr);
> + *pte = addr | attrs;
> + addr += blocksize;
> + size -= blocksize;
> + break;
> + } else if ((pte_type(pte) == PTE_TYPE_FAULT)) {
> + /* Page doesn't fit, create subpages */
> + DPRINTF("Creating subtable for addr 0x%llx "
> + "blksize=%llx\n", addr, blocksize);
> + new_table = create_table();
> + set_pte_table(pte, new_table);
> + }
> + }
> + }
> +}
> +
> +/* Splits a block PTE into table with subpages spanning the old block */
> +static void split_block(u64 *pte, int level)
> +{
> + u64 old_pte = *pte;
> + u64 *new_table;
> + u64 i = 0;
> + /* level describes the parent level, we need the child ones */
> + int levelshift = level2shift(level + 1);
> +
> + if (pte_type(pte) != PTE_TYPE_BLOCK)
> + panic("PTE %p (%llx) is not a block", pte, old_pte);
> +
> + new_table = create_table();
> + DPRINTF("Splitting pte %p (%llx) into %p\n", pte, old_pte, new_table);
> +
> + for (i = 0; i < MAX_PTE_ENTRIES; i++) {
> + new_table[i] = old_pte | (i << levelshift);
> + DPRINTF("Setting new_table[%lld] = %llx\n", i, new_table[i]);
> }
> +
> + /* Set the new table into effect */
> + set_pte_table(pte, new_table);
> +}
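The arithmetic here is worth spelling out (my example values, not from
the patch): splitting a 1G block at level 1 creates level-2 entries, so
levelshift = level2shift(2) = 21 and the 512 new PTEs step through the
old range in 2M increments:

	new_table[0]   = old_pte | (0 << 21);	/* base + 0M */
	new_table[1]   = old_pte | (1 << 21);	/* base + 2M */
	/* ... */
	new_table[511] = old_pte | (511 << 21);	/* base + 1022M */

since the attribute and type bits of old_pte all live outside the
address field being ORed in.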
> +
> +/* Returns the estimated required size of all page tables */
> +u64 get_page_table_size(void)
> +{
> + int i;
> + u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
> + u64 size = 0;
> +
> + /* root page table */
> + size += one_pt;
> +
> + for (i = 0; i < ARRAY_SIZE(mem_map); i++) {
> + struct mm_region *map = &mem_map[i];
> +
> + /* Account for Lv0 page tables */
> + size += one_pt * ((map->size >> 39) + 1);
> +
> + /* 1GB aligned pages fit already, so count the others */
> + if (map->size & 0x3fffffffULL)
> + size += one_pt;
> + if (map->base & 0x3fffffffULL)
> + size += one_pt;
> + }
> +
> + /* Assume we may have to split up to 4 more page tables off */
> + size += one_pt * 4;
I suspect this is a better idea than just allocating a fixed size for
the whole table (like 1MB). But the error you get when this fails
should point here, so people know how to fix it.
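For what it is worth, plugging in a single 4GB region at base 0 (my
numbers, not the patch's) gives:

	1 (root) + 1 ((size >> 39) + 1) + 0 + 0 + 4 (split headroom)
	  = 6 tables = 6 * 512 * 8 bytes = 24KiB

so the estimate stays pleasantly small compared to a fixed 1MB
allocation.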
> +
> + return size;
> +}
> +
> +static void setup_pgtables(void)
> +{
> + int i;
> +
> + /*
> + * Allocate the first level we're on with invalid entries.
> + * If the starting level is 0 (va_bits >= 39), then this is our
> + * Lv0 page table, otherwise it's the entry Lv1 page table.
> + */
> + gd->arch.tlb_fillptr = gd->arch.tlb_addr;
> + create_table();
> +
> + /* Now add all MMU table entries one after another to the table */
> + for (i = 0; i < ARRAY_SIZE(mem_map); i++)
> + add_map(&mem_map[i]);
> }
>
> #else
> @@ -157,10 +322,8 @@ __weak void mmu_setup(void)
> int el;
>
> #ifdef CONFIG_SYS_FULL_VA
> - unsigned long coreid = read_mpidr() & CONFIG_COREID_MASK;
> -
> - /* Set up page tables only on BSP */
> - if (coreid == BSP_COREID)
> + /* Set up page tables only once */
> + if (!gd->arch.tlb_fillptr)
> setup_pgtables();
>
> el = current_el();
> @@ -311,6 +474,79 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
> flush_dcache_range(start, end);
> asm volatile("dsb sy");
> }
> +#else
> +static bool is_aligned(u64 addr, u64 size, u64 align)
> +{
> + return !(addr & (align - 1)) && !(size & (align - 1));
> +}
> +
> +static u64 set_one_region(u64 start, u64 size, u64 attrs, int level)
> +{
> + int levelshift = level2shift(level);
> + u64 levelsize = 1ULL << levelshift;
> + u64 *pte = find_pte(start, level);
> +
> + /* Can we just modify the current level block PTE? */
> + if (is_aligned(start, size, levelsize)) {
> + *pte &= ~PMD_ATTRINDX_MASK;
> + *pte |= attrs;
> + DPRINTF("Set attrs=%llx pte=%p level=%d\n", attrs, pte, level);
> +
> + return levelsize;
> + }
> +
> + /* Unaligned or doesn't fit, maybe split block into table */
> + DPRINTF("addr=%llx level=%d pte=%p (%llx)\n", start, level, pte, *pte);
> +
> + /* Maybe we need to split the block into a table */
> + if (pte_type(pte) == PTE_TYPE_BLOCK)
> + split_block(pte, level);
> +
> + /* And then double-check it became a table or already is one */
> + if (pte_type(pte) != PTE_TYPE_TABLE)
> + panic("PTE %p (%llx) for addr=%llx should be a table",
> + pte, *pte, start);
> +
> + /* Roll on to the next page table level */
> + return 0;
> +}
> +
> +void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size,
> + enum dcache_option option)
> +{
> + u64 attrs = PMD_ATTRINDX(option);
> + u64 real_start = start;
> + u64 real_size = size;
> +
> + DPRINTF("start=%lx size=%lx\n", (ulong)start, (ulong)size);
> +
> + /*
> + * Loop through the address range until we find a page granule that fits
> + * our alignment constraints, then set it to the new cache attributes
> + */
> + while (size > 0) {
> + int level;
> + u64 r;
> +
> + for (level = 1; level < 4; level++) {
> + r = set_one_region(start, size, attrs, level);
> + if (r) {
> + /* PTE successfully replaced */
> + size -= r;
> + start += r;
> + break;
> + }
> + }
> +
> + }
> +
> + asm volatile("dsb sy");
> + __asm_invalidate_tlb_all();
> + asm volatile("dsb sy");
> + asm volatile("isb");
> + flush_dcache_range(real_start, real_start + real_size);
> + asm volatile("dsb sy");
> +}
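For reference, a typical board-side caller would look like this (a
sketch; the framebuffer address and size are made up):

	#include <common.h>
	#include <asm/system.h>

	#define FB_BASE	0x80000000UL
	#define FB_SIZE	0x00800000UL	/* 8MB, for example */

	static void board_map_framebuffer(void)
	{
		/* map the (hypothetical) framebuffer uncached */
		mmu_set_region_dcache_behaviour(FB_BASE, FB_SIZE,
						DCACHE_OFF);
	}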
> #endif
>
> #else /* CONFIG_SYS_DCACHE_OFF */
[snip]
Regards,
Simon