[PATCH v3 2/3] arm64: add software pagetable walker
Caleb Connolly
caleb.connolly at linaro.org
Mon Jun 17 10:03:48 CEST 2024
Add a basic software implementation of the ARM64 pagetable walker. This
can be used for debugging U-Boot's pagetable, as well as dumping the
pagetable from the previous bootloader stage if it used one (by reading
out the ttbr address).
One can either call dump_pagetable() to print the pagetable to the
console with the default formatter, or implement their own pagetable
handler using walke_pagetable() with a custom pte_walker_cb_t callback.
All of the added code is discarded when unused, hence there is no need
to add an additional Kconfig option for this.
Signed-off-by: Caleb Connolly <caleb.connolly at linaro.org>
---
arch/arm/cpu/armv8/cache_v8.c | 245 +++++++++++++++++++++++++++++++++++++++
arch/arm/include/asm/armv8/mmu.h | 56 +++++++++
2 files changed, 301 insertions(+)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index d4c64f2d60d9..c3f8dac648ba 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -395,8 +395,253 @@ static int count_ranges(void)
return count;
}
+#define ALL_ATTRS (3 << 8 | PMD_ATTRINDX_MASK)
+#define PTE_IS_TABLE(pte, level) (pte_type(&(pte)) == PTE_TYPE_TABLE && (level) < 3)
+
+enum walker_state {
+ WALKER_STATE_START = 0,
+ WALKER_STATE_TABLE,
+ WALKER_STATE_REGION, /* block or page, depending on level */
+};
+
+
+/**
+ * __pagetable_walk() - Walk through the pagetable and call cb() for each memory region
+ *
+ * This is a software implementation of the ARMv8-A MMU translation table walk. As per
+ * section D5.4 of the ARMv8-A Architecture Reference Manual. It recursively walks the
+ * 4 or 3 levels of the page table and calls the callback function for each discrete
+ * region of memory (that being the discovery of a new table, a collection of blocks
+ * with the same attributes, or of pages with the same attributes).
+ *
+ * U-Boot picks the smallest number of virtual address (VA) bits that it can based on the
+ * memory map configured by the board. If this is less than 39 then the MMU will only use
+ * 3 levels of translation instead of 3 - skipping level 0.
+ *
+ * Each level has 512 entries of 64-bits each. Each entry includes attribute bits and
+ * an address. When the attribute bits indicate a table, the address is the physical
+ * address of the table, so we can recursively call _pagetable_walk() on it (after calling
+ * @cb). If instead they indicate a block or page, we record the start address and attributes
+ * and continue walking until we find a region with different attributes, or the end of the
+ * table, in either case we call @cb with the start and end address of the region.
+ *
+ * This approach can be used to fully emulate the MMU's translation table walk, as per
+ * Figure D5-25 of the ARMv8-A Architecture Reference Manual.
+ *
+ * @addr: The address of the table to walk
+ * @tcr: The TCR register value
+ * @level: The current level of the table
+ * @cb: The callback function to call for each region
+ * @priv: Private data to pass to the callback function
+ */
+static void __pagetable_walk(u64 addr, u64 tcr, int level, pte_walker_cb_t cb, void *priv)
+{
+ u64 *table = (u64 *)addr;
+ u64 attrs, last_attrs = 0, last_addr = 0, entry_start = 0;
+ int i;
+ u64 va_bits = 64 - (tcr & (BIT(6) - 1));
+ static enum walker_state state[4] = { 0 };
+ static bool exit;
+
+ if (!level) {
+ exit = false;
+ if (va_bits < 39)
+ level = 1;
+ }
+
+ state[level] = WALKER_STATE_START;
+
+ /* Walk through the table entries */
+ for (i = 0; i < MAX_PTE_ENTRIES; i++) {
+ u64 pte = table[i];
+ u64 _addr = pte & GENMASK_ULL(va_bits, PAGE_SHIFT);
+
+ if (exit)
+ return;
+
+ if (pte_type(&pte) == PTE_TYPE_FAULT)
+ continue;
+
+ attrs = pte & ALL_ATTRS;
+ /* If we're currently inside a block or set of pages */
+ if (state[level] > WALKER_STATE_START && state[level] != WALKER_STATE_TABLE) {
+ /*
+ * Continue walking if this entry has the same attributes as the last and
+ * is one page/block away -- it's a contiguous region.
+ */
+ if (attrs == last_attrs && _addr == last_addr + (1 << level2shift(level))) {
+ last_attrs = attrs;
+ last_addr = _addr;
+ continue;
+ } else {
+ /* We either hit a table or a new region */
+ exit = cb(entry_start, last_addr + (1 << level2shift(level)),
+ va_bits, level, priv);
+ if (exit)
+ return;
+ state[level] = WALKER_STATE_START;
+ }
+ }
+ last_attrs = attrs;
+ last_addr = _addr;
+
+ if (PTE_IS_TABLE(pte, level)) {
+ /* After the end of the table might be corrupted data */
+ if (!_addr || (pte & 0xfff) > 0x3ff)
+ return;
+ state[level] = WALKER_STATE_TABLE;
+ /* Signify the start of a table */
+ exit = cb(pte, 0, va_bits, level, priv);
+ if (exit)
+ return;
+
+ /* Go down a level */
+ __pagetable_walk(_addr, tcr, level + 1, cb, priv);
+ state[level] = WALKER_STATE_START;
+ } else if (pte_type(&pte) == PTE_TYPE_BLOCK || pte_type(&pte) == PTE_TYPE_PAGE) {
+ /* We foud a block or page, start walking */
+ entry_start = pte;
+ state[level] = WALKER_STATE_REGION;
+ }
+ }
+
+ if (state[level] > WALKER_STATE_START)
+ exit = cb(entry_start, last_addr + (1 << level2shift(level)), va_bits, level, priv);
+}
+
+static void pretty_print_pte_type(u64 pte)
+{
+ switch (pte_type(&pte)) {
+ case PTE_TYPE_FAULT:
+ printf(" %-5s", "Fault");
+ break;
+ case PTE_TYPE_BLOCK:
+ printf(" %-5s", "Block");
+ break;
+ case PTE_TYPE_PAGE:
+ printf(" %-5s", "Pages");
+ break;
+ default:
+ printf(" %-5s", "Unk");
+ }
+}
+
+static void pretty_print_table_attrs(u64 pte)
+{
+ int ap = (pte & PTE_TABLE_AP) >> 61;
+
+ printf(" | %2s %10s",
+ (ap & 2) ? "RO" : "",
+ (ap & 1) ? "!EL0" : "");
+ printf(" | %3s %2s %2s",
+ (pte & PTE_TABLE_PXN) ? "PXN" : "",
+ (pte & PTE_TABLE_XN) ? "XN" : "",
+ (pte & PTE_TABLE_NS) ? "NS" : "");
+}
+
+static void pretty_print_block_attrs(u64 pte)
+{
+ u64 attrs = pte & PMD_ATTRINDX_MASK;
+
+ switch (attrs) {
+ case PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE):
+ printf(" | %-13s", "Device-nGnRnE");
+ break;
+ case PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRE):
+ printf(" | %-13s", "Device-nGnRE");
+ break;
+ case PTE_BLOCK_MEMTYPE(MT_DEVICE_GRE):
+ printf(" | %-13s", "Device-GRE");
+ break;
+ case PTE_BLOCK_MEMTYPE(MT_NORMAL_NC):
+ printf(" | %-13s", "Normal-NC");
+ break;
+ case PTE_BLOCK_MEMTYPE(MT_NORMAL):
+ printf(" | %-13s", "Normal");
+ break;
+ default:
+ printf(" | %-13s", "Unknown");
+ }
+}
+
+static void pretty_print_block_memtype(u64 pte)
+{
+ u64 share = pte & (3 << 8);
+
+ switch (share) {
+ case PTE_BLOCK_NON_SHARE:
+ printf(" | %-16s", "Non-shareable");
+ break;
+ case PTE_BLOCK_OUTER_SHARE:
+ printf(" | %-16s", "Outer-shareable");
+ break;
+ case PTE_BLOCK_INNER_SHARE:
+ printf(" | %-16s", "Inner-shareable");
+ break;
+ default:
+ printf(" | %-16s", "Unknown");
+ }
+}
+
+static void print_pte(u64 pte, int level)
+{
+ if (PTE_IS_TABLE(pte, level)) {
+ printf(" %-5s", "Table");
+ pretty_print_table_attrs(pte);
+ } else {
+ pretty_print_pte_type(pte);
+ pretty_print_block_attrs(pte);
+ pretty_print_block_memtype(pte);
+ }
+ printf("\n");
+}
+
+/**
+ * pagetable_print_entry() - Callback function to print a single pagetable region
+ *
+ * This is the default callback used by @dump_pagetable(). It does some basic pretty
+ * printing (see example in the U-Boot arm64 documentation). It can be replaced by
+ * a custom callback function if more detailed information is needed.
+ *
+ * @start_attrs: The start address and attributes of the region (or table address)
+ * @end: The end address of the region (or 0 if it's a table)
+ * @va_bits: The number of bits used for the virtual address
+ * @level: The level of the region
+ * @priv: Private data for the callback (unused)
+ */
+static bool pagetable_print_entry(u64 start_attrs, u64 end, int va_bits, int level, void *priv)
+{
+ u64 _addr = start_attrs & GENMASK_ULL(va_bits, PAGE_SHIFT);
+ int indent = va_bits < 39 ? level - 1 : level;
+
+ printf("%*s", indent * 2, "");
+ if (PTE_IS_TABLE(start_attrs, level))
+ printf("[%#011llx]%14s", _addr, "");
+ else
+ printf("[%#011llx - %#011llx]", _addr, end);
+
+ printf("%*s | ", (3 - level) * 2, "");
+ print_pte(start_attrs, level);
+
+ return false;
+}
+
+void walk_pagetable(u64 ttbr, u64 tcr, pte_walker_cb_t cb, void *priv)
+{
+ __pagetable_walk(ttbr, tcr, 0, cb, priv);
+}
+
+void dump_pagetable(u64 ttbr, u64 tcr)
+{
+ u64 va_bits = 64 - (tcr & (BIT(6) - 1));
+
+ printf("Walking pagetable at %p, va_bits: %lld. Using %d levels\n", (void *)ttbr,
+ va_bits, va_bits < 39 ? 3 : 4);
+ walk_pagetable(ttbr, tcr, pagetable_print_entry, NULL);
+}
+
/* Returns the estimated required size of all page tables */
__weak u64 get_page_table_size(void)
{
u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
index 52cb18b9ed5e..1348db4204ee 100644
--- a/arch/arm/include/asm/armv8/mmu.h
+++ b/arch/arm/include/asm/armv8/mmu.h
@@ -128,8 +128,64 @@ static inline void set_ttbr_tcr_mair(int el, u64 table, u64 tcr, u64 attr)
}
asm volatile("isb");
}
+static inline void get_ttbr_tcr_mair(int el, u64 *table, u64 *tcr, u64 *attr)
+{
+ if (el == 1) {
+ asm volatile("mrs %0, ttbr0_el1" : "=r" (*table));
+ asm volatile("mrs %0, tcr_el1" : "=r" (*tcr));
+ asm volatile("mrs %0, mair_el1" : "=r" (*attr));
+ } else if (el == 2) {
+ asm volatile("mrs %0, ttbr0_el2" : "=r" (*table));
+ asm volatile("mrs %0, tcr_el2" : "=r" (*tcr));
+ asm volatile("mrs %0, mair_el2" : "=r" (*attr));
+ } else if (el == 3) {
+ asm volatile("mrs %0, ttbr0_el3" : "=r" (*table));
+ asm volatile("mrs %0, tcr_el3" : "=r" (*tcr));
+ asm volatile("mrs %0, mair_el3" : "=r" (*attr));
+ } else {
+ hang();
+ }
+}
+
+/**
+ * pte_walker_cb_t - callback function for walk_pagetable.
+ *
+ * This function is called when the walker finds a table entry
+ * or after parsing a block or pages. For a table the @end address
+ * is 0, and @addr is the address of the table. Otherwise, they
+ * are the start and end physical addresses of the block or page.
+ *
+ * @addr: PTE start address (PA), or address of table. Includes attributes.
+ * @end: End address of the region (or 0 for a table)
+ * @va_bits: Number of bits in the virtual address
+ * @level: Table level
+ * @priv: Private data for the callback
+ *
+ * Return: true to stop walking, false to continue
+ */
+typedef bool (*pte_walker_cb_t)(u64 addr, u64 end, int va_bits, int level, void *priv);
+
+/**
+ * walk_pagetable() - Walk the pagetable at ttbr and call @cb for each region
+ *
+ * @ttbr: Address of the pagetable to dump
+ * @tcr: TCR value to use
+ * @cb: Callback function to call for each entry
+ * @priv: Private data for the callback
+ */
+void walk_pagetable(u64 ttbr, u64 tcr, pte_walker_cb_t cb, void *priv);
+
+/**
+ * dump_pagetable() - Dump the pagetable at ttbr, printing each region and
+ * level.
+ *
+ * @ttbr: Address of the pagetable to dump
+ * @tcr: TCR value to use
+ */
+void dump_pagetable(u64 ttbr, u64 tcr);
+
struct mm_region {
u64 virt;
u64 phys;
u64 size;
--
2.45.0
More information about the U-Boot
mailing list