Merge patch series "arm64: add a software pagetable walker"

Caleb Connolly <caleb.connolly@linaro.org> says:

MMU issues are some of the most frustrating to debug. To make this
slightly less unbearable, introduce a software pagetable walker for
ARMv8. This can be called to dump a pagetable with the default
formatter, or a custom callback can be provided to implement more
complicated parsing.

This can also be useful to dump the pagetable used by a previous
bootloader stage (by reading out the ttbr register).

Here is an example of the output when walking U-Boot's own memory map
on a Qualcomm RB3 board:

Walking pagetable at 000000017df90000, va_bits: 36. Using 3 levels
[0x17df91000]                   |  Table |               |
  [0x17df92000]                 |  Table |               |
    [0x000001000 - 0x000200000] |  Pages | Device-nGnRnE | Non-shareable
  [0x000200000 - 0x040000000]   |  Block | Device-nGnRnE | Non-shareable
[0x040000000 - 0x080000000]     |  Block | Device-nGnRnE | Non-shareable
[0x080000000 - 0x140000000]     |  Block | Normal        | Inner-shareable
[0x17df93000]                   |  Table |               |
  [0x140000000 - 0x17de00000]   |  Block | Normal        | Inner-shareable
  [0x17df94000]                 |  Table |               |
    [0x17de00000 - 0x17dfa0000] |  Pages | Normal        | Inner-shareable
This commit is contained in:
Tom Rini 2024-06-25 17:22:36 -06:00
commit 42276c3658
3 changed files with 351 additions and 1 deletions

View file

@ -396,6 +396,251 @@ static int count_ranges(void)
return count;
}
/*
 * Attribute bits the walker compares to decide whether two neighbouring
 * entries belong to the same region: bits 9:8 (decoded as shareability by
 * pretty_print_block_memtype()) plus the PMD_ATTRINDX_MASK field.
 */
#define ALL_ATTRS (3 << 8 | PMD_ATTRINDX_MASK)
/*
 * True when @pte is a table descriptor at @level. The type check alone is not
 * enough, so the level is tested too: only levels below 3 are treated as
 * possibly holding tables. @pte must be an lvalue because its address is taken.
 */
#define PTE_IS_TABLE(pte, level) (pte_type(&(pte)) == PTE_TYPE_TABLE && (level) < 3)
/* Per-level progress marker used by __pagetable_walk() */
enum walker_state {
/* No block/page region is currently open at this level */
WALKER_STATE_START = 0,
/* A table descriptor was found and is being descended into */
WALKER_STATE_TABLE,
WALKER_STATE_REGION, /* block or page, depending on level */
};
/**
 * __pagetable_walk() - Walk through the pagetable and call cb() for each memory region
 *
 * This is a software implementation of the ARMv8-A MMU translation table walk, as per
 * section D5.4 of the ARMv8-A Architecture Reference Manual. It recursively walks the
 * 4 or 3 levels of the page table and calls the callback function for each discrete
 * region of memory (that being the discovery of a new table, a collection of blocks
 * with the same attributes, or of pages with the same attributes).
 *
 * U-Boot picks the smallest number of virtual address (VA) bits that it can based on the
 * memory map configured by the board. If this is less than 39 then the MMU will only use
 * 3 levels of translation instead of 4 - skipping level 0.
 *
 * Each level has 512 entries of 64-bits each. Each entry includes attribute bits and
 * an address. When the attribute bits indicate a table, the address is the physical
 * address of the table, so we can recursively call __pagetable_walk() on it (after calling
 * @cb). If instead they indicate a block or page, we record the start address and attributes
 * and continue walking until we find a region with different attributes, or the end of the
 * table, in either case we call @cb with the start and end address of the region.
 *
 * This approach can be used to fully emulate the MMU's translation table walk, as per
 * Figure D5-25 of the ARMv8-A Architecture Reference Manual.
 *
 * The walker keeps its per-level state and its stop flag in function-local statics, so
 * it is not reentrant: @cb must not start another walk.
 *
 * @addr: The address of the table to walk
 * @tcr: The TCR register value
 * @level: The current level of the table
 * @cb: The callback function to call for each region
 * @priv: Private data to pass to the callback function
 */
static void __pagetable_walk(u64 addr, u64 tcr, int level, pte_walker_cb_t cb, void *priv)
{
	u64 *table = (u64 *)addr;
	u64 attrs, last_attrs = 0, last_addr = 0, entry_start = 0;
	int i;
	/* The VA size is derived from the low six bits of the TCR value */
	u64 va_bits = 64 - (tcr & (BIT(6) - 1));
	/*
	 * Both statics are shared by every recursion depth: state[] is indexed by
	 * level, and exit lets a callback that returned true unwind all levels.
	 */
	static enum walker_state state[4] = { 0 };
	static bool exit;

	if (!level) {
		/* Top-level call: reset the stop flag left over from a previous walk */
		exit = false;
		if (va_bits < 39)
			level = 1;
	}

	state[level] = WALKER_STATE_START;

	/* Walk through the table entries */
	for (i = 0; i < MAX_PTE_ENTRIES; i++) {
		u64 pte = table[i];
		u64 _addr = pte & GENMASK_ULL(va_bits, PAGE_SHIFT);

		/* A nested walk or an earlier callback asked us to stop */
		if (exit)
			return;

		if (pte_type(&pte) == PTE_TYPE_FAULT)
			continue;

		attrs = pte & ALL_ATTRS;
		/* If we're currently inside a block or set of pages */
		if (state[level] > WALKER_STATE_START && state[level] != WALKER_STATE_TABLE) {
			/*
			 * Continue walking if this entry has the same attributes as the last and
			 * is one page/block away -- it's a contiguous region.
			 *
			 * The region size is computed in 64 bits: shifting a plain int would
			 * overflow once the shift reaches the width of int.
			 */
			if (attrs == last_attrs &&
			    _addr == last_addr + (1ULL << level2shift(level))) {
				last_attrs = attrs;
				last_addr = _addr;
				continue;
			} else {
				/* We either hit a table or a new region */
				exit = cb(entry_start, last_addr + (1ULL << level2shift(level)),
					  va_bits, level, priv);
				if (exit)
					return;
				state[level] = WALKER_STATE_START;
			}
		}
		last_attrs = attrs;
		last_addr = _addr;

		if (PTE_IS_TABLE(pte, level)) {
			/* After the end of the table might be corrupted data */
			if (!_addr || (pte & 0xfff) > 0x3ff)
				return;
			state[level] = WALKER_STATE_TABLE;
			/* Signify the start of a table */
			exit = cb(pte, 0, va_bits, level, priv);
			if (exit)
				return;

			/* Go down a level */
			__pagetable_walk(_addr, tcr, level + 1, cb, priv);
			state[level] = WALKER_STATE_START;
		} else if (pte_type(&pte) == PTE_TYPE_BLOCK || pte_type(&pte) == PTE_TYPE_PAGE) {
			/* We found a block or page, start walking */
			entry_start = pte;
			state[level] = WALKER_STATE_REGION;
		}
	}

	/* Report a region that was still open when the table ended */
	if (state[level] > WALKER_STATE_START)
		exit = cb(entry_start, last_addr + (1ULL << level2shift(level)), va_bits, level, priv);
}
/*
 * Print the descriptor type of @pte as a 5-character, left-aligned label
 * preceded by one space. Types other than fault, block and page print "Unk".
 */
static void pretty_print_pte_type(u64 pte)
{
	const char *label;

	switch (pte_type(&pte)) {
	case PTE_TYPE_FAULT:
		label = "Fault";
		break;
	case PTE_TYPE_BLOCK:
		label = "Block";
		break;
	case PTE_TYPE_PAGE:
		label = "Pages";
		break;
	default:
		label = "Unk";
		break;
	}

	printf(" %-5s", label);
}
/*
 * Print the attribute columns of a table descriptor: the two access-permission
 * flags taken from the PTE_TABLE_AP field (shifted down from bit 61), followed
 * by the PXN, XN and NS flags. A flag that is clear prints as blank padding.
 */
static void pretty_print_table_attrs(u64 pte)
{
	int ap = (pte & PTE_TABLE_AP) >> 61;
	const char *read_only = "";
	const char *no_el0 = "";
	const char *pxn = "";
	const char *xn = "";
	const char *ns = "";

	if (ap & 2)
		read_only = "RO";
	if (ap & 1)
		no_el0 = "!EL0";
	if (pte & PTE_TABLE_PXN)
		pxn = "PXN";
	if (pte & PTE_TABLE_XN)
		xn = "XN";
	if (pte & PTE_TABLE_NS)
		ns = "NS";

	printf(" | %2s %10s", read_only, no_el0);
	printf(" | %3s %2s %2s", pxn, xn, ns);
}
/*
 * Print the memory-type column of a block/page descriptor, decoded from the
 * PMD_ATTRINDX_MASK field of @pte. Indices this function does not know about
 * print "Unknown".
 */
static void pretty_print_block_attrs(u64 pte)
{
	const char *memtype;

	switch (pte & PMD_ATTRINDX_MASK) {
	case PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRNE):
		memtype = "Device-nGnRnE";
		break;
	case PTE_BLOCK_MEMTYPE(MT_DEVICE_NGNRE):
		memtype = "Device-nGnRE";
		break;
	case PTE_BLOCK_MEMTYPE(MT_DEVICE_GRE):
		memtype = "Device-GRE";
		break;
	case PTE_BLOCK_MEMTYPE(MT_NORMAL_NC):
		memtype = "Normal-NC";
		break;
	case PTE_BLOCK_MEMTYPE(MT_NORMAL):
		memtype = "Normal";
		break;
	default:
		memtype = "Unknown";
		break;
	}

	printf(" | %-13s", memtype);
}
/*
 * Print the shareability column of a block/page descriptor, decoded from
 * bits 9:8 of @pte. Despite the function name, the memory type itself is
 * printed by pretty_print_block_attrs().
 */
static void pretty_print_block_memtype(u64 pte)
{
	const char *shareability;

	switch (pte & (3 << 8)) {
	case PTE_BLOCK_NON_SHARE:
		shareability = "Non-shareable";
		break;
	case PTE_BLOCK_OUTER_SHARE:
		shareability = "Outer-shareable";
		break;
	case PTE_BLOCK_INNER_SHARE:
		shareability = "Inner-shareable";
		break;
	default:
		shareability = "Unknown";
		break;
	}

	printf(" | %-16s", shareability);
}
/*
 * Print the type and attribute columns for @pte and end the output line.
 * Table descriptors get the table attribute columns; everything else gets
 * the type, memory-type and shareability columns.
 */
static void print_pte(u64 pte, int level)
{
	if (!PTE_IS_TABLE(pte, level)) {
		pretty_print_pte_type(pte);
		pretty_print_block_attrs(pte);
		pretty_print_block_memtype(pte);
	} else {
		printf(" %-5s", "Table");
		pretty_print_table_attrs(pte);
	}

	printf("\n");
}
/**
 * pagetable_print_entry() - Default walker callback, prints one table or region
 *
 * dump_pagetable() passes this to walk_pagetable(). Each call prints one line:
 * the address (or address range), indented by two spaces per translation level,
 * followed by the decoded type and attributes. A custom callback can be used
 * instead when more detailed information is needed.
 *
 * @start_attrs: The start address and attributes of the region (or table address)
 * @end: The end address of the region (or 0 if it's a table)
 * @va_bits: The number of bits used for the virtual address
 * @level: The level of the region
 * @priv: Private data for the callback (unused)
 *
 * Return: always false, so the walk is never stopped early
 */
static bool pagetable_print_entry(u64 start_attrs, u64 end, int va_bits, int level, void *priv)
{
	const u64 base = start_attrs & GENMASK_ULL(va_bits, PAGE_SHIFT);
	int depth = level;

	/* With fewer than 39 VA bits the walk starts at level 1, so shift the indent left */
	if (va_bits < 39)
		depth--;

	printf("%*s", depth * 2, "");

	if (!PTE_IS_TABLE(start_attrs, level))
		printf("[%#011llx - %#011llx]", base, end);
	else
		printf("[%#011llx]%14s", base, "");

	/* Pad so the attribute columns line up whatever the level */
	printf("%*s | ", (3 - level) * 2, "");
	print_pte(start_attrs, level);

	return false;
}
/*
 * Public entry point of the walker: start at level 0 of the table at @ttbr.
 * __pagetable_walk() itself moves on to level 1 when @tcr selects fewer than
 * 39 VA bits.
 */
void walk_pagetable(u64 ttbr, u64 tcr, pte_walker_cb_t cb, void *priv)
{
	const int top_level = 0;

	__pagetable_walk(ttbr, tcr, top_level, cb, priv);
}
/*
 * Print a header line (table address, VA bits, number of levels) and then
 * walk the table at @ttbr with the default printing callback.
 */
void dump_pagetable(u64 ttbr, u64 tcr)
{
	/* The VA size is 64 minus the low six bits of the TCR value */
	u64 size_field = tcr & (BIT(6) - 1);
	u64 va_bits = 64 - size_field;
	int levels = 4;

	if (va_bits < 39)
		levels = 3;

	printf("Walking pagetable at %p, va_bits: %lld. Using %d levels\n", (void *)ttbr,
	       va_bits, levels);

	walk_pagetable(ttbr, tcr, pagetable_print_entry, NULL);
}
/* Returns the estimated required size of all page tables */
__weak u64 get_page_table_size(void)
{

View file

@ -51,7 +51,7 @@
#define PTE_TABLE_PXN (1UL << 59)
#define PTE_TABLE_XN (1UL << 60)
#define PTE_TABLE_AP (1UL << 61)
#define PTE_TABLE_AP (3UL << 61)
#define PTE_TABLE_NS (1UL << 63)
/*
@ -129,6 +129,62 @@ static inline void set_ttbr_tcr_mair(int el, u64 table, u64 tcr, u64 attr)
asm volatile("isb");
}
/*
 * Read the TTBR0, TCR and MAIR system registers of exception level @el (1, 2
 * or 3) into @table, @tcr and @attr. Any other @el value calls hang().
 */
static inline void get_ttbr_tcr_mair(int el, u64 *table, u64 *tcr, u64 *attr)
{
	switch (el) {
	case 1:
		asm volatile("mrs %0, ttbr0_el1" : "=r" (*table));
		asm volatile("mrs %0, tcr_el1" : "=r" (*tcr));
		asm volatile("mrs %0, mair_el1" : "=r" (*attr));
		break;
	case 2:
		asm volatile("mrs %0, ttbr0_el2" : "=r" (*table));
		asm volatile("mrs %0, tcr_el2" : "=r" (*tcr));
		asm volatile("mrs %0, mair_el2" : "=r" (*attr));
		break;
	case 3:
		asm volatile("mrs %0, ttbr0_el3" : "=r" (*table));
		asm volatile("mrs %0, tcr_el3" : "=r" (*tcr));
		asm volatile("mrs %0, mair_el3" : "=r" (*attr));
		break;
	default:
		hang();
		break;
	}
}
/**
 * typedef pte_walker_cb_t - callback function for walk_pagetable.
 *
 * This function is called when the walker finds a table entry
 * or after parsing a block or pages. For a table the @end address
 * is 0, and @addr is the address of the table. Otherwise, they
 * are the start and end physical addresses of the block or page.
 *
 * @addr: PTE start address (PA), or address of table. Includes attributes:
 *        the walker passes the raw descriptor, so the attribute bits must be
 *        masked off to obtain the address.
 * @end: End address of the region (or 0 for a table)
 * @va_bits: Number of bits in the virtual address
 * @level: Table level
 * @priv: Private data for the callback
 *
 * Return: true to stop walking, false to continue
 */
typedef bool (*pte_walker_cb_t)(u64 addr, u64 end, int va_bits, int level, void *priv);
/**
 * walk_pagetable() - Walk the pagetable at ttbr and call @cb for each region
 *
 * @ttbr: Address of the pagetable to walk
 * @tcr: TCR value to use
 * @cb: Callback function to call for each table and each block/page region
 * @priv: Private data for the callback
 */
void walk_pagetable(u64 ttbr, u64 tcr, pte_walker_cb_t cb, void *priv);
/**
 * dump_pagetable() - Dump the pagetable at ttbr, printing each region and
 * level.
 *
 * This is walk_pagetable() with the default printing callback.
 *
 * @ttbr: Address of the pagetable to dump
 * @tcr: TCR value to use
 */
void dump_pagetable(u64 ttbr, u64 tcr);
struct mm_region {
u64 virt;
u64 phys;

View file

@ -48,6 +48,55 @@ Notes
6. CONFIG_ARM64 instead of CONFIG_ARMV8 is used to distinguish aarch64 and
aarch32 specific codes.
MMU
---
U-Boot uses a simple page table for MMU setup. It uses the smallest number of bits
possible for the virtual address based on the maximum memory address (see the logic
in ``get_tcr()``). If this is less than 39 bits, the MMU will use only 3 levels for
address translation.
As with all platforms, U-Boot on ARM64 uses a 1:1 mapping of virtual to physical addresses.
In general, the memory map is expected to remain static once the MMU is enabled.
Software pagetable walker
^^^^^^^^^^^^^^^^^^^^^^^^^
It is possible to debug the pagetable generated by U-Boot with the built-in
``dump_pagetable()`` and ``walk_pagetable()`` functions (the former being a simple
wrapper for the latter). For example, the following can be added to ``setup_all_pgtables()``
after the first call to ``setup_pgtables()``:
.. code-block:: c
dump_pagetable(gd->arch.tlb_addr, get_tcr(NULL, NULL));
.. kernel-doc:: arch/arm/cpu/armv8/cache_v8.c
:identifiers: __pagetable_walk pagetable_print_entry
The pagetable walker can be used as follows:
.. kernel-doc:: arch/arm/include/asm/armv8/mmu.h
:identifiers: pte_walker_cb_t walk_pagetable dump_pagetable
This will produce output like the following:
.. code-block:: text
Walking pagetable at 000000017df90000, va_bits: 36. Using 3 levels
[0x17df91000] | Table | |
[0x17df92000] | Table | |
[0x000001000 - 0x000200000] | Pages | Device-nGnRnE | Non-shareable
[0x000200000 - 0x040000000] | Block | Device-nGnRnE | Non-shareable
[0x040000000 - 0x080000000] | Block | Device-nGnRnE | Non-shareable
[0x080000000 - 0x140000000] | Block | Normal | Inner-shareable
[0x17df93000] | Table | |
[0x140000000 - 0x17de00000] | Block | Normal | Inner-shareable
[0x17df94000] | Table | |
[0x17de00000 - 0x17dfa0000] | Pages | Normal | Inner-shareable
For more information, please refer to the additional function documentation in
``arch/arm/include/asm/armv8/mmu.h``.
Contributors
------------