QEMU NUMA and U-Boot
Heinrich Schuchardt
xypron.glpk at gmx.de
Wed Jul 7 19:39:22 CEST 2021
On 7/7/21 5:15 PM, François Ozog wrote:
> top posting what now works for me:
> - changed calculation of memory size to loop through different memory nodes
> - added numa_node to banks
> - filter out banks that do not match the target mixup node (do not want
> to change ABI to add node information)
>
> That's not satisfying overall but at least my code works with NUMA on Qemu.
Do we expect real hardware with NUMA to be using U-Boot?
If you have real ARM hardware in NUMA configuration, can the boot CPU
access memory of dormant CPUs when U-Boot is entered?
Best regards
Heinrich
>
>
> *diff --git a/Kconfig b/Kconfig*
>
> *index f8c1a77bed..4d3ab8cb49 100644*
>
> *--- a/Kconfig*
>
> *+++ b/Kconfig*
>
> @@ -192,7 +192,9 @@config NR_DRAM_BANKS
>
> default 1 if ARCH_SUNXI || ARCH_OWL
>
> default 4
>
> help
>
> - This defines the number of DRAM banks.
>
> +This defines the number of DRAM banks. For qemu with NUMA, you
>
> +may want to set this value to <slots> * <possible memdev>.
>
> +for instance, for a 2 slot with 4 memdevs set NR_DRAM_BANKS to 8.
>
> config SYS_BOOT_GET_CMDLINE
>
> bool "Enable kernel command line setup"
>
> *diff --git a/arch/arm/lib/bootm-fdt.c b/arch/arm/lib/bootm-fdt.c*
>
> *index 29020bd1c6..2b28ab8108 100644*
>
> *--- a/arch/arm/lib/bootm-fdt.c*
>
> *+++ b/arch/arm/lib/bootm-fdt.c*
>
> @@ -42,12 +42,17 @@int arch_fixup_fdt(void *blob)
>
> u64 size[CONFIG_NR_DRAM_BANKS];
>
> for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
>
> +unsigned char node = bd->bi_dram[bank].numa_node;
>
> start[bank] = bd->bi_dram[bank].start;
>
> size[bank] = bd->bi_dram[bank].size;
>
> #ifdef CONFIG_ARMV7_NONSEC
>
> ret = armv7_apply_memory_carveout(&start[bank], &size[bank]);
>
> if (ret)
>
> return ret;
>
> +#endif
>
> +#ifdef CONFIG_OF_LIBFDT
>
> +/* add node info for the fdt_fixup_memory below */
>
> +start[bank] = (((phys_addr_t)node) << 56) | bd->bi_dram[bank].start;
>
> #endif
>
> }
>
> *diff --git a/common/fdt_support.c b/common/fdt_support.c*
>
> *index a9a32df1e7..3bca2ba888 100644*
>
> *--- a/common/fdt_support.c*
>
> *+++ b/common/fdt_support.c*
>
> @@ -415,16 +415,29 @@static int fdt_pack_reg(const void *fdt, void *buf,
> u64 *address, u64 *size,
>
> return p - (char *)buf;
>
> }
>
> +static inline uint32_t fdt32_ld(const fdt32_t *p)
>
> +{
>
> +const uint8_t *bp = (const uint8_t *)p;
>
> +
>
> +return ((uint32_t)bp[0] << 24)
>
> + | ((uint32_t)bp[1] << 16)
>
> + | ((uint32_t)bp[2] << 8)
>
> + | bp[3];
>
> +}
>
> #if CONFIG_NR_DRAM_BANKS > 4
>
> #define MEMORY_BANKS_MAX CONFIG_NR_DRAM_BANKS
>
> #else
>
> #define MEMORY_BANKS_MAX 4
>
> #endif
>
> +/* NUMA has yet to be properly handled
>
> + * This code appends memory to the first memory node that matches the
> NUMA node.
>
> + */
>
> int fdt_fixup_memory_banks(void *blob, u64 start[], u64 size[], int banks)
>
> {
>
> int err, nodeoffset;
>
> int len, i;
>
> u8 tmp[MEMORY_BANKS_MAX * 16]; /* Up to 64-bit address + 64-bit size */
>
> +unsigned int numa_node;
>
> if (banks > MEMORY_BANKS_MAX) {
>
> printf("%s: num banks %d exceeds hardcoded limit %d."
>
> @@ -444,6 +457,12 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> u64 size[], int banks)
>
> if (nodeoffset < 0)
>
> return nodeoffset;
>
> +const __be32* numa_node_prop = fdt_getprop(blob, nodeoffset,
> "numa-node-id", &len);
>
> +if (numa_node_prop != NULL && len == sizeof(__be32)) {
>
> +numa_node = fdt32_ld(numa_node_prop);
>
> +}
>
> +else numa_node = 0;
>
> +
>
> err = fdt_setprop(blob, nodeoffset, "device_type", "memory",
>
> sizeof("memory"));
>
> if (err < 0) {
>
> @@ -453,8 +472,27 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> u64 size[], int banks)
>
> }
>
> for (i = 0; i < banks; i++) {
>
> - if (start[i] == 0 && size[i] == 0)
>
> +/* clear node information */
>
> +unsigned int node;
>
> +recheck:
>
> +if (start[i]== 0 && size[i] == 0)
>
> break;
>
> +node = (start[i] >> 56) & 0xFF;
>
> +start[i] = start[i] & 0x00FFFFFFFFFFFFFF;
>
> +/* for the moment, just ignore the banks that are not in
>
> +* memory NUMA node */
>
> +if (node != numa_node) {
>
> +/* remove the bank from the list */
>
> +int j;
>
> +for (j=i; j < banks-1; j++) {
>
> +start[j] = start[j+1];
>
> +size[j] = size[j+1];
>
> +}
>
> +start[j]=0;
>
> +size[j]=0;
>
> +banks--;
>
> +goto recheck;
>
> +}
>
> }
>
> banks = i;
>
> @@ -470,6 +508,7 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> u64 size[], int banks)
>
> "reg", fdt_strerror(err));
>
> return err;
>
> }
>
> +
>
> return 0;
>
> }
>
> *diff --git a/configs/qemu_arm64_defconfig b/configs/qemu_arm64_defconfig*
>
> *index f6e586627a..0fdc22d71d 100644*
>
> *--- a/configs/qemu_arm64_defconfig*
>
> *+++ b/configs/qemu_arm64_defconfig*
>
> @@ -1,7 +1,7 @@
>
> CONFIG_ARM=y
>
> CONFIG_POSITION_INDEPENDENT=y
>
> CONFIG_ARCH_QEMU=y
>
> -CONFIG_NR_DRAM_BANKS=1
>
> +CONFIG_NR_DRAM_BANKS=32
>
> CONFIG_ENV_SIZE=0x40000
>
> CONFIG_ENV_SECT_SIZE=0x40000
>
> CONFIG_AHCI=y
>
> *diff --git a/include/asm-generic/u-boot.h b/include/asm-generic/u-boot.h*
>
> *index 637de0c455..3cf45124ec 100644*
>
> *--- a/include/asm-generic/u-boot.h*
>
> *+++ b/include/asm-generic/u-boot.h*
>
> @@ -71,6 +71,8 @@struct bd_info {
>
> struct {/* RAM configuration */
>
> phys_addr_t start;
>
> phys_size_t size;
>
> +unsigned numa_node;
>
> +unsigned pad; /* just to make sure we do not cause alignment */
>
> } bi_dram[CONFIG_NR_DRAM_BANKS];
>
> };
>
> *diff --git a/lib/fdtdec.c b/lib/fdtdec.c*
>
> *index 4b097fb588..b1934edc8d 100644*
>
> *--- a/lib/fdtdec.c*
>
> *+++ b/lib/fdtdec.c*
>
> @@ -1064,77 +1064,101 @@int fdtdec_decode_display_timing(const void
> *blob, int parent, int index,
>
> return ret;
>
> }
>
> +ofnode get_next_memory_node(ofnode mem)
>
> +{
>
> +do {
>
> +mem = ofnode_by_prop_value(mem, "device_type", "memory", 7);
>
> +} while (!ofnode_is_available(mem));
>
> +
>
> +return mem;
>
> +}
>
> +
>
> int fdtdec_setup_mem_size_base(void)
>
> {
>
> int ret;
>
> +int reg;
>
> ofnode mem;
>
> struct resource res;
>
> +phys_addr_t base = ~0;
>
> +phys_size_t size = 0;;
>
> - mem = ofnode_path("/memory");
>
> - if (!ofnode_valid(mem)) {
>
> - debug("%s: Missing /memory node\n", __func__);
>
> - return -EINVAL;
>
> - }
>
> +for (mem = get_next_memory_node(mem); ofnode_valid(mem); mem =
> get_next_memory_node(mem)) {
>
> - ret = ofnode_read_resource(mem, 0, &res);
>
> - if (ret != 0) {
>
> - debug("%s: Unable to decode first memory bank\n", __func__);
>
> - return -EINVAL;
>
> +for(reg = 0, ret = 0; ret == 0 ; reg++) {
>
> +ret = ofnode_read_resource(mem, reg, &res);
>
> +if (ret != 0)
>
> +break;
>
> +if ((phys_addr_t)res.start < base)
>
> +base = (phys_addr_t)res.start;
>
> +size += (phys_size_t)(res.end - res.start + 1);
>
> +}
>
> }
>
> +
>
> +gd->ram_base = (unsigned long)base;
>
> +gd->ram_size = (phys_size_t)size;
>
> - gd->ram_size = (phys_size_t)(res.end - res.start + 1);
>
> - gd->ram_base = (unsigned long)res.start;
>
> +debug("%s: Initial DRAM base %llx\n", __func__,
>
> +(unsigned long long)gd->ram_base);
>
> debug("%s: Initial DRAM size %llx\n", __func__,
>
> (unsigned long long)gd->ram_size);
>
> return 0;
>
> }
>
> -ofnode get_next_memory_node(ofnode mem)
>
> -{
>
> - do {
>
> - mem = ofnode_by_prop_value(mem, "device_type", "memory", 7);
>
> - } while (!ofnode_is_available(mem));
>
> -
>
> - return mem;
>
> -}
>
> -
>
> int fdtdec_setup_memory_banksize(void)
>
> {
>
> int bank, ret, reg = 0;
>
> struct resource res;
>
> ofnode mem = ofnode_null();
>
> +const __be32* numa_node_prop = NULL;
>
> +int len;
>
> +int numa_node = -1;
>
> +int count = 0;
>
> +
>
> +for (mem = get_next_memory_node(mem); ofnode_valid(mem); mem =
> get_next_memory_node(mem)) {
>
> - mem = get_next_memory_node(mem);
>
> - if (!ofnode_valid(mem)) {
>
> - debug("%s: Missing /memory node\n", __func__);
>
> - return -EINVAL;
>
> +count++;
>
> +
>
> +numa_node_prop = ofnode_get_property(mem, "numa-node-id", &len);
>
> +if (numa_node_prop != NULL && len == sizeof(__be32)) {
>
> +numa_node = of_read_number(numa_node_prop, 1);
>
> }
>
> +else numa_node = 0;
>
> - for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
>
> - ret = ofnode_read_resource(mem, reg++, &res);
>
> - if (ret < 0) {
>
> - reg = 0;
>
> - mem = get_next_memory_node(mem);
>
> - if (!ofnode_valid(mem))
>
> - break;
>
> +debug("Found memory for node %d\n", numa_node);
>
> - ret = ofnode_read_resource(mem, reg++, &res);
>
> - if (ret < 0)
>
> - break;
>
> - }
>
> +ret = 0;
>
> +for(reg = 0; ret == 0 && bank < CONFIG_NR_DRAM_BANKS; reg++) {
>
> +ret = ofnode_read_resource(mem, reg, &res);
>
> if (ret != 0)
>
> - return -EINVAL;
>
> +break;
>
> gd->bd->bi_dram[bank].start = (phys_addr_t)res.start;
>
> gd->bd->bi_dram[bank].size =
>
> (phys_size_t)(res.end - res.start + 1);
>
> +gd->bd->bi_dram[bank].numa_node = numa_node;
>
> - debug("%s: DRAM Bank #%d: start = 0x%llx, size = 0x%llx\n",
>
> +debug("%s: DRAM Bank #%d: start = 0x%llx, size = 0x%llx"
>
> +" name_node = %d\n",
>
> __func__, bank,
>
> (unsigned long long)gd->bd->bi_dram[bank].start,
>
> - (unsigned long long)gd->bd->bi_dram[bank].size);
>
> +(unsigned long long)gd->bd->bi_dram[bank].size,
>
> +gd->bd->bi_dram[bank].numa_node);
>
> +
>
> +bank++;
>
> +}
>
> +
>
> +
>
> +}
>
> +
>
> +if (count == 0) {
>
> +debug("%s: Missing /memory node\n", __func__);
>
> +return -EINVAL;
>
> +}
>
> +if (bank >= CONFIG_NR_DRAM_BANKS) {
>
> +printf("Too many DT memory nodes for CONFIG_NR_DRAM_BANKS=%d\n",
>
> +CONFIG_NR_DRAM_BANKS);
>
> }
>
> return 0;
>
>
> On Wed, 7 Jul 2021 at 13:00, François Ozog <francois.ozog at linaro.org
> <mailto:francois.ozog at linaro.org>> wrote:
>
>
>
> On Wed, 7 Jul 2021 at 12:16, AKASHI Takahiro
> <takahiro.akashi at linaro.org <mailto:takahiro.akashi at linaro.org>> wrote:
>
> On Wed, Jul 07, 2021 at 11:37:19AM +0200, Fran??ois Ozog wrote:
> > On Wed, 7 Jul 2021 at 09:40, François Ozog
> <francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>> wrote:
> >
> > > On Wed, 7 Jul 2021 at 05:59, Heinrich Schuchardt
> <xypron.glpk at gmx.de <mailto:xypron.glpk at gmx.de>>
> > > wrote:
> > > >
> > > > Am 7. Juli 2021 05:18:20 MESZ schrieb Heinrich Schuchardt <
> > > xypron.glpk at gmx.de <mailto:xypron.glpk at gmx.de>>:
> > > > >Am 7. Juli 2021 03:44:35 MESZ schrieb AKASHI Takahiro
> > > > ><takahiro.akashi at linaro.org
> <mailto:takahiro.akashi at linaro.org>>:
> > > > >>François,
> > > > >>
> > > > >>On Tue, Jul 06, 2021 at 08:10:08PM +0200, Heinrich
> Schuchardt wrote:
> > > > >>> On 7/6/21 6:13 PM, François Ozog wrote:
> > > > >>> > Hi Heinrich, U-Boot 2021-07rc5 does not take into
> account memory
> > > > >>> > description when using Qemu 5.2 NUMA configuration
> to adapt memory
> > > > >>map
> > > > >>> > (kernel_addr_r...):
> > > > >>> >
> > > > >>> > -smp 4 \
> > > > >>> > -m 8G,slots=2,maxmem=16G \
> > > > >>> > -object memory-backend-ram,size=4G,id=m0 \
> > > > >>> > -object memory-backend-ram,size=4G,id=m1 \
> > > > >>> > -numa node,cpus=0-1,nodeid=0,memdev=m0 \
> > > > >>> > -numa node,cpus=2-3,nodeid=1,memdev=m1
> > > > >>> >
> > > > >>> > kernel_addr_r is still 0x4040000 and thus you can't
> use it to
> > > > >>bootefi.
> > > > >>> >
> > > > >>> > fdt addr 0x13ede6de0; fdt print
> > > > >>> >
> > > > >>> > Displays fdt while I think it should not.
> > > > >>> >
> > > > >>> > If I load the kernel at dram.start, the load works
> but not boot
> > > > >>> >
> > > > >>> > U-Boot 2021.07 (Jul 06 2021 - 13:26:43 +0000)
> > > > >>> >
> > > > >>> >
> > > > >>> > DRAM:4 GiB
> > > > >>> >
> > > > >>> > Flash: 64 MiB
> > > > >>> >
> > > > >>> > Loading Environment from Flash... OK
> > > > >>> >
> > > > >>> > In:pl011 at 9000000
> > > > >>> >
> > > > >>> > Out: pl011 at 9000000
> > > > >>> >
> > > > >>> > Err: pl011 at 9000000
> > > > >>> >
> > > > >>> > Net: eth0: virtio-net#32
> > > > >>> >
> > > > >>> > Hit any key to stop autoboot:0
> > > > >>> >
> > > > >>> > =>
> > > > >>> >
> > > > >>> > => bdinfo
> > > > >>> >
> > > > >>> > boot_params = 0x0000000000000000
> > > > >>> >
> > > > >>> > DRAM bank = 0x0000000000000000
> > > > >>> >
> > > > >>> > -> start= 0x0000000140000000
> > > > >>> >
> > > > >>> > -> size = 0x0000000100000000
> > > > >>> >
> > > > >>> > flashstart= 0x0000000000000000
> > > > >>> >
> > > > >>> > flashsize = 0x0000000004000000
> > > > >>> >
> > > > >>> > flashoffset = 0x00000000000bc990
> > > > >>> >
> > > > >>> > baudrate= 115200 bps
> > > > >>> >
> > > > >>> > relocaddr = 0x000000013ff27000
> > > > >>> >
> > > > >>> > reloc off = 0x000000013ff27000
> > > > >>> >
> > > > >>> > Build = 64-bit
> > > > >>> >
> > > > >>> > current eth = virtio-net#32
> > > > >>> >
> > > > >>> > ethaddr = 52:52:52:52:52:52
> > > > >>> >
> > > > >>> > IP addr = <NULL>
> > > > >>> >
> > > > >>> > fdt_blob= 0x000000013ede6de0
> > > > >>> >
> > > > >>> > new_fdt = 0x000000013ede6de0
> > > > >>> >
> > > > >>> > fdt_size= 0x0000000000100000
> > > > >>> >
> > > > >>> > lmb_dump_all:
> > > > >>> >
> > > > >>> > memory.cnt= 0x1
> > > > >>> >
> > > > >>> > memory.reg[0x0].base = 0x140000000
> > > > >>> >
> > > > >>> > .size = 0x100000000
> > > > >>> >
> > > > >>> >
> > > > >>> > reserved.cnt= 0x0
> > > > >>> >
> > > > >>> > arch_number = 0x0000000000000000
> > > > >>> >
> > > > >>> > TLB addr= 0x000000013fff0000
> > > > >>> >
> > > > >>> > irq_sp= 0x000000013ede6dd0
> > > > >>> >
> > > > >>> > sp start= 0x000000013ede6dd0
> > > > >>> >
> > > > >>> > Early malloc usage: 3a8 / 2000
> > > > >>> >
> > > > >>> > => load virtio 0:1 0x140000000 /oskit.efi
> > > > >>> >
> > > > >>> > 853424 bytes read in 1 ms (813.9 MiB/s)
> > > > >>> >
> > > > >>> > => bootefi0x140000000 0x13ede6dd0
> > > > >>> >
> > > > >>> > ERROR: Failed to register WaitForKey event
> > > > >>> >
> > > > >>> > Setting OsIndications failed
> > > > >>> >
> > > > >>> > Error: Cannot initialize UEFI sub-system, r = 9
> > > > >>> >
> > > > >>> >
> > > > >>> > I think there is a need to calculate memory map
> based on previous
> > > > >>> > firmware (TFA, QEMU can be considered as previous
> frimware)
> > > > >>information
> > > > >>> > (DT or blob_list).
> > > > >>> >
> > > > >>> > What do you think ?
> > > > >>> >
> > > > >>> > Cheers
> > > > >>> >
> > > > >>> > FF
> > > > >>> >
> > > > >>> > --
> > > > >>> >
> > > > >>> > François-Frédéric Ozog | /Director Business
> Development/
> > > > >>> > T: +33.67221.6485
> > > > >>> > francois.ozog at linaro.org
> <mailto:francois.ozog at linaro.org>
> <mailto:francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>>
> > > > >>| Skype: ffozog
> > > > >>> >
> > > > >>> >
> > > > >>>
> > > > >>> The kernel load address is hard coded here:
> > > > >>> include/configs/qemu-arm.h:41:
> "kernel_addr_r=0x40400000\0" \
> > > > >>>
> > > > >>> bdinfo shows:
> > > > >>> DRAM start = 0x140000000
> > > > >>> DRAM size = 0x100000000
> > > > >>>
> > > > >>> fdt addr $fdt_addr
> > > > >>> fdt printf
> > > > >>>
> > > > >>> shows two memory areas. One at 40000000, one at
> 140000000.
> > > > >>
> > > > >>(This shows that U-Boot receives a correct memory map
> via dtb.)
> > > > >>
> > > > >>Is this a NUMA machine, isn't it? Why should we care of
> which
> > > > >>memory region be used here? Please note that this is a
> virtual
> > > > >machine,
> > > > >>there is no practical difference between two regions.
> > > > >>
> > > > >>The root problem is that U-Boot did not recognize there
> were two
> > > > >>memory regions. We can fix this issue in either way:
> > > > >>
> > > > >>1)
> > > > >>diff --git a/configs/qemu_arm64_defconfig
> > > > >>b/configs/qemu_arm64_defconfig
> > > > >>index f6e586627a8e..b70ffae8bf6e 100644
> > > > >>--- a/configs/qemu_arm64_defconfig
> > > > >>+++ b/configs/qemu_arm64_defconfig
> > > > >>@@ -1,7 +1,7 @@
> > > > >> CONFIG_ARM=y
> > > > >> CONFIG_POSITION_INDEPENDENT=y
> > > > >> CONFIG_ARCH_QEMU=y
> > > > >>-CONFIG_NR_DRAM_BANKS=1
> > > > >>+CONFIG_NR_DRAM_BANKS=2
> > > > >> CONFIG_ENV_SIZE=0x40000
> > > > >> CONFIG_ENV_SECT_SIZE=0x40000
> > > > >> CONFIG_AHCI=y
> > > > >>
> > > > >>2)
> > > > >>diff --git a/lib/fdtdec.c b/lib/fdtdec.c
> > > > >>index 4b097fb588ed..4067ea2dead6 100644
> > > > >>--- a/lib/fdtdec.c
> > > > >>+++ b/lib/fdtdec.c
> > > > >>@@ -1111,7 +1111,7 @@ int
> fdtdec_setup_memory_banksize(void)
> > > > >> return -EINVAL;
> > > > >> }
> > > > >>
> > > > >>- for (bank = 0; bank < CONFIG_NR_DRAM_BANKS;
> bank++) {
> > > > >>+ for (bank = 0; ; bank++) {
> > > > >> ret = ofnode_read_resource(mem, reg++,
> &res);
> > > > >> if (ret < 0) {
> > > > >> reg = 0;
> > > > >>
> > > > >> (fdtdec_setup_memory_banksize() is called in
> dram_init_banksize().)
> > > > >>
> > > > >>
> > > > >>(2) seems much better, but I don't know why we had to use
> > > > >>CONFIG_NR_DRAM_BANKS here.
> > > > >>
> > >
> > > 2) alone does not work as other places in the code refer to
> > > CONFIG_NR_DRAM_BANKS. Setting ...BANKS to 32 makes my code
> work and
> > > bdinfo seems now correct:
> > >
> > => bdinfo
> > > boot_params = 0x0000000000000000
> > > DRAM bank = 0x0000000000000000
> > > -> start = 0x0000000140000000
> > > -> size = 0x0000000100000000
> > > DRAM bank = 0x0000000000000001
> > > -> start = 0x0000000040000000
> > > -> size = 0x0000000100000000
> > > flashstart = 0x0000000000000000
> > > flashsize = 0x0000000004000000
> > > flashoffset = 0x00000000000bcb88
> > > baudrate = 115200 bps
> > > relocaddr = 0x000000013ff27000
> > > reloc off = 0x000000013ff27000
> > > Build = 64-bit
> > > current eth = virtio-net#32
> > > ethaddr = 52:52:52:52:52:52
> > > IP addr = <NULL>
> > > fdt_blob = 0x000000013ede6cf0
> > > new_fdt = 0x000000013ede6cf0
> > > fdt_size = 0x0000000000100000
> > > lmb_dump_all:
> > > memory.cnt = 0x1
> > > memory.reg[0x0].base = 0x40000000
> > > .size = 0x200000000
> > > reserved.cnt = 0x1
> > > reserved.reg[0x0].base = 0x13ede58f0
> > > .size = 0x121a710
> > > arch_number = 0x0000000000000000
> > > TLB addr = 0x000000013fff0000
> > > irq_sp = 0x000000013ede6ce0
> > > sp start = 0x000000013ede6ce0
> > > Early malloc usage: 3a8 / 2000
> > >
> > > May I suggest you propose a combined patch Akashi-san? If
> we assume
> > > NUMA systems to be tested up to 8 nodes to mimic real existing
> > > enterprise hardware and up to 4 memory slots (say for
> memory hot
> > > plugging tests) what about a default value of 32?
> Alternatively, we
> > > could set this value to a much higher one if the costs are
> negligible.
> > >
> > >
> > > Well, lets not rush as there are other twists:
> >
> > the 4G bank in node 1 is marked BootServicesData in the UEFI
> GetMemoryMap
> > which I assume is not the case. EDK2 reports it as
> ConventionalMemory.
> >
> > The root cause seem to be gd->ramtop not being setup properly.
> >
> > Further analysis shows that the DT passed to the booted EFI
> payload does
> > not seem to be correct:
> >
> > DT fragment passed to U-Boot
> >
> > memory at 140000000 {
> > numa-node-id = <0x00000001>;
> > reg = <0x00000001 0x40000000 0x00000001 0x00000000>;
> > device_type = "memory";
> > };
> > memory at 40000000 {
> > numa-node-id = <0x00000000>;
> > reg = <0x00000000 0x40000000 0x00000001 0x00000000>;
> > device_type = "memory";
> > };
> >
> > DT passed to payload (as per my debug code):
> >
> > memory at 140000000: memory
> >
> > numa-node-id 1
> >
> > reg (len= 32)
> >
> > 140000000 100000000
> >
> > 40000000 100000000
> >
> > memory at 40000000: memory
> >
> > numa-node-id 0
> >
> > reg (len= 16)
> >
> > 40000000 100000000
> >
> > I am investigating this further...
>
> You should check the logic of fdt_fixup_memory_banks()
> which is called this way:
> efi_dt_fixup()
> image_setup_libfdt()
> arch_fixup_fdt()
> fdt_fixup_memory_banks()
>
> What it does is to put *all* the memory regions unconditionally as
> a single "reg" array into the *first-detected* "memory" node,
> which is
> "memory at 140000000" in this case.
> It means that this function doesn't respect NUMA configuration.
>
> Thanks.
>
> tweaking ram_top to be the correct in
> https://elixir.bootlin.com/u-boot/latest/source/lib/efi_loader/efi_memory.c#L732
> <https://elixir.bootlin.com/u-boot/latest/source/lib/efi_loader/efi_memory.c#L732>
> results in memory on node 1 to be considered as ConventionalMemory
> but U-Boot places all code and data at the end of node 1 while it
> should position it on the current node. That said it is an
> acceptable work around for my test case.
>
> Bottom line, we need to introduce NUMA node management in memory
> management all over the place.
> It is unclear if there is a business case for that. I'll ask LEDGE
> members...
>
> -Takahiro Akashi
>
>
> > > >>In this case, other occurrences of CONFIG_NR_DRAM_BANKS
> in this file
> > > > >>should be replaced with a variable for it.
> > > > >>
> > > > >>> Your use case is well beyond the typical U-Boot
> usage. So I guess it
> > > > >>> will be up to Linaro to provide the necessary patches:
> > > > >>>
> > > > >>> * determine the active CPU
> > > > >>> * determine the RAM assigned to the active CPU according
> > > > >>> to the numa-node-id in the device-tree
> > > > >>> * make sure that U-Boot only uses the memory of the
> active CPU
> > > > >>> internally
> > > > >>> * make sure that the UEFI memory map contains a compliant
> > > > >description
> > > > >>> * possibly, dynamically set up the environment variables
> > > > >>>
> > > > >>> +CC Tuomas Tynkkynen (maintainer for
> qemu_arm64_defconfig)
> > > > >>
> > > > >>For (1), we'd better have a different config, or increase
> > > > >>the value of CONFIG_NR_DRAM_BANKS to a bigger number?
> > > > >
> > > > >Is the system configured such that each CPU can access
> the others CPU's
> > > > >RAM when entering U-Boot?
> > > > >
> > > > >Best regards
> > > > >
> > > > >Heinrich
> > > > >
> > > >
> > > > At least the comments for this patch sound as if on a
> physical system
> > > cross NUMA node memory access is only available after full SMP
> > > initialization:
> > > >
> > > >
> > >
> https://patchwork.kernel.org/project/linux-acpi/patch/20180625130552.5636-1-lorenzo.pieralisi@arm.com/
> <https://patchwork.kernel.org/project/linux-acpi/patch/20180625130552.5636-1-lorenzo.pieralisi@arm.com/>
> > > >
> > > > QEMU may be less restrictive.
> > > >
> > > > QEMU allows the node distance to be 255 indicating that
> cross node
> > > access is infeasible.
> > > >
> > > > Best regards
> > > >
> > > > Heinrich
> > > >
> > > > >>
> > > > >>-Takahiro Akashi
> > > > >>
> > > > >>
> > > > >>> Best regards
> > > > >>>
> > > > >>> Heinrich
> > > >
> > >
> > >
> > > --
> > > François-Frédéric Ozog | Director Business Development
> > > T: +33.67221.6485
> > > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
> | Skype: ffozog
> > >
> >
> >
> > --
> > François-Frédéric Ozog | *Director Business Development*
> > T: +33.67221.6485
> > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org> |
> Skype: ffozog
>
>
>
> --
>
> François-Frédéric Ozog | /Director Business Development/
> T: +33.67221.6485
> francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
> | Skype: ffozog
>
>
>
>
> --
>
> François-Frédéric Ozog | /Director Business Development/
> T: +33.67221.6485
> francois.ozog at linaro.org <mailto:francois.ozog at linaro.org> | Skype: ffozog
>
>
More information about the U-Boot
mailing list