QEMU NUMA and U-Boot

Heinrich Schuchardt xypron.glpk at gmx.de
Wed Jul 7 19:39:22 CEST 2021



On 7/7/21 5:15 PM, François Ozog wrote:
> top posting what now works for me:
> - changed calculation of memory size to loop through different memory nodes
> - added numa_node to banks
> - filter out banks that do not match the target fixup node (do not want
> to change ABI to add node information)
>
> That's not satisfying overall but at least my code works with NUMA on Qemu.

Do we expect real hardware with NUMA to be using U-Boot?

If you have real ARM hardware in NUMA configuration, can the boot CPU
access memory of dormant CPUs when U-Boot is entered?

Best regards

Heinrich

>
>
> *diff --git a/Kconfig b/Kconfig*
>
> *index f8c1a77bed..4d3ab8cb49 100644*
>
> *--- a/Kconfig*
>
> *+++ b/Kconfig*
>
> @@ -192,7 +192,9 @@config NR_DRAM_BANKS
>
> default 1 if ARCH_SUNXI || ARCH_OWL
>
> default 4
>
> help
>
> - This defines the number of DRAM banks.
>
> +This defines the number of DRAM banks. For qemu with NUMA, you
>
> +may want to set this value to <slots> * <possible memdev>.
>
> +for instance, for a 2 slot with 4 memdevs set NR_DRAM_BANKS to 8.
>
> config SYS_BOOT_GET_CMDLINE
>
> bool "Enable kernel command line setup"
>
> *diff --git a/arch/arm/lib/bootm-fdt.c b/arch/arm/lib/bootm-fdt.c*
>
> *index 29020bd1c6..2b28ab8108 100644*
>
> *--- a/arch/arm/lib/bootm-fdt.c*
>
> *+++ b/arch/arm/lib/bootm-fdt.c*
>
> @@ -42,12 +42,17 @@int arch_fixup_fdt(void *blob)
>
> u64 size[CONFIG_NR_DRAM_BANKS];
>
> for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
>
> +unsigned char node = bd->bi_dram[bank].numa_node;
>
> start[bank] = bd->bi_dram[bank].start;
>
> size[bank] = bd->bi_dram[bank].size;
>
> #ifdef CONFIG_ARMV7_NONSEC
>
> ret = armv7_apply_memory_carveout(&start[bank], &size[bank]);
>
> if (ret)
>
> return ret;
>
> +#endif
>
> +#ifdef CONFIG_OF_LIBFDT
>
> +/* add node info for the fdt_fixup_memory below */
>
> +start[bank] = (((phys_addr_t)node) << 56) | bd->bi_dram[bank].start;
>
> #endif
>
> }
>
> *diff --git a/common/fdt_support.c b/common/fdt_support.c*
>
> *index a9a32df1e7..3bca2ba888 100644*
>
> *--- a/common/fdt_support.c*
>
> *+++ b/common/fdt_support.c*
>
> @@ -415,16 +415,29 @@static int fdt_pack_reg(const void *fdt, void *buf,
> u64 *address, u64 *size,
>
> return p - (char *)buf;
>
> }
>
> +static inline uint32_t fdt32_ld(const fdt32_t *p)
>
> +{
>
> +const uint8_t *bp = (const uint8_t *)p;
>
> +
>
> +return ((uint32_t)bp[0] << 24)
>
> + | ((uint32_t)bp[1] << 16)
>
> + | ((uint32_t)bp[2] << 8)
>
> + | bp[3];
>
> +}
>
> #if CONFIG_NR_DRAM_BANKS > 4
>
> #define MEMORY_BANKS_MAX CONFIG_NR_DRAM_BANKS
>
> #else
>
> #define MEMORY_BANKS_MAX 4
>
> #endif
>
> +/* NUMA has yet to be properly handled
>
> + * This code appends memory to the first memory node that matches the
> NUMA node.
>
> + */
>
> int fdt_fixup_memory_banks(void *blob, u64 start[], u64 size[], int banks)
>
> {
>
> int err, nodeoffset;
>
> int len, i;
>
> u8 tmp[MEMORY_BANKS_MAX * 16]; /* Up to 64-bit address + 64-bit size */
>
> +unsigned int numa_node;
>
> if (banks > MEMORY_BANKS_MAX) {
>
> printf("%s: num banks %d exceeds hardcoded limit %d."
>
> @@ -444,6 +457,12 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> u64 size[], int banks)
>
> if (nodeoffset < 0)
>
> return nodeoffset;
>
> +const __be32* numa_node_prop = fdt_getprop(blob, nodeoffset,
> "numa-node-id", &len);
>
> +if (numa_node_prop != NULL && len == sizeof(__be32)) {
>
> +numa_node = fdt32_ld(numa_node_prop);
>
> +}
>
> +else numa_node = 0;
>
> +
>
> err = fdt_setprop(blob, nodeoffset, "device_type", "memory",
>
> sizeof("memory"));
>
> if (err < 0) {
>
> @@ -453,8 +472,27 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> u64 size[], int banks)
>
> }
>
> for (i = 0; i < banks; i++) {
>
> - if (start[i] == 0 && size[i] == 0)
>
> +/* clear node information */
>
> +unsigned int node;
>
> +recheck:
>
> +if (start[i]== 0 && size[i] == 0)
>
> break;
>
> +node = (start[i] >> 56) & 0xFF;
>
> +start[i] = start[i] & 0x00FFFFFFFFFFFFFF;
>
> +/* for the moment, just ignore the banks that are not in
>
> +* memory NUMA node */
>
> +if (node != numa_node) {
>
> +/* remove the bank from the list */
>
> +int j;
>
> +for (j=i; j < banks-1; j++) {
>
> +start[j] = start[j+1];
>
> +size[j] = size[j+1];
>
> +}
>
> +start[j]=0;
>
> +size[j]=0;
>
> +banks--;
>
> +goto recheck;
>
> +}
>
> }
>
> banks = i;
>
> @@ -470,6 +508,7 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> u64 size[], int banks)
>
> "reg", fdt_strerror(err));
>
> return err;
>
> }
>
> +
>
> return 0;
>
> }
>
> *diff --git a/configs/qemu_arm64_defconfig b/configs/qemu_arm64_defconfig*
>
> *index f6e586627a..0fdc22d71d 100644*
>
> *--- a/configs/qemu_arm64_defconfig*
>
> *+++ b/configs/qemu_arm64_defconfig*
>
> @@ -1,7 +1,7 @@
>
> CONFIG_ARM=y
>
> CONFIG_POSITION_INDEPENDENT=y
>
> CONFIG_ARCH_QEMU=y
>
> -CONFIG_NR_DRAM_BANKS=1
>
> +CONFIG_NR_DRAM_BANKS=32
>
> CONFIG_ENV_SIZE=0x40000
>
> CONFIG_ENV_SECT_SIZE=0x40000
>
> CONFIG_AHCI=y
>
> *diff --git a/include/asm-generic/u-boot.h b/include/asm-generic/u-boot.h*
>
> *index 637de0c455..3cf45124ec 100644*
>
> *--- a/include/asm-generic/u-boot.h*
>
> *+++ b/include/asm-generic/u-boot.h*
>
> @@ -71,6 +71,8 @@struct bd_info {
>
> struct {/* RAM configuration */
>
> phys_addr_t start;
>
> phys_size_t size;
>
> +unsigned numa_node;
>
> +unsigned pad; /* just to make sure we do not cause alignment */
>
> } bi_dram[CONFIG_NR_DRAM_BANKS];
>
> };
>
> *diff --git a/lib/fdtdec.c b/lib/fdtdec.c*
>
> *index 4b097fb588..b1934edc8d 100644*
>
> *--- a/lib/fdtdec.c*
>
> *+++ b/lib/fdtdec.c*
>
> @@ -1064,77 +1064,101 @@int fdtdec_decode_display_timing(const void
> *blob, int parent, int index,
>
> return ret;
>
> }
>
> +ofnode get_next_memory_node(ofnode mem)
>
> +{
>
> +do {
>
> +mem = ofnode_by_prop_value(mem, "device_type", "memory", 7);
>
> +} while (!ofnode_is_available(mem));
>
> +
>
> +return mem;
>
> +}
>
> +
>
> int fdtdec_setup_mem_size_base(void)
>
> {
>
> int ret;
>
> +int reg;
>
> ofnode mem;
>
> struct resource res;
>
> +phys_addr_t base = ~0;
>
> +phys_size_t size = 0;;
>
> - mem = ofnode_path("/memory");
>
> - if (!ofnode_valid(mem)) {
>
> - debug("%s: Missing /memory node\n", __func__);
>
> - return -EINVAL;
>
> - }
>
> +for (mem = get_next_memory_node(mem); ofnode_valid(mem); mem =
> get_next_memory_node(mem)) {
>
> - ret = ofnode_read_resource(mem, 0, &res);
>
> - if (ret != 0) {
>
> - debug("%s: Unable to decode first memory bank\n", __func__);
>
> - return -EINVAL;
>
> +for(reg = 0, ret = 0; ret == 0 ; reg++) {
>
> +ret = ofnode_read_resource(mem, reg, &res);
>
> +if (ret != 0)
>
> +break;
>
> +if ((phys_addr_t)res.start < base)
>
> +base = (phys_addr_t)res.start;
>
> +size += (phys_size_t)(res.end - res.start + 1);
>
> +}
>
> }
>
> +
>
> +gd->ram_base = (unsigned long)base;
>
> +gd->ram_size = (phys_size_t)size;
>
> - gd->ram_size = (phys_size_t)(res.end - res.start + 1);
>
> - gd->ram_base = (unsigned long)res.start;
>
> +debug("%s: Initial DRAM base %llx\n", __func__,
>
> +(unsigned long long)gd->ram_base);
>
> debug("%s: Initial DRAM size %llx\n", __func__,
>
> (unsigned long long)gd->ram_size);
>
> return 0;
>
> }
>
> -ofnode get_next_memory_node(ofnode mem)
>
> -{
>
> - do {
>
> - mem = ofnode_by_prop_value(mem, "device_type", "memory", 7);
>
> - } while (!ofnode_is_available(mem));
>
> -
>
> - return mem;
>
> -}
>
> -
>
> int fdtdec_setup_memory_banksize(void)
>
> {
>
> int bank, ret, reg = 0;
>
> struct resource res;
>
> ofnode mem = ofnode_null();
>
> +const __be32* numa_node_prop = NULL;
>
> +int len;
>
> +int numa_node = -1;
>
> +int count = 0;
>
> +
>
> +for (mem = get_next_memory_node(mem); ofnode_valid(mem); mem =
> get_next_memory_node(mem)) {
>
> - mem = get_next_memory_node(mem);
>
> - if (!ofnode_valid(mem)) {
>
> - debug("%s: Missing /memory node\n", __func__);
>
> - return -EINVAL;
>
> +count++;
>
> +
>
> +numa_node_prop = ofnode_get_property(mem, "numa-node-id", &len);
>
> +if (numa_node_prop != NULL && len == sizeof(__be32)) {
>
> +numa_node = of_read_number(numa_node_prop, 1);
>
> }
>
> +else numa_node = 0;
>
> - for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
>
> - ret = ofnode_read_resource(mem, reg++, &res);
>
> - if (ret < 0) {
>
> - reg = 0;
>
> - mem = get_next_memory_node(mem);
>
> - if (!ofnode_valid(mem))
>
> - break;
>
> +debug("Found memory for node %d\n", numa_node);
>
> - ret = ofnode_read_resource(mem, reg++, &res);
>
> - if (ret < 0)
>
> - break;
>
> - }
>
> +ret = 0;
>
> +for(reg = 0; ret == 0 && bank < CONFIG_NR_DRAM_BANKS; reg++) {
>
> +ret = ofnode_read_resource(mem, reg, &res);
>
> if (ret != 0)
>
> - return -EINVAL;
>
> +break;
>
> gd->bd->bi_dram[bank].start = (phys_addr_t)res.start;
>
> gd->bd->bi_dram[bank].size =
>
> (phys_size_t)(res.end - res.start + 1);
>
> +gd->bd->bi_dram[bank].numa_node = numa_node;
>
> - debug("%s: DRAM Bank #%d: start = 0x%llx, size = 0x%llx\n",
>
> +debug("%s: DRAM Bank #%d: start = 0x%llx, size = 0x%llx"
>
> +" name_node = %d\n",
>
> __func__, bank,
>
> (unsigned long long)gd->bd->bi_dram[bank].start,
>
> - (unsigned long long)gd->bd->bi_dram[bank].size);
>
> +(unsigned long long)gd->bd->bi_dram[bank].size,
>
> +gd->bd->bi_dram[bank].numa_node);
>
> +
>
> +bank++;
>
> +}
>
> +
>
> +
>
> +}
>
> +
>
> +if (count == 0) {
>
> +debug("%s: Missing /memory node\n", __func__);
>
> +return -EINVAL;
>
> +}
>
> +if (bank >= CONFIG_NR_DRAM_BANKS) {
>
> +printf("Too many DT memory nodes for CONFIG_NR_DRAM_BANKS=%d\n",
>
> +CONFIG_NR_DRAM_BANKS);
>
> }
>
> return 0;
>
>
> On Wed, 7 Jul 2021 at 13:00, François Ozog <francois.ozog at linaro.org
> <mailto:francois.ozog at linaro.org>> wrote:
>
>
>
>     On Wed, 7 Jul 2021 at 12:16, AKASHI Takahiro
>     <takahiro.akashi at linaro.org <mailto:takahiro.akashi at linaro.org>> wrote:
>
>         On Wed, Jul 07, 2021 at 11:37:19AM +0200, François Ozog wrote:
>          > On Wed, 7 Jul 2021 at 09:40, François Ozog
>         <francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>> wrote:
>          >
>          > > On Wed, 7 Jul 2021 at 05:59, Heinrich Schuchardt
>         <xypron.glpk at gmx.de <mailto:xypron.glpk at gmx.de>>
>          > > wrote:
>          > > >
>          > > > Am 7. Juli 2021 05:18:20 MESZ schrieb Heinrich Schuchardt <
>          > > xypron.glpk at gmx.de <mailto:xypron.glpk at gmx.de>>:
>          > > > >Am 7. Juli 2021 03:44:35 MESZ schrieb AKASHI Takahiro
>          > > > ><takahiro.akashi at linaro.org
>         <mailto:takahiro.akashi at linaro.org>>:
>          > > > >>François,
>          > > > >>
>          > > > >>On Tue, Jul 06, 2021 at 08:10:08PM +0200, Heinrich
>         Schuchardt wrote:
>          > > > >>> On 7/6/21 6:13 PM, François Ozog wrote:
>          > > > >>> > Hi Heinrich, U-Boot 2021-07rc5 does not take into
>         account memory
>          > > > >>> > description when using Qemu 5.2 NUMA configuration
>         to adapt memory
>          > > > >>map
>          > > > >>> > (kernel_addr_r...):
>          > > > >>> >
>          > > > >>> >         -smp 4 \
>          > > > >>> >           -m 8G,slots=2,maxmem=16G \
>          > > > >>> >          -object memory-backend-ram,size=4G,id=m0 \
>          > > > >>> >          -object memory-backend-ram,size=4G,id=m1 \
>          > > > >>> >          -numa node,cpus=0-1,nodeid=0,memdev=m0 \
>          > > > >>> >          -numa node,cpus=2-3,nodeid=1,memdev=m1
>          > > > >>> >
>          > > > >>> > kernel_addr_r is still 0x40400000 and thus you can't
>         use it to
>          > > > >>bootefi.
>          > > > >>> >
>          > > > >>> > fdt addr 0x13ede6de0; fdt print
>          > > > >>> >
>          > > > >>> > Displays fdt while I think it should not.
>          > > > >>> >
>          > > > >>> > If I load the kernel at dram.start, the load works
>         but not boot
>          > > > >>> >
>          > > > >>> > U-Boot 2021.07 (Jul 06 2021 - 13:26:43 +0000)
>          > > > >>> >
>          > > > >>> >
>          > > > >>> > DRAM:4 GiB
>          > > > >>> >
>          > > > >>> > Flash: 64 MiB
>          > > > >>> >
>          > > > >>> > Loading Environment from Flash... OK
>          > > > >>> >
>          > > > >>> > In:pl011 at 9000000
>          > > > >>> >
>          > > > >>> > Out: pl011 at 9000000
>          > > > >>> >
>          > > > >>> > Err: pl011 at 9000000
>          > > > >>> >
>          > > > >>> > Net: eth0: virtio-net#32
>          > > > >>> >
>          > > > >>> > Hit any key to stop autoboot:0
>          > > > >>> >
>          > > > >>> > =>
>          > > > >>> >
>          > > > >>> > => bdinfo
>          > > > >>> >
>          > > > >>> > boot_params = 0x0000000000000000
>          > > > >>> >
>          > > > >>> > DRAM bank = 0x0000000000000000
>          > > > >>> >
>          > > > >>> > -> start= 0x0000000140000000
>          > > > >>> >
>          > > > >>> > -> size = 0x0000000100000000
>          > > > >>> >
>          > > > >>> > flashstart= 0x0000000000000000
>          > > > >>> >
>          > > > >>> > flashsize = 0x0000000004000000
>          > > > >>> >
>          > > > >>> > flashoffset = 0x00000000000bc990
>          > > > >>> >
>          > > > >>> > baudrate= 115200 bps
>          > > > >>> >
>          > > > >>> > relocaddr = 0x000000013ff27000
>          > > > >>> >
>          > > > >>> > reloc off = 0x000000013ff27000
>          > > > >>> >
>          > > > >>> > Build = 64-bit
>          > > > >>> >
>          > > > >>> > current eth = virtio-net#32
>          > > > >>> >
>          > > > >>> > ethaddr = 52:52:52:52:52:52
>          > > > >>> >
>          > > > >>> > IP addr = <NULL>
>          > > > >>> >
>          > > > >>> > fdt_blob= 0x000000013ede6de0
>          > > > >>> >
>          > > > >>> > new_fdt = 0x000000013ede6de0
>          > > > >>> >
>          > > > >>> > fdt_size= 0x0000000000100000
>          > > > >>> >
>          > > > >>> > lmb_dump_all:
>          > > > >>> >
>          > > > >>> > memory.cnt= 0x1
>          > > > >>> >
>          > > > >>> > memory.reg[0x0].base = 0x140000000
>          > > > >>> >
>          > > > >>> > .size = 0x100000000
>          > > > >>> >
>          > > > >>> >
>          > > > >>> > reserved.cnt= 0x0
>          > > > >>> >
>          > > > >>> > arch_number = 0x0000000000000000
>          > > > >>> >
>          > > > >>> > TLB addr= 0x000000013fff0000
>          > > > >>> >
>          > > > >>> > irq_sp= 0x000000013ede6dd0
>          > > > >>> >
>          > > > >>> > sp start= 0x000000013ede6dd0
>          > > > >>> >
>          > > > >>> > Early malloc usage: 3a8 / 2000
>          > > > >>> >
>          > > > >>> > => load virtio 0:1 0x140000000 /oskit.efi
>          > > > >>> >
>          > > > >>> > 853424 bytes read in 1 ms (813.9 MiB/s)
>          > > > >>> >
>          > > > >>> > => bootefi 0x140000000 0x13ede6dd0
>          > > > >>> >
>          > > > >>> > ERROR: Failed to register WaitForKey event
>          > > > >>> >
>          > > > >>> > Setting OsIndications failed
>          > > > >>> >
>          > > > >>> > Error: Cannot initialize UEFI sub-system, r = 9
>          > > > >>> >
>          > > > >>> >
>          > > > >>> > I think there is a need to calculate memory map
>         based on previous
>          > > > >>> > firmware (TFA, QEMU can be considered as previous
>         firmware)
>          > > > >>information
>          > > > >>> > (DT or blob_list).
>          > > > >>> >
>          > > > >>> > What do you think ?
>          > > > >>> >
>          > > > >>> > Cheers
>          > > > >>> >
>          > > > >>> > FF
>          > > > >>> >
>          > > > >>> > --
>          > > > >>> >
>          > > > >>> > François-Frédéric Ozog | /Director Business
>         Development/
>          > > > >>> > T: +33.67221.6485
>          > > > >>> > francois.ozog at linaro.org
>         <mailto:francois.ozog at linaro.org>
>         <mailto:francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>>
>          > > > >>| Skype: ffozog
>          > > > >>> >
>          > > > >>> >
>          > > > >>>
>          > > > >>> The kernel load address is hard coded here:
>          > > > >>> include/configs/qemu-arm.h:41:
>         "kernel_addr_r=0x40400000\0" \
>          > > > >>>
>          > > > >>> bdinfo shows:
>          > > > >>> DRAM start = 0x140000000
>          > > > >>> DRAM size  = 0x100000000
>          > > > >>>
>          > > > >>> fdt addr $fdt_addr
>          > > > >>> fdt print
>          > > > >>>
>          > > > >>> shows two memory areas. One at 40000000, one at
>         140000000.
>          > > > >>
>          > > > >>(This shows that U-Boot receives a correct memory map
>         via dtb.)
>          > > > >>
>          > > > >>This is a NUMA machine, isn't it? Why should we care about
>         which
>          > > > >>memory region is used here? Please note that this is a
>         virtual
>          > > > >machine,
>          > > > >>there is no practical difference between two regions.
>          > > > >>
>          > > > >>The root problem is that U-Boot did not recognize there
>         were two
>          > > > >>memory regions. We can fix this issue in either way:
>          > > > >>
>          > > > >>1)
>          > > > >>diff --git a/configs/qemu_arm64_defconfig
>          > > > >>b/configs/qemu_arm64_defconfig
>          > > > >>index f6e586627a8e..b70ffae8bf6e 100644
>          > > > >>--- a/configs/qemu_arm64_defconfig
>          > > > >>+++ b/configs/qemu_arm64_defconfig
>          > > > >>@@ -1,7 +1,7 @@
>          > > > >> CONFIG_ARM=y
>          > > > >> CONFIG_POSITION_INDEPENDENT=y
>          > > > >> CONFIG_ARCH_QEMU=y
>          > > > >>-CONFIG_NR_DRAM_BANKS=1
>          > > > >>+CONFIG_NR_DRAM_BANKS=2
>          > > > >> CONFIG_ENV_SIZE=0x40000
>          > > > >> CONFIG_ENV_SECT_SIZE=0x40000
>          > > > >> CONFIG_AHCI=y
>          > > > >>
>          > > > >>2)
>          > > > >>diff --git a/lib/fdtdec.c b/lib/fdtdec.c
>          > > > >>index 4b097fb588ed..4067ea2dead6 100644
>          > > > >>--- a/lib/fdtdec.c
>          > > > >>+++ b/lib/fdtdec.c
>          > > > >>@@ -1111,7 +1111,7 @@ int
>         fdtdec_setup_memory_banksize(void)
>          > > > >>                return -EINVAL;
>          > > > >>        }
>          > > > >>
>          > > > >>-       for (bank = 0; bank < CONFIG_NR_DRAM_BANKS;
>         bank++) {
>          > > > >>+       for (bank = 0; ; bank++) {
>          > > > >>                ret = ofnode_read_resource(mem, reg++,
>         &res);
>          > > > >>                if (ret < 0) {
>          > > > >>                        reg = 0;
>          > > > >>
>          > > > >>   (fdtdec_setup_memory_banksize() is called in
>         dram_init_banksize().)
>          > > > >>
>          > > > >>
>          > > > >>(2) seems much better, but I don't know why we had to use
>          > > > >>CONFIG_NR_DRAM_BANKS here.
>          > > > >>
>          > >
>          > > 2) alone does not work as other places in the code refer to
>          > > CONFIG_NR_DRAM_BANKS. Setting ...BANKS to 32 makes my code
>         work and
>          > > bdinfo seems now correct:
>          > >
>          > => bdinfo
>          > > boot_params = 0x0000000000000000
>          > > DRAM bank   = 0x0000000000000000
>          > > -> start    = 0x0000000140000000
>          > > -> size     = 0x0000000100000000
>          > > DRAM bank   = 0x0000000000000001
>          > > -> start    = 0x0000000040000000
>          > > -> size     = 0x0000000100000000
>          > > flashstart  = 0x0000000000000000
>          > > flashsize   = 0x0000000004000000
>          > > flashoffset = 0x00000000000bcb88
>          > > baudrate    = 115200 bps
>          > > relocaddr   = 0x000000013ff27000
>          > > reloc off   = 0x000000013ff27000
>          > > Build       = 64-bit
>          > > current eth = virtio-net#32
>          > > ethaddr     = 52:52:52:52:52:52
>          > > IP addr     = <NULL>
>          > > fdt_blob    = 0x000000013ede6cf0
>          > > new_fdt     = 0x000000013ede6cf0
>          > > fdt_size    = 0x0000000000100000
>          > > lmb_dump_all:
>          > >     memory.cnt   = 0x1
>          > >     memory.reg[0x0].base   = 0x40000000
>          > >   .size   = 0x200000000
>          > >     reserved.cnt   = 0x1
>          > >     reserved.reg[0x0].base = 0x13ede58f0
>          > >     .size = 0x121a710
>          > > arch_number = 0x0000000000000000
>          > > TLB addr    = 0x000000013fff0000
>          > > irq_sp      = 0x000000013ede6ce0
>          > > sp start    = 0x000000013ede6ce0
>          > > Early malloc usage: 3a8 / 2000
>          > >
>          > > May I suggest you propose a combined patch Akashi-san? If
>         we assume
>          > > NUMA systems to be tested up to 8 nodes to mimic real existing
>          > > enterprise hardware and up to 4 memory slots (say for
>         memory hot
>          > > plugging tests) what about a default value of 32?
>         Alternatively, we
>          > > could set this value to a much higher one if the costs are
>         negligible.
>          > >
>          > >
>          > > Well, let's not rush as there are other twists:
>          >
>          > the 4G bank in node 1 is marked BootServicesData in the UEFI
>         GetMemoryMap
>          > which I assume is not the case. EDK2 reports it as
>         ConventionalMemory.
>          >
>          > The root cause seems to be gd->ram_top not being set up properly.
>          >
>          > Further analysis shows that the DT passed to the booted EFI
>         payload does
>          > not seem to be correct:
>          >
>          > DT fragment passed to U-Boot
>          >
>          > memory at 140000000 {
>          > numa-node-id = <0x00000001>;
>          > reg = <0x00000001 0x40000000 0x00000001 0x00000000>;
>          > device_type = "memory";
>          > };
>          > memory at 40000000 {
>          > numa-node-id = <0x00000000>;
>          > reg = <0x00000000 0x40000000 0x00000001 0x00000000>;
>          > device_type = "memory";
>          > };
>          >
>          > DT passed to payload (as per my debug code):
>          >
>          > memory at 140000000: memory
>          >
>          >     numa-node-id 1
>          >
>          >     reg (len= 32)
>          >
>          >          140000000 100000000
>          >
>          >          40000000 100000000
>          >
>          > memory at 40000000: memory
>          >
>          >     numa-node-id 0
>          >
>          >     reg (len= 16)
>          >
>          >          40000000 100000000
>          >
>          > I am investigating this further...
>
>         You should check the logic of fdt_fixup_memory_banks()
>         which is called this way:
>            efi_dt_fixup()
>              image_setup_libfdt()
>                arch_fixup_fdt()
>                  fdt_fixup_memory_banks()
>
>         What it does is to put *all* the memory regions unconditionally as
>         a single "reg" array into the *first-detected* "memory" node,
>         which is
>         "memory at 140000000" in this case.
>         It means that this function doesn't respect NUMA configuration.
>
>     Thanks.
>
>     tweaking ram_top to be the correct value in
>     https://elixir.bootlin.com/u-boot/latest/source/lib/efi_loader/efi_memory.c#L732
>     <https://elixir.bootlin.com/u-boot/latest/source/lib/efi_loader/efi_memory.c#L732>
>     results in memory on node 1 to be considered as ConventionalMemory
>     but U-Boot places all code and data at the end of node 1 while it
>     should position it on the current node. That said it is an
>     acceptable work around for my test case.
>
>     Bottom line, we need to introduce NUMA node management in memory
>     management all over the place.
>     It is unclear if there is a business case for that. I'll ask LEDGE
>     members...
>
>         -Takahiro Akashi
>
>
>          > > >>In this case, other occurrences of CONFIG_NR_DRAM_BANKS
>         in this file
>          > > > >>should be replaced with a variable for it.
>          > > > >>
>          > > > >>> Your use case is well beyond the typical U-Boot
>         usage. So I guess it
>          > > > >>> will be up to Linaro to provide the necessary patches:
>          > > > >>>
>          > > > >>> * determine the active CPU
>          > > > >>> * determine the RAM assigned to the active CPU according
>          > > > >>>   to the numa-node-id in the device-tree
>          > > > >>> * make sure that U-Boot only uses the memory of the
>         active CPU
>          > > > >>>   internally
>          > > > >>> * make sure that the UEFI memory map contains a compliant
>          > > > >description
>          > > > >>> * possibly, dynamically set up the environment variables
>          > > > >>>
>          > > > >>> +CC Tuomas Tynkkynen (maintainer for
>         qemu_arm64_defconfig)
>          > > > >>
>          > > > >>For (1), we'd better have a different config, or increase
>          > > > >>the value of CONFIG_NR_DRAM_BANKS to a bigger number?
>          > > > >
>          > > > >Is the system configured such that each CPU can access
>         the other CPUs'
>          > > > >RAM when entering U-Boot?
>          > > > >
>          > > > >Best regards
>          > > > >
>          > > > >Heinrich
>          > > > >
>          > > >
>          > > > At least the comments for this patch sound as if on a
>         physical system
>          > > cross NUMA node memory access is only available after full SMP
>          > > initialization:
>          > > >
>          > > >
>          > >
>         https://patchwork.kernel.org/project/linux-acpi/patch/20180625130552.5636-1-lorenzo.pieralisi@arm.com/
>         <https://patchwork.kernel.org/project/linux-acpi/patch/20180625130552.5636-1-lorenzo.pieralisi@arm.com/>
>          > > >
>          > > > QEMU may be less restrictive.
>          > > >
>          > > > QEMU allows the node distance to be 255 indicating that
>         cross node
>          > > access is infeasible.
>          > > >
>          > > > Best regards
>          > > >
>          > > > Heinrich
>          > > >
>          > > > >>
>          > > > >>-Takahiro Akashi
>          > > > >>
>          > > > >>
>          > > > >>> Best regards
>          > > > >>>
>          > > > >>> Heinrich
>          > > >
>          > >
>          > >
>          > > --
>          > > François-Frédéric Ozog | Director Business Development
>          > > T: +33.67221.6485
>          > > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
>         | Skype: ffozog
>          > >
>          >
>          >
>          > --
>          > François-Frédéric Ozog | *Director Business Development*
>          > T: +33.67221.6485
>          > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org> |
>         Skype: ffozog
>
>
>
>     --
>
>     François-Frédéric Ozog | /Director Business Development/
>     T: +33.67221.6485
>     francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
>     | Skype: ffozog
>
>
>
>
> --
>
> François-Frédéric Ozog | /Director Business Development/
> T: +33.67221.6485
> francois.ozog at linaro.org <mailto:francois.ozog at linaro.org> | Skype: ffozog
>
>


More information about the U-Boot mailing list