QEMU NUMA and U-Boot

François Ozog francois.ozog at linaro.org
Wed Mar 23 08:29:21 CET 2022


Le mer. 7 juil. 2021 à 19:39, Heinrich Schuchardt <xypron.glpk at gmx.de> a
écrit :

>
>
> On 7/7/21 5:15 PM, François Ozog wrote:
> > top posting what now works for me:
> > - changed calculation of memory size to loop through different memory
> nodes
> > - added numa_node to banks
> > - filter out banks that do not match the target mixup node (do not want
> > to change ABI to add node information)
> >
> > That's not satisfying overall but at least my code works with NUMA on
> Qemu.
>
> Do we expect real hardware with NUMA to be using U-Boot?

with new Apple M1 Ultra and NVidia Grace I think that we have examples of
what we will find in some automotive Domain Controller Units.
Are there other signs for NUMA requirements for U-Boot?


>
> If you have real ARM hardware in NUMA configuration, can the boot CPU
> access memory of dormant CPUs when U-Boot is entered?
>
> Best regards
>
> Heinrich
>
> >
> >
> > *diff --git a/Kconfig b/Kconfig*
> >
> > *index f8c1a77bed..4d3ab8cb49 100644*
> >
> > *--- a/Kconfig*
> >
> > *+++ b/Kconfig*
> >
> > @@ -192,7 +192,9 @@config NR_DRAM_BANKS
> >
> > default 1 if ARCH_SUNXI || ARCH_OWL
> >
> > default 4
> >
> > help
> >
> > - This defines the number of DRAM banks.
> >
> > +This defines the number of DRAM banks. For qemu with NUMA, you
> >
> > +may want to set this value to <slots> * <possible memdev>.
> >
> > +for instance, for a 2 slot with 4 memdevs set NR_DRAM_BANKS to 8.
> >
> > config SYS_BOOT_GET_CMDLINE
> >
> > bool "Enable kernel command line setup"
> >
> > *diff --git a/arch/arm/lib/bootm-fdt.c b/arch/arm/lib/bootm-fdt.c*
> >
> > *index 29020bd1c6..2b28ab8108 100644*
> >
> > *--- a/arch/arm/lib/bootm-fdt.c*
> >
> > *+++ b/arch/arm/lib/bootm-fdt.c*
> >
> > @@ -42,12 +42,17 @@int arch_fixup_fdt(void *blob)
> >
> > u64 size[CONFIG_NR_DRAM_BANKS];
> >
> > for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
> >
> > +unsigned char node = bd->bi_dram[bank].numa_node;
> >
> > start[bank] = bd->bi_dram[bank].start;
> >
> > size[bank] = bd->bi_dram[bank].size;
> >
> > #ifdef CONFIG_ARMV7_NONSEC
> >
> > ret = armv7_apply_memory_carveout(&start[bank], &size[bank]);
> >
> > if (ret)
> >
> > return ret;
> >
> > +#endif
> >
> > +#ifdef CONFIG_OF_LIBFDT
> >
> > +/* add node info for the fdt_fixup_memory below */
> >
> > +start[bank] = (((phys_addr_t)node) << 56) | bd->bi_dram[bank].start;
> >
> > #endif
> >
> > }
> >
> > *diff --git a/common/fdt_support.c b/common/fdt_support.c*
> >
> > *index a9a32df1e7..3bca2ba888 100644*
> >
> > *--- a/common/fdt_support.c*
> >
> > *+++ b/common/fdt_support.c*
> >
> > @@ -415,16 +415,29 @@static int fdt_pack_reg(const void *fdt, void *buf,
> > u64 *address, u64 *size,
> >
> > return p - (char *)buf;
> >
> > }
> >
> > +static inline uint32_t fdt32_ld(const fdt32_t *p)
> >
> > +{
> >
> > +const uint8_t *bp = (const uint8_t *)p;
> >
> > +
> >
> > +return ((uint32_t)bp[0] << 24)
> >
> > + | ((uint32_t)bp[1] << 16)
> >
> > + | ((uint32_t)bp[2] << 8)
> >
> > + | bp[3];
> >
> > +}
> >
> > #if CONFIG_NR_DRAM_BANKS > 4
> >
> > #define MEMORY_BANKS_MAX CONFIG_NR_DRAM_BANKS
> >
> > #else
> >
> > #define MEMORY_BANKS_MAX 4
> >
> > #endif
> >
> > +/* NUMA has yet to be properly handled
> >
> > + * This code appends memory to the first memory node that matches the
> > NUMA node.
> >
> > + */
> >
> > int fdt_fixup_memory_banks(void *blob, u64 start[], u64 size[], int
> banks)
> >
> > {
> >
> > int err, nodeoffset;
> >
> > int len, i;
> >
> > u8 tmp[MEMORY_BANKS_MAX * 16]; /* Up to 64-bit address + 64-bit size */
> >
> > +unsigned int numa_node;
> >
> > if (banks > MEMORY_BANKS_MAX) {
> >
> > printf("%s: num banks %d exceeds hardcoded limit %d."
> >
> > @@ -444,6 +457,12 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> > u64 size[], int banks)
> >
> > if (nodeoffset < 0)
> >
> > return nodeoffset;
> >
> > +const __be32* numa_node_prop = fdt_getprop(blob, nodeoffset,
> > "numa-node-id", &len);
> >
> > +if (numa_node_prop != NULL && len == sizeof(__be32)) {
> >
> > +numa_node = fdt32_ld(numa_node_prop);
> >
> > +}
> >
> > +else numa_node = 0;
> >
> > +
> >
> > err = fdt_setprop(blob, nodeoffset, "device_type", "memory",
> >
> > sizeof("memory"));
> >
> > if (err < 0) {
> >
> > @@ -453,8 +472,27 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> > u64 size[], int banks)
> >
> > }
> >
> > for (i = 0; i < banks; i++) {
> >
> > - if (start[i] == 0 && size[i] == 0)
> >
> > +/* clear node information */
> >
> > +unsigned int node;
> >
> > +recheck:
> >
> > +if (start[i]== 0 && size[i] == 0)
> >
> > break;
> >
> > +node = (start[i] >> 56) & 0xFF;
> >
> > +start[i] = start[i] & 0x00FFFFFFFFFFFFFF;
> >
> > +/* for the moment, just ignore the banks that are not in
> >
> > +* memory NUMA node */
> >
> > +if (node != numa_node) {
> >
> > +/* remove the bank from the list */
> >
> > +int j;
> >
> > +for (j=i; j < banks-1; j++) {
> >
> > +start[j] = start[j+1];
> >
> > +size[j] = size[j+1];
> >
> > +}
> >
> > +start[j]=0;
> >
> > +size[j]=0;
> >
> > +banks--;
> >
> > +goto recheck;
> >
> > +}
> >
> > }
> >
> > banks = i;
> >
> > @@ -470,6 +508,7 @@int fdt_fixup_memory_banks(void *blob, u64 start[],
> > u64 size[], int banks)
> >
> > "reg", fdt_strerror(err));
> >
> > return err;
> >
> > }
> >
> > +
> >
> > return 0;
> >
> > }
> >
> > *diff --git a/configs/qemu_arm64_defconfig
> b/configs/qemu_arm64_defconfig*
> >
> > *index f6e586627a..0fdc22d71d 100644*
> >
> > *--- a/configs/qemu_arm64_defconfig*
> >
> > *+++ b/configs/qemu_arm64_defconfig*
> >
> > @@ -1,7 +1,7 @@
> >
> > CONFIG_ARM=y
> >
> > CONFIG_POSITION_INDEPENDENT=y
> >
> > CONFIG_ARCH_QEMU=y
> >
> > -CONFIG_NR_DRAM_BANKS=1
> >
> > +CONFIG_NR_DRAM_BANKS=32
> >
> > CONFIG_ENV_SIZE=0x40000
> >
> > CONFIG_ENV_SECT_SIZE=0x40000
> >
> > CONFIG_AHCI=y
> >
> > *diff --git a/include/asm-generic/u-boot.h
> b/include/asm-generic/u-boot.h*
> >
> > *index 637de0c455..3cf45124ec 100644*
> >
> > *--- a/include/asm-generic/u-boot.h*
> >
> > *+++ b/include/asm-generic/u-boot.h*
> >
> > @@ -71,6 +71,8 @@struct bd_info {
> >
> > struct {/* RAM configuration */
> >
> > phys_addr_t start;
> >
> > phys_size_t size;
> >
> > +unsigned numa_node;
> >
> > +unsigned pad; /* just to make sure we do not cause alignment */
> >
> > } bi_dram[CONFIG_NR_DRAM_BANKS];
> >
> > };
> >
> > *diff --git a/lib/fdtdec.c b/lib/fdtdec.c*
> >
> > *index 4b097fb588..b1934edc8d 100644*
> >
> > *--- a/lib/fdtdec.c*
> >
> > *+++ b/lib/fdtdec.c*
> >
> > @@ -1064,77 +1064,101 @@int fdtdec_decode_display_timing(const void
> > *blob, int parent, int index,
> >
> > return ret;
> >
> > }
> >
> > +ofnode get_next_memory_node(ofnode mem)
> >
> > +{
> >
> > +do {
> >
> > +mem = ofnode_by_prop_value(mem, "device_type", "memory", 7);
> >
> > +} while (!ofnode_is_available(mem));
> >
> > +
> >
> > +return mem;
> >
> > +}
> >
> > +
> >
> > int fdtdec_setup_mem_size_base(void)
> >
> > {
> >
> > int ret;
> >
> > +int reg;
> >
> > ofnode mem;
> >
> > struct resource res;
> >
> > +phys_addr_t base = ~0;
> >
> > +phys_size_t size = 0;;
> >
> > - mem = ofnode_path("/memory");
> >
> > - if (!ofnode_valid(mem)) {
> >
> > - debug("%s: Missing /memory node\n", __func__);
> >
> > - return -EINVAL;
> >
> > - }
> >
> > +for (mem = get_next_memory_node(mem); ofnode_valid(mem); mem =
> > get_next_memory_node(mem)) {
> >
> > - ret = ofnode_read_resource(mem, 0, &res);
> >
> > - if (ret != 0) {
> >
> > - debug("%s: Unable to decode first memory bank\n", __func__);
> >
> > - return -EINVAL;
> >
> > +for(reg = 0, ret = 0; ret == 0 ; reg++) {
> >
> > +ret = ofnode_read_resource(mem, reg, &res);
> >
> > +if (ret != 0)
> >
> > +break;
> >
> > +if ((phys_addr_t)res.start < base)
> >
> > +base = (phys_addr_t)res.start;
> >
> > +size += (phys_size_t)(res.end - res.start + 1);
> >
> > +}
> >
> > }
> >
> > +
> >
> > +gd->ram_base = (unsigned long)base;
> >
> > +gd->ram_size = (phys_size_t)size;
> >
> > - gd->ram_size = (phys_size_t)(res.end - res.start + 1);
> >
> > - gd->ram_base = (unsigned long)res.start;
> >
> > +debug("%s: Initial DRAM base %llx\n", __func__,
> >
> > +(unsigned long long)gd->ram_base);
> >
> > debug("%s: Initial DRAM size %llx\n", __func__,
> >
> > (unsigned long long)gd->ram_size);
> >
> > return 0;
> >
> > }
> >
> > -ofnode get_next_memory_node(ofnode mem)
> >
> > -{
> >
> > - do {
> >
> > - mem = ofnode_by_prop_value(mem, "device_type", "memory", 7);
> >
> > - } while (!ofnode_is_available(mem));
> >
> > -
> >
> > - return mem;
> >
> > -}
> >
> > -
> >
> > int fdtdec_setup_memory_banksize(void)
> >
> > {
> >
> > int bank, ret, reg = 0;
> >
> > struct resource res;
> >
> > ofnode mem = ofnode_null();
> >
> > +const __be32* numa_node_prop = NULL;
> >
> > +int len;
> >
> > +int numa_node = -1;
> >
> > +int count = 0;
> >
> > +
> >
> > +for (mem = get_next_memory_node(mem); ofnode_valid(mem); mem =
> > get_next_memory_node(mem)) {
> >
> > - mem = get_next_memory_node(mem);
> >
> > - if (!ofnode_valid(mem)) {
> >
> > - debug("%s: Missing /memory node\n", __func__);
> >
> > - return -EINVAL;
> >
> > +count++;
> >
> > +
> >
> > +numa_node_prop = ofnode_get_property(mem, "numa-node-id", &len);
> >
> > +if (numa_node_prop != NULL && len == sizeof(__be32)) {
> >
> > +numa_node = of_read_number(numa_node_prop, 1);
> >
> > }
> >
> > +else numa_node = 0;
> >
> > - for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
> >
> > - ret = ofnode_read_resource(mem, reg++, &res);
> >
> > - if (ret < 0) {
> >
> > - reg = 0;
> >
> > - mem = get_next_memory_node(mem);
> >
> > - if (!ofnode_valid(mem))
> >
> > - break;
> >
> > +debug("Found memory for node %d\n", numa_node);
> >
> > - ret = ofnode_read_resource(mem, reg++, &res);
> >
> > - if (ret < 0)
> >
> > - break;
> >
> > - }
> >
> > +ret = 0;
> >
> > +for(reg = 0; ret == 0 && bank < CONFIG_NR_DRAM_BANKS; reg++) {
> >
> > +ret = ofnode_read_resource(mem, reg, &res);
> >
> > if (ret != 0)
> >
> > - return -EINVAL;
> >
> > +break;
> >
> > gd->bd->bi_dram[bank].start = (phys_addr_t)res.start;
> >
> > gd->bd->bi_dram[bank].size =
> >
> > (phys_size_t)(res.end - res.start + 1);
> >
> > +gd->bd->bi_dram[bank].numa_node = numa_node;
> >
> > - debug("%s: DRAM Bank #%d: start = 0x%llx, size = 0x%llx\n",
> >
> > +debug("%s: DRAM Bank #%d: start = 0x%llx, size = 0x%llx"
> >
> > +" name_node = %d\n",
> >
> > __func__, bank,
> >
> > (unsigned long long)gd->bd->bi_dram[bank].start,
> >
> > - (unsigned long long)gd->bd->bi_dram[bank].size);
> >
> > +(unsigned long long)gd->bd->bi_dram[bank].size,
> >
> > +gd->bd->bi_dram[bank].numa_node);
> >
> > +
> >
> > +bank++;
> >
> > +}
> >
> > +
> >
> > +
> >
> > +}
> >
> > +
> >
> > +if (count == 0) {
> >
> > +debug("%s: Missing /memory node\n", __func__);
> >
> > +return -EINVAL;
> >
> > +}
> >
> > +if (bank >= CONFIG_NR_DRAM_BANKS) {
> >
> > +printf("Too many DT memory nodes for CONFIG_NR_DRAM_BANKS=%d\n",
> >
> > +CONFIG_NR_DRAM_BANKS);
> >
> > }
> >
> > return 0;
> >
> >
> > On Wed, 7 Jul 2021 at 13:00, François Ozog <francois.ozog at linaro.org
> > <mailto:francois.ozog at linaro.org>> wrote:
> >
> >
> >
> >     On Wed, 7 Jul 2021 at 12:16, AKASHI Takahiro
> >     <takahiro.akashi at linaro.org <mailto:takahiro.akashi at linaro.org>>
> wrote:
> >
> >         On Wed, Jul 07, 2021 at 11:37:19AM +0200, Fran??ois Ozog wrote:
> >          > On Wed, 7 Jul 2021 at 09:40, François Ozog
> >         <francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>>
> wrote:
> >          >
> >          > > On Wed, 7 Jul 2021 at 05:59, Heinrich Schuchardt
> >         <xypron.glpk at gmx.de <mailto:xypron.glpk at gmx.de>>
> >          > > wrote:
> >          > > >
> >          > > > Am 7. Juli 2021 05:18:20 MESZ schrieb Heinrich Schuchardt
> <
> >          > > xypron.glpk at gmx.de <mailto:xypron.glpk at gmx.de>>:
> >          > > > >Am 7. Juli 2021 03:44:35 MESZ schrieb AKASHI Takahiro
> >          > > > ><takahiro.akashi at linaro.org
> >         <mailto:takahiro.akashi at linaro.org>>:
> >          > > > >>François,
> >          > > > >>
> >          > > > >>On Tue, Jul 06, 2021 at 08:10:08PM +0200, Heinrich
> >         Schuchardt wrote:
> >          > > > >>> On 7/6/21 6:13 PM, François Ozog wrote:
> >          > > > >>> > Hi Heinrich, U-Boot 2021-07rc5 does not take into
> >         account memory
> >          > > > >>> > description when using Qemu 5.2 NUMA configuration
> >         to adapt memory
> >          > > > >>map
> >          > > > >>> > (kernel_addr_r...):
> >          > > > >>> >
> >          > > > >>> >         -smp 4 \
> >          > > > >>> >           -m 8G,slots=2,maxmem=16G \
> >          > > > >>> >          -object memory-backend-ram,size=4G,id=m0 \
> >          > > > >>> >          -object memory-backend-ram,size=4G,id=m1 \
> >          > > > >>> >          -numa node,cpus=0-1,nodeid=0,memdev=m0 \
> >          > > > >>> >          -numa node,cpus=2-3,nodeid=1,memdev=m1
> >          > > > >>> >
> >          > > > >>> > kernel_addr_r is still 0x4040000 and thus you can't
> >         use it to
> >          > > > >>bootefi.
> >          > > > >>> >
> >          > > > >>> > fdt addr 0x13ede6de0; fdt print
> >          > > > >>> >
> >          > > > >>> > Displays fdt while I think it should not.
> >          > > > >>> >
> >          > > > >>> > If I load the kernel at dram.start, the load works
> >         but not boot
> >          > > > >>> >
> >          > > > >>> > U-Boot 2021.07 (Jul 06 2021 - 13:26:43 +0000)
> >          > > > >>> >
> >          > > > >>> >
> >          > > > >>> > DRAM:4 GiB
> >          > > > >>> >
> >          > > > >>> > Flash: 64 MiB
> >          > > > >>> >
> >          > > > >>> > Loading Environment from Flash... OK
> >          > > > >>> >
> >          > > > >>> > In:pl011 at 9000000
> >          > > > >>> >
> >          > > > >>> > Out: pl011 at 9000000
> >          > > > >>> >
> >          > > > >>> > Err: pl011 at 9000000
> >          > > > >>> >
> >          > > > >>> > Net: eth0: virtio-net#32
> >          > > > >>> >
> >          > > > >>> > Hit any key to stop autoboot:0
> >          > > > >>> >
> >          > > > >>> > =>
> >          > > > >>> >
> >          > > > >>> > => bdinfo
> >          > > > >>> >
> >          > > > >>> > boot_params = 0x0000000000000000
> >          > > > >>> >
> >          > > > >>> > DRAM bank = 0x0000000000000000
> >          > > > >>> >
> >          > > > >>> > -> start= 0x0000000140000000
> >          > > > >>> >
> >          > > > >>> > -> size = 0x0000000100000000
> >          > > > >>> >
> >          > > > >>> > flashstart= 0x0000000000000000
> >          > > > >>> >
> >          > > > >>> > flashsize = 0x0000000004000000
> >          > > > >>> >
> >          > > > >>> > flashoffset = 0x00000000000bc990
> >          > > > >>> >
> >          > > > >>> > baudrate= 115200 bps
> >          > > > >>> >
> >          > > > >>> > relocaddr = 0x000000013ff27000
> >          > > > >>> >
> >          > > > >>> > reloc off = 0x000000013ff27000
> >          > > > >>> >
> >          > > > >>> > Build = 64-bit
> >          > > > >>> >
> >          > > > >>> > current eth = virtio-net#32
> >          > > > >>> >
> >          > > > >>> > ethaddr = 52:52:52:52:52:52
> >          > > > >>> >
> >          > > > >>> > IP addr = <NULL>
> >          > > > >>> >
> >          > > > >>> > fdt_blob= 0x000000013ede6de0
> >          > > > >>> >
> >          > > > >>> > new_fdt = 0x000000013ede6de0
> >          > > > >>> >
> >          > > > >>> > fdt_size= 0x0000000000100000
> >          > > > >>> >
> >          > > > >>> > lmb_dump_all:
> >          > > > >>> >
> >          > > > >>> > memory.cnt= 0x1
> >          > > > >>> >
> >          > > > >>> > memory.reg[0x0].base = 0x140000000
> >          > > > >>> >
> >          > > > >>> > .size = 0x100000000
> >          > > > >>> >
> >          > > > >>> >
> >          > > > >>> > reserved.cnt= 0x0
> >          > > > >>> >
> >          > > > >>> > arch_number = 0x0000000000000000
> >          > > > >>> >
> >          > > > >>> > TLB addr= 0x000000013fff0000
> >          > > > >>> >
> >          > > > >>> > irq_sp= 0x000000013ede6dd0
> >          > > > >>> >
> >          > > > >>> > sp start= 0x000000013ede6dd0
> >          > > > >>> >
> >          > > > >>> > Early malloc usage: 3a8 / 2000
> >          > > > >>> >
> >          > > > >>> > => load virtio 0:1 0x140000000 /oskit.efi
> >          > > > >>> >
> >          > > > >>> > 853424 bytes read in 1 ms (813.9 MiB/s)
> >          > > > >>> >
> >          > > > >>> > => bootefi0x140000000 0x13ede6dd0
> >          > > > >>> >
> >          > > > >>> > ERROR: Failed to register WaitForKey event
> >          > > > >>> >
> >          > > > >>> > Setting OsIndications failed
> >          > > > >>> >
> >          > > > >>> > Error: Cannot initialize UEFI sub-system, r = 9
> >          > > > >>> >
> >          > > > >>> >
> >          > > > >>> > I think there is a need to calculate memory map
> >         based on previous
> >          > > > >>> > firmware (TFA, QEMU can be considered as previous
> >         frimware)
> >          > > > >>information
> >          > > > >>> > (DT or blob_list).
> >          > > > >>> >
> >          > > > >>> > What do you think ?
> >          > > > >>> >
> >          > > > >>> > Cheers
> >          > > > >>> >
> >          > > > >>> > FF
> >          > > > >>> >
> >          > > > >>> > --
> >          > > > >>> >
> >          > > > >>> > François-Frédéric Ozog | /Director Business
> >         Development/
> >          > > > >>> > T: +33.67221.6485
> >          > > > >>> > francois.ozog at linaro.org
> >         <mailto:francois.ozog at linaro.org>
> >         <mailto:francois.ozog at linaro.org <mailto:
> francois.ozog at linaro.org>>
> >          > > > >>| Skype: ffozog
> >          > > > >>> >
> >          > > > >>> >
> >          > > > >>>
> >          > > > >>> The kernel load address is hard coded here:
> >          > > > >>> include/configs/qemu-arm.h:41:
> >         "kernel_addr_r=0x40400000\0" \
> >          > > > >>>
> >          > > > >>> bdinfo shows:
> >          > > > >>> DRAM start = 0x140000000
> >          > > > >>> DRAM size  = 0x100000000
> >          > > > >>>
> >          > > > >>> fdt addr $fdt_addr
> >          > > > >>> fdt printf
> >          > > > >>>
> >          > > > >>> shows two memory areas. One at 40000000, one at
> >         140000000.
> >          > > > >>
> >          > > > >>(This shows that U-Boot receives a correct memory map
> >         via dtb.)
> >          > > > >>
> >          > > > >>Is this a NUMA machine, isn't it? Why should we care of
> >         which
> >          > > > >>memory region be used here? Please note that this is a
> >         virtual
> >          > > > >machine,
> >          > > > >>there is no practical difference between two regions.
> >          > > > >>
> >          > > > >>The root problem is that U-Boot did not recognize there
> >         were two
> >          > > > >>memory regions. We can fix this issue in either way:
> >          > > > >>
> >          > > > >>1)
> >          > > > >>diff --git a/configs/qemu_arm64_defconfig
> >          > > > >>b/configs/qemu_arm64_defconfig
> >          > > > >>index f6e586627a8e..b70ffae8bf6e 100644
> >          > > > >>--- a/configs/qemu_arm64_defconfig
> >          > > > >>+++ b/configs/qemu_arm64_defconfig
> >          > > > >>@@ -1,7 +1,7 @@
> >          > > > >> CONFIG_ARM=y
> >          > > > >> CONFIG_POSITION_INDEPENDENT=y
> >          > > > >> CONFIG_ARCH_QEMU=y
> >          > > > >>-CONFIG_NR_DRAM_BANKS=1
> >          > > > >>+CONFIG_NR_DRAM_BANKS=2
> >          > > > >> CONFIG_ENV_SIZE=0x40000
> >          > > > >> CONFIG_ENV_SECT_SIZE=0x40000
> >          > > > >> CONFIG_AHCI=y
> >          > > > >>
> >          > > > >>2)
> >          > > > >>diff --git a/lib/fdtdec.c b/lib/fdtdec.c
> >          > > > >>index 4b097fb588ed..4067ea2dead6 100644
> >          > > > >>--- a/lib/fdtdec.c
> >          > > > >>+++ b/lib/fdtdec.c
> >          > > > >>@@ -1111,7 +1111,7 @@ int
> >         fdtdec_setup_memory_banksize(void)
> >          > > > >>                return -EINVAL;
> >          > > > >>        }
> >          > > > >>
> >          > > > >>-       for (bank = 0; bank < CONFIG_NR_DRAM_BANKS;
> >         bank++) {
> >          > > > >>+       for (bank = 0; ; bank++) {
> >          > > > >>                ret = ofnode_read_resource(mem, reg++,
> >         &res);
> >          > > > >>                if (ret < 0) {
> >          > > > >>                        reg = 0;
> >          > > > >>
> >          > > > >>   (fdtdec_setup_memory_banksize() is called in
> >         dram_init_banksize().)
> >          > > > >>
> >          > > > >>
> >          > > > >>(2) seems much better, but I don't know why we had to
> use
> >          > > > >>CONFIG_NR_DRAM_BANKS here.
> >          > > > >>
> >          > >
> >          > > 2) alone does not work as other places in the code refer to
> >          > > CONFIG_NR_DRAM_BANKS. Setting ...BANKS to 32 makes my code
> >         work and
> >          > > bdinfo seems now correct:
> >          > >
> >          > => bdinfo
> >          > > boot_params = 0x0000000000000000
> >          > > DRAM bank   = 0x0000000000000000
> >          > > -> start    = 0x0000000140000000
> >          > > -> size     = 0x0000000100000000
> >          > > DRAM bank   = 0x0000000000000001
> >          > > -> start    = 0x0000000040000000
> >          > > -> size     = 0x0000000100000000
> >          > > flashstart  = 0x0000000000000000
> >          > > flashsize   = 0x0000000004000000
> >          > > flashoffset = 0x00000000000bcb88
> >          > > baudrate    = 115200 bps
> >          > > relocaddr   = 0x000000013ff27000
> >          > > reloc off   = 0x000000013ff27000
> >          > > Build       = 64-bit
> >          > > current eth = virtio-net#32
> >          > > ethaddr     = 52:52:52:52:52:52
> >          > > IP addr     = <NULL>
> >          > > fdt_blob    = 0x000000013ede6cf0
> >          > > new_fdt     = 0x000000013ede6cf0
> >          > > fdt_size    = 0x0000000000100000
> >          > > lmb_dump_all:
> >          > >     memory.cnt   = 0x1
> >          > >     memory.reg[0x0].base   = 0x40000000
> >          > >   .size   = 0x200000000
> >          > >     reserved.cnt   = 0x1
> >          > >     reserved.reg[0x0].base = 0x13ede58f0
> >          > >     .size = 0x121a710
> >          > > arch_number = 0x0000000000000000
> >          > > TLB addr    = 0x000000013fff0000
> >          > > irq_sp      = 0x000000013ede6ce0
> >          > > sp start    = 0x000000013ede6ce0
> >          > > Early malloc usage: 3a8 / 2000
> >          > >
> >          > > May I suggest you propose a combined patch Akashi-san? If
> >         we assume
> >          > > NUMA systems to be tested up to 8 nodes to mimic real
> existing
> >          > > enterprise hardware and up to 4 memory slots (say for
> >         memory hot
> >          > > plugging tests) what about a default value of 32?
> >         Alternatively, we
> >          > > could set this value to a much higher one if the costs are
> >         negligible.
> >          > >
> >          > >
> >          > > Well, lets not rush as there are other twists:
> >          >
> >          > the 4G bank in node 1 is marked BootServicesData in the UEFI
> >         GetMemoryMap
> >          > which I assume is not the case. EDK2 reports it as
> >         ConventionalMemory.
> >          >
> >          > The root cause seem to be gd->ramtop not being setup properly.
> >          >
> >          > Further analysis shows that the DT passed to the booted EFI
> >         payload does
> >          > not seem to be correct:
> >          >
> >          > DT fragment passed to U-Boot
> >          >
> >          > memory at 140000000 {
> >          > numa-node-id = <0x00000001>;
> >          > reg = <0x00000001 0x40000000 0x00000001 0x00000000>;
> >          > device_type = "memory";
> >          > };
> >          > memory at 40000000 {
> >          > numa-node-id = <0x00000000>;
> >          > reg = <0x00000000 0x40000000 0x00000001 0x00000000>;
> >          > device_type = "memory";
> >          > };
> >          >
> >          > DT passed to payload (as per my debug code):
> >          >
> >          > memory at 140000000: memory
> >          >
> >          >     numa-node-id 1
> >          >
> >          >     reg (len= 32)
> >          >
> >          >          140000000 100000000
> >          >
> >          >          40000000 100000000
> >          >
> >          > memory at 40000000: memory
> >          >
> >          >     numa-node-id 0
> >          >
> >          >     reg (len= 16)
> >          >
> >          >          40000000 100000000
> >          >
> >          > I am investigating this further...
> >
> >         You should check the logic of fdt_fixup_memory_banks()
> >         which is called this way:
> >            efi_dt_fixup()
> >              image_setup_libfdt()
> >                arch_fixup_fdt()
> >                  fdt_fixup_memory_banks()
> >
> >         What it does is to put *all* the memory regions unconditionally
> as
> >         a single "reg" array into the *first-detected* "memory" node,
> >         which is
> >         "memory at 140000000" in this case.
> >         It means that this function doesn't respect NUMA configuration.
> >
> >     Thanks.
> >
> >     tweaking ram_top to be the correct in
> >
> https://elixir.bootlin.com/u-boot/latest/source/lib/efi_loader/efi_memory.c#L732
> >     <
> https://elixir.bootlin.com/u-boot/latest/source/lib/efi_loader/efi_memory.c#L732
> >
> >     results in memory on node 1 to be considered as ConventionalMemory
> >     but U-Boot places all code and data at the end of node 1 while it
> >     should position it on the current node. That said it is an
> >     acceptable work around for my test case.
> >
> >     Bottom line, we need to introduce NUMA node management in memory
> >     management all over the place.
> >     It is unclear if there is a business case for that. I'll ask LEDGE
> >     members...
> >
> >         -Takahiro Akashi
> >
> >
> >          > > >>In this case, other occurrences of CONFIG_NR_DRAM_BANKS
> >         in this file
> >          > > > >>should be replaced with a variable for it.
> >          > > > >>
> >          > > > >>> Your use case is well beyond the typical U-Boot
> >         usage. So I guess it
> >          > > > >>> will be up to Linaro to provide the necessary patches:
> >          > > > >>>
> >          > > > >>> * determine the active CPU
> >          > > > >>> * determine the RAM assigned to the active CPU
> according
> >          > > > >>>   to the numa-node-id in the device-tree
> >          > > > >>> * make sure that U-Boot only uses the memory of the
> >         active CPU
> >          > > > >>>   internally
> >          > > > >>> * make sure that the UEFI memory map contains a
> compliant
> >          > > > >description
> >          > > > >>> * possibly, dynamically set up the environment
> variables
> >          > > > >>>
> >          > > > >>> +CC Tuomas Tynkkynen (maintainer for
> >         qemu_arm64_defconfig)
> >          > > > >>
> >          > > > >>For (1), we'd better have a different config, or
> increase
> >          > > > >>the value of CONFIG_NR_DRAM_BANKS to a bigger number?
> >          > > > >
> >          > > > >Is the system configured such that each CPU can access
> >         the others CPU's
> >          > > > >RAM when entering U-Boot?
> >          > > > >
> >          > > > >Best regards
> >          > > > >
> >          > > > >Heinrich
> >          > > > >
> >          > > >
> >          > > > At least the comments for this patch sound as if on a
> >         physical system
> >          > > cross NUMA node memory access is only available after full
> SMP
> >          > > initialization:
> >          > > >
> >          > > >
> >          > >
> >
> https://patchwork.kernel.org/project/linux-acpi/patch/20180625130552.5636-1-lorenzo.pieralisi@arm.com/
> >         <
> https://patchwork.kernel.org/project/linux-acpi/patch/20180625130552.5636-1-lorenzo.pieralisi@arm.com/
> >
> >          > > >
> >          > > > QEMU may be less restrictive.
> >          > > >
> >          > > > QEMU allows the node distance to be 255 indicating that
> >         cross node
> >          > > access is infeasible.
> >          > > >
> >          > > > Best regards
> >          > > >
> >          > > > Heinrich
> >          > > >
> >          > > > >>
> >          > > > >>-Takahiro Akashi
> >          > > > >>
> >          > > > >>
> >          > > > >>> Best regards
> >          > > > >>>
> >          > > > >>> Heinrich
> >          > > >
> >          > >
> >          > >
> >          > > --
> >          > > François-Frédéric Ozog | Director Business Development
> >          > > T: +33.67221.6485
> >          > > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
> >         | Skype: ffozog
> >          > >
> >          >
> >          >
> >          > --
> >          > François-Frédéric Ozog | *Director Business Development*
> >          > T: +33.67221.6485
> >          > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org> |
> >         Skype: ffozog
> >
> >
> >
> >     --
> >
> >     François-Frédéric Ozog | /Director Business Development/
> >     T: +33.67221.6485
> >     francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
> >     | Skype: ffozog
> >
> >
> >
> >
> > --
> >
> > François-Frédéric Ozog | /Director Business Development/
> > T: +33.67221.6485
> > francois.ozog at linaro.org <mailto:francois.ozog at linaro.org>
> | Skype: ffozog
> >
> >
>
-- 
François-Frédéric Ozog | *Director Business Development*
T: +33.67221.6485
francois.ozog at linaro.org | Skype: ffozog


More information about the U-Boot mailing list