[PATCH v1] arch: riscv: jh7110: Correctly zero L2 LIM

Bo Gan ganboing at gmail.com
Sun May 21 13:30:16 CEST 2023


Background information:
 JH7110 SPL runs in L2 LIM (2M in size mapped at 0x8000000). It
 consists of 16 0x20000 sized regions, each one can be used as
 either L2 cache way or SRAM (not both). From top to bottom, there're
 ways 0-15. The way 0 is always enabled, at most 0x1e0000 can be used.

In SPL, we don't enable any cache ways, thus all 15 (except way 0)
ways can be used. However, due to HW requirement, we must zero the
LIM before use. This is because ECC is applied to LIM, and if not
cleared first, the ECC part is invalid, which can trigger ECC errors
when reading/writing data.

There are several issues currently. We clear L2 LIM from __bss_end
to 0x81FFFFF in `harts_early_init`. This is wrong because:

 a. Each hart (in the middle of a function call) overwriting its own
    stack and other harts' stacks.
    (data-race and data-corruption)
 b. Lottery winner hart can be doing `board_init_f_init_reserve`,
    while other harts're in the middle of zeroing L2 LIM.
    (data-race)

To fix this, we split the job, such that there's one and only one
owner of zeroing a specific region (No data-race). A new SPL config
option `SPL_ZERO_MEM_BEFORE_USE` is introduced. Allowing The zeroing
to happen in the same code path. (much easier to reason about).

Another option `SPL_SYS_MALLOC_CLEAR_ON_INIT` also gets introduced.
It allows us to also zero late malloc (dlmalloc) area, in case it gets
configured to be inside L2 LIM (via `CUSTOM_SYS_SPL_MALLOC_ADDR`)
We by default enable it to be on the safe side.

`CONFIG_SPL_STACK` is adjusted to reduce the waste of L2 LIM space.
When setting

    CONFIG_CUSTOM_SYS_SPL_MALLOC_ADDR=0x8100000
    CONFIG_SYS_SPL_MALLOC_SIZE=0xe0000

A 0.875M late malloc arena is available in L2 LIM. It's sufficient for
loading JH7110 FIT Image (gzip compressed OpenSBI+UBOOT+DTB).
The advantage of this config is allowing OpenSBI/UBOOT to be loaded at
any DDR memory address. By default the malloc arena is configured in
DDR memory, so OpenSBI/UBOOT loading address must not collide with it.

    CONFIG_CUSTOM_SYS_SPL_MALLOC_ADDR=0x80000000 (DDR +1GB)
    CONFIG_SYS_SPL_MALLOC_SIZE=0x400000

JH7110 SPL memory map (based on the following defconfig):

 CONFIG_NR_CPUS=8
 CONFIG_STACK_SIZE_SHIFT=14
 CONFIG_SPL_STACK=0x8100000
 CONFIG_SPL_SYS_MALLOC_F_LEN=0x10000
 CONFIG_SPL_BSS_START_ADDR=0x8040000

  +----------------+ 0x81e0000
  |  Free          |
  |                |
  +----------------+ 0x8100000
  |  hart 0 stack  |           <--- cleared by each hart (start.S)
  +----------------+ 0x80fc000
  |  hart 1 stack  |                .
  +----------------+ 0x80f8000      .
  |  ......        |                .
  +----------------+
  |  hart N-1 stack|           <--- cleared by each hart (start.S)
  +----------------+ 0x80e0000
  |  ......        |           <--- cleared by lottery winner hart
  |  malloc_base   |                  board_init_f_init_reserve()
  +----------------+ 0x80d0000
  |  GD            |           <--- cleared by lottery winner hart
  +----------------+                  board_init_f_init_reserve()
  |                |
  |  hole          |
  |                |
  +----------------+
  |  BSS           |           <--- cleared by lottery winner hart
  +----------------+ 0x8040000        spl_clear_bss (start.S)
  |  hole          |
  +----------------+
  |  Image+DTB     |           <--- Assuming cleared/loaded by ROM
  +----------------+ 0x8000000

Signed-off-by: Bo Gan <ganboing at gmail.com>
Cc: samin . guo <samin.guo at starfivetech.com>
Cc: Yanhong Wang <yanhong.wang at starfivetech.com>
Cc: Rick Chen <rick at andestech.com>
Cc: Leo <ycliang at andestech.com>
Cc: Sean Anderson <seanga2 at gmail.com>
Cc: Lukasz Majewski <lukma at denx.de>
---

v1:
- patch is on top of https://patchwork.ozlabs.org/project/uboot/patch/1684650044-313122-1-git-send-email-ganboing@gmail.com/
- Tested on VisionFive 2 board (4G)
---
 Kconfig                                | 11 +++++++++++
 arch/riscv/Kconfig                     |  7 +++++++
 arch/riscv/cpu/jh7110/Kconfig          |  2 ++
 arch/riscv/cpu/jh7110/spl.c            | 25 -------------------------
 arch/riscv/cpu/start.S                 | 12 ++++++++++++
 common/dlmalloc.c                      |  6 +++---
 common/init/board_init.c               |  3 +++
 configs/starfive_visionfive2_defconfig |  3 ++-
 8 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/Kconfig b/Kconfig
index 70efb41..e5eec1b 100644
--- a/Kconfig
+++ b/Kconfig
@@ -372,6 +372,17 @@ if EXPERT
 	  When disabling this, please check if malloc calls, maybe
 	  should be replaced by calloc - if one expects zeroed memory.
 
+	config SPL_SYS_MALLOC_CLEAR_ON_INIT
+	bool "Init with zeros the memory reserved for malloc (slow) in SPL"
+	depends on SPL
+	default SYS_MALLOC_CLEAR_ON_INIT
+	help
+	  Same as SYS_MALLOC_CLEAR_ON_INIT, but for SPL. It's possible to
+	  Enable it without SYS_MALLOC_CLEAR_ON_INIT. It's useful for boards
+	  that must have particular memory regions zero'ed before first use.
+	  If SYS_SPL_MALLOC_START is configured to be in such region, this
+	  option should be enabled.
+
 config SYS_MALLOC_DEFAULT_TO_INIT
 	bool "Default malloc to init while reserving the memory for it"
 	help
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index f6ed059..5d942a8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -60,6 +60,13 @@ config SPL_SYS_DCACHE_OFF
 	help
 	  Do not enable data cache in SPL.
 
+config SPL_ZERO_MEM_BEFORE_USE
+	bool "Zero memory before use"
+	depends on SPL
+	default n
+	help
+	  Zero stack/GD/malloc area in SPL.
+
 # board-specific options below
 source "board/AndesTech/ae350/Kconfig"
 source "board/emulation/qemu-riscv/Kconfig"
diff --git a/arch/riscv/cpu/jh7110/Kconfig b/arch/riscv/cpu/jh7110/Kconfig
index 3f14541..de1507d 100644
--- a/arch/riscv/cpu/jh7110/Kconfig
+++ b/arch/riscv/cpu/jh7110/Kconfig
@@ -13,6 +13,7 @@ config STARFIVE_JH7110
 	select SUPPORT_SPL
 	select SPL_RAM if SPL
 	select SPL_STARFIVE_DDR
+	select SPL_ZERO_MEM_BEFORE_USE
 	select PINCTRL_STARFIVE_JH7110
 	imply MMC
 	imply MMC_BROKEN_CD
@@ -26,3 +27,4 @@ config STARFIVE_JH7110
 	imply SPL_LOAD_FIT
 	imply SPL_OPENSBI
 	imply SPL_SIFIVE_CLINT
+	imply SPL_SYS_MALLOC_CLEAR_ON_INIT
diff --git a/arch/riscv/cpu/jh7110/spl.c b/arch/riscv/cpu/jh7110/spl.c
index 104f0fe..72c5b25 100644
--- a/arch/riscv/cpu/jh7110/spl.c
+++ b/arch/riscv/cpu/jh7110/spl.c
@@ -10,7 +10,6 @@
 #include <log.h>
 
 #define CSR_U74_FEATURE_DISABLE	0x7c1
-#define L2_LIM_MEM_END	0x81FFFFFUL
 
 int spl_soc_init(void)
 {
@@ -29,9 +28,6 @@ int spl_soc_init(void)
 
 void harts_early_init(void)
 {
-	ulong *ptr;
-	u8 *tmp;
-	ulong len, remain;
 	/*
 	 * Feature Disable CSR
 	 *
@@ -40,25 +36,4 @@ void harts_early_init(void)
 	 */
 	if (CONFIG_IS_ENABLED(RISCV_MMODE))
 		csr_write(CSR_U74_FEATURE_DISABLE, 0);
-
-	/* clear L2 LIM  memory
-	 * set __bss_end to 0x81FFFFF region to zero
-	 * The L2 Cache Controller supports ECC. ECC is applied to SRAM.
-	 * If it is not cleared, the ECC part is invalid, and an ECC error
-	 * will be reported when reading data.
-	 */
-	ptr = (ulong *)&__bss_end;
-	len = L2_LIM_MEM_END - (ulong)&__bss_end;
-	remain = len % sizeof(ulong);
-	len /= sizeof(ulong);
-
-	while (len--)
-		*ptr++ = 0;
-
-	/* clear the remain bytes */
-	if (remain) {
-		tmp = (u8 *)ptr;
-		while (remain--)
-			*tmp++ = 0;
-	}
 }
diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
index 59d58a5..1a0b01e 100644
--- a/arch/riscv/cpu/start.S
+++ b/arch/riscv/cpu/start.S
@@ -111,6 +111,18 @@ call_board_init_f:
  * It's essential before any function call, otherwise, we get data-race.
  */
 
+	/* clear stack if necessary */
+#if CONFIG_IS_ENABLED(ZERO_MEM_BEFORE_USE)
+clear_stack:
+	li	t1, 1
+	slli	t1, t1, CONFIG_STACK_SIZE_SHIFT
+	sub	t1, sp, t1
+clear_stack_loop:
+	SREG	zero, 0(t1)		/* t1 is always 16 byte aligned */
+	addi	t1, t1, REGBYTES
+	blt	t1, sp, clear_stack_loop
+#endif
+
 call_board_init_f_0:
 	/* find top of reserve space */
 #if CONFIG_IS_ENABLED(SMP)
diff --git a/common/dlmalloc.c b/common/dlmalloc.c
index 0f9b726..dcecdb8 100644
--- a/common/dlmalloc.c
+++ b/common/dlmalloc.c
@@ -631,7 +631,7 @@ void mem_malloc_init(ulong start, ulong size)
 
 	debug("using memory %#lx-%#lx for malloc()\n", mem_malloc_start,
 	      mem_malloc_end);
-#ifdef CONFIG_SYS_MALLOC_CLEAR_ON_INIT
+#if CONFIG_IS_ENABLED(SYS_MALLOC_CLEAR_ON_INIT)
 	memset((void *)mem_malloc_start, 0x0, size);
 #endif
 	malloc_bin_reloc();
@@ -2153,7 +2153,7 @@ Void_t* cALLOc(n, elem_size) size_t n; size_t elem_size;
 
 
   /* check if expand_top called, in which case don't need to clear */
-#ifdef CONFIG_SYS_MALLOC_CLEAR_ON_INIT
+#if CONFIG_IS_ENABLED(SYS_MALLOC_CLEAR_ON_INIT)
 #if MORECORE_CLEARS
   mchunkptr oldtop = top;
   INTERNAL_SIZE_T oldtopsize = chunksize(top);
@@ -2184,7 +2184,7 @@ Void_t* cALLOc(n, elem_size) size_t n; size_t elem_size;
 
     csz = chunksize(p);
 
-#ifdef CONFIG_SYS_MALLOC_CLEAR_ON_INIT
+#if CONFIG_IS_ENABLED(SYS_MALLOC_CLEAR_ON_INIT)
 #if MORECORE_CLEARS
     if (p == oldtop && csz > oldtopsize)
     {
diff --git a/common/init/board_init.c b/common/init/board_init.c
index 96ffb79..ab8c508 100644
--- a/common/init/board_init.c
+++ b/common/init/board_init.c
@@ -162,6 +162,9 @@ void board_init_f_init_reserve(ulong base)
 #if CONFIG_VAL(SYS_MALLOC_F_LEN)
 	/* go down one 'early malloc arena' */
 	gd->malloc_base = base;
+#if CONFIG_IS_ENABLED(ZERO_MEM_BEFORE_USE)
+	memset((void *)base, '\0', CONFIG_VAL(SYS_MALLOC_F_LEN));
+#endif
 #endif
 
 	if (CONFIG_IS_ENABLED(SYS_REPORT_STACK_F_USAGE))
diff --git a/configs/starfive_visionfive2_defconfig b/configs/starfive_visionfive2_defconfig
index ffbc4b9..4ce0f45 100644
--- a/configs/starfive_visionfive2_defconfig
+++ b/configs/starfive_visionfive2_defconfig
@@ -1,6 +1,7 @@
 CONFIG_RISCV=y
 CONFIG_SYS_MALLOC_LEN=0x800000
 CONFIG_SYS_MALLOC_F_LEN=0x10000
+# CONFIG_SYS_MALLOC_CLEAR_ON_INIT is not set
 CONFIG_SPL_GPIO=y
 CONFIG_NR_DRAM_BANKS=1
 CONFIG_HAS_CUSTOM_SYS_INIT_SP_ADDR=y
@@ -13,7 +14,7 @@ CONFIG_SYS_PROMPT="StarFive #"
 CONFIG_OF_LIBFDT_OVERLAY=y
 CONFIG_DM_RESET=y
 CONFIG_SPL_MMC=y
-CONFIG_SPL_STACK=0x8180000
+CONFIG_SPL_STACK=0x8100000
 CONFIG_SPL=y
 CONFIG_SPL_SPI_FLASH_SUPPORT=y
 CONFIG_SPL_SPI=y
-- 
2.7.4



More information about the U-Boot mailing list