[RFC PATCH 2/4] arm: move inline assembly CP15 instructions to separate .S files
Jerome Forissier
jerome.forissier at linaro.org
Tue Jul 8 12:02:48 CEST 2025
This is a step towards enabling LTO on some files where it is currently
disabled, and/or towards allowing those files to be built in Thumb mode.
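
For example, the DACR setup in arch/arm/lib/cache-cp15.c changes from
the inline assembly

    asm volatile("mcr p15, 0, %0, c3, c0, 0" : : "r" (0x55555555));

to a call to a small helper declared in C and implemented in the new
arch/arm/lib/cp15.S:

    void _set_dacr(unsigned long val);
    ...
    _set_dacr(0x55555555);

so the MCR instruction lives in an object that is assembled as plain
ARM code, outside the reach of LTO and Thumb code generation.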
Signed-off-by: Jerome Forissier <jerome.forissier at linaro.org>
---
arch/arm/cpu/arm926ejs/Makefile | 4 +-
arch/arm/cpu/arm926ejs/cache.c | 32 +++-----
arch/arm/cpu/arm926ejs/cp15.S | 46 ++++++++++++
arch/arm/cpu/arm926ejs/cpu.c | 10 +--
arch/arm/include/asm/system.h | 22 +++---
arch/arm/lib/Makefile | 9 ++-
arch/arm/lib/cache-cp15.c | 62 ++++++---------
arch/arm/lib/cache.c | 6 +-
arch/arm/lib/cp15.S | 92 +++++++++++++++++++++++
arch/arm/mach-kirkwood/Makefile | 5 +-
arch/arm/mach-kirkwood/cp15.S | 13 ++++
arch/arm/mach-kirkwood/include/mach/cpu.h | 12 +--
12 files changed, 216 insertions(+), 97 deletions(-)
create mode 100644 arch/arm/cpu/arm926ejs/cp15.S
create mode 100644 arch/arm/lib/cp15.S
create mode 100644 arch/arm/mach-kirkwood/cp15.S
diff --git a/arch/arm/cpu/arm926ejs/Makefile b/arch/arm/cpu/arm926ejs/Makefile
index 41d8af506d8..1b40d901413 100644
--- a/arch/arm/cpu/arm926ejs/Makefile
+++ b/arch/arm/cpu/arm926ejs/Makefile
@@ -4,7 +4,7 @@
# Wolfgang Denk, DENX Software Engineering, wd at denx.de.
extra-y = start.o
-obj-y = cpu.o cache.o
+obj-y = cpu.o cache.o cp15.o
ifdef CONFIG_XPL_BUILD
ifdef CONFIG_SPL_NO_CPU_SUPPORT
@@ -25,5 +25,7 @@ CFLAGS_cache.o := -marm
CFLAGS_REMOVE_cpu.o := $(LTO_CFLAGS)
CFLAGS_REMOVE_cache.o := $(LTO_CFLAGS)
+AFLAGS_REMOVE_cp15.o := -mthumb -mthumb-interwork
+
endif
endif
diff --git a/arch/arm/cpu/arm926ejs/cache.c b/arch/arm/cpu/arm926ejs/cache.c
index 71b8ad0f71d..3524379d335 100644
--- a/arch/arm/cpu/arm926ejs/cache.c
+++ b/arch/arm/cpu/arm926ejs/cache.c
@@ -9,44 +9,33 @@
#include <linux/types.h>
#if !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
+void _invalidate_dcache_all(void);
void invalidate_dcache_all(void)
{
- asm volatile("mcr p15, 0, %0, c7, c6, 0\n" : : "r"(0));
+ _invalidate_dcache_all();
}
+void _flush_dcache_all(void);
void flush_dcache_all(void)
{
- asm volatile(
- "0:"
- "mrc p15, 0, r15, c7, c14, 3\n"
- "bne 0b\n"
- "mcr p15, 0, %0, c7, c10, 4\n"
- : : "r"(0) : "memory"
- );
+ _flush_dcache_all();
}
+void _invalidate_dcache_range(unsigned long start, unsigned long stop);
void invalidate_dcache_range(unsigned long start, unsigned long stop)
{
if (!check_cache_range(start, stop))
return;
-
- while (start < stop) {
- asm volatile("mcr p15, 0, %0, c7, c6, 1\n" : : "r"(start));
- start += CONFIG_SYS_CACHELINE_SIZE;
- }
+ _invalidate_dcache_range(start, stop);
}
+void _flush_dcache_range(unsigned long start, unsigned long stop);
void flush_dcache_range(unsigned long start, unsigned long stop)
{
if (!check_cache_range(start, stop))
return;
- while (start < stop) {
- asm volatile("mcr p15, 0, %0, c7, c14, 1\n" : : "r"(start));
- start += CONFIG_SYS_CACHELINE_SIZE;
- }
-
- asm volatile("mcr p15, 0, %0, c7, c10, 4\n" : : "r"(0));
+ _flush_dcache_range(start, stop);
}
#else /* #if !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) */
void invalidate_dcache_all(void)
@@ -70,11 +59,10 @@ __weak void invalidate_l2_cache(void) {}
#if !CONFIG_IS_ENABLED(SYS_ICACHE_OFF)
/* Invalidate entire I-cache and branch predictor array */
+void _invalidate_icache_all(void);
void invalidate_icache_all(void)
{
- unsigned long i = 0;
-
- asm ("mcr p15, 0, %0, c7, c5, 0" : : "r" (i));
+ _invalidate_icache_all();
}
#else
void invalidate_icache_all(void) {}
diff --git a/arch/arm/cpu/arm926ejs/cp15.S b/arch/arm/cpu/arm926ejs/cp15.S
new file mode 100644
index 00000000000..2d7626bc858
--- /dev/null
+++ b/arch/arm/cpu/arm926ejs/cp15.S
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+
+ENTRY(_cache_flush)
+ mcr p15, 0, r0, c7, c7, 0
+ bx lr
+ENDPROC(_cache_flush)
+
+ENTRY(_invalidate_dcache_all)
+ mcr p15, 0, r0, c7, c6, 0
+ bx lr
+ENDPROC(_invalidate_dcache_all)
+
+ENTRY(_flush_dcache_all)
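+ /* ARM926EJ-S: test, clean and invalidate DCache; loop until Z flag says clean */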
+0:
+ mrc p15, 0, r15, c7, c14, 3
+ bne 0b
+ mcr p15, 0, r0, c7, c10, 4
+ bx lr
+ENDPROC(_flush_dcache_all)
+
+ENTRY(_invalidate_dcache_range)
+0:
+ mcr p15, 0, r0, c7, c6, 1
+ add r0, r0, #CONFIG_SYS_CACHELINE_SIZE
+ cmp r0, r1
+ blt 0b
+ bx lr
+ENDPROC(_invalidate_dcache_range)
+
+ENTRY(_flush_dcache_range)
+0:
+ mcr p15, 0, r0, c7, c14, 1
+ add r0, r0, #CONFIG_SYS_CACHELINE_SIZE
+ cmp r0, r1
+ blt 0b
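+ /* drain write buffer */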
+ mcr p15, 0, r0, c7, c10, 4
+ bx lr
+ENDPROC(_flush_dcache_range)
+
+ENTRY(_invalidate_icache_all)
+ mcr p15, 0, r0, c7, c5, 0
+ bx lr
+ENDPROC(_invalidate_icache_all)
diff --git a/arch/arm/cpu/arm926ejs/cpu.c b/arch/arm/cpu/arm926ejs/cpu.c
index 0e100e6f13d..9e59ff6c66f 100644
--- a/arch/arm/cpu/arm926ejs/cpu.c
+++ b/arch/arm/cpu/arm926ejs/cpu.c
@@ -55,12 +55,12 @@ int cleanup_before_linux (void)
return 0;
}
+void _cache_flush(void);
+
/* flush I/D-cache */
static void cache_flush (void)
{
-#if !(CONFIG_IS_ENABLED(SYS_ICACHE_OFF) && CONFIG_IS_ENABLED(SYS_DCACHE_OFF))
- unsigned long i = 0;
-
- asm ("mcr p15, 0, %0, c7, c7, 0": :"r" (i));
-#endif
+ if (!(CONFIG_IS_ENABLED(SYS_ICACHE_OFF) &&
+ CONFIG_IS_ENABLED(SYS_DCACHE_OFF)))
+ _cache_flush();
}
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 849b3d0efb7..e559a48a6c7 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -456,31 +456,29 @@ static inline int is_hyp(void)
#endif
}
+unsigned int _get_cr_hyp(void);
+unsigned int _get_cr_nohyp(void);
+
static inline unsigned int get_cr(void)
{
unsigned int val;
if (is_hyp())
- asm volatile("mrc p15, 4, %0, c1, c0, 0 @ get CR" : "=r" (val)
- :
- : "cc");
+ val = _get_cr_hyp();
else
- asm volatile("mrc p15, 0, %0, c1, c0, 0 @ get CR" : "=r" (val)
- :
- : "cc");
+ val = _get_cr_nohyp();
return val;
}
+void _set_cr_hyp(unsigned int val);
+void _set_cr_nohyp(unsigned int val);
+
static inline void set_cr(unsigned int val)
{
if (is_hyp())
- asm volatile("mcr p15, 4, %0, c1, c0, 0 @ set CR" :
- : "r" (val)
- : "cc");
+ _set_cr_hyp(val);
else
- asm volatile("mcr p15, 0, %0, c1, c0, 0 @ set CR" :
- : "r" (val)
- : "cc");
+ _set_cr_nohyp(val);
isb();
}
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index ade42d0ca43..23f73dbba32 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -81,6 +81,10 @@ endif
obj-y += cache.o
obj-$(CONFIG_SYS_ARM_CACHE_CP15) += cache-cp15.o
CFLAGS_REMOVE_cache-cp15.o := $(LTO_CFLAGS)
+# Low-level CP15 instructions (mrc p15...) cause problems with LTO
+# when they are coded as inline assembly. They are implemented
+# in their own .S file instead.
+obj-$(CONFIG_SYS_ARM_CACHE_CP15) += cp15.o
obj-y += psci-dt.o
@@ -111,13 +115,14 @@ endif
# For .S, drop -mthumb* and other thumb-related options.
# CFLAGS_REMOVE_* would not have an effect, so AFLAGS_REMOVE_*
# was implemented and is used here.
-# Also, define ${target}_NO_THUMB_BUILD for these two targets
-# so that the code knows it should not use Thumb.
+# Also, define ${target}_NO_THUMB_BUILD for the targets that
+# need to know they should not use Thumb.
AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork
AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
+AFLAGS_REMOVE_cp15.o := -mthumb -mthumb-interwork
# This is only necessary to force ARM mode on THUMB1 targets.
ifneq ($(CONFIG_SYS_ARM_ARCH),4)
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
index 947012f2996..46eded590eb 100644
--- a/arch/arm/lib/cache-cp15.c
+++ b/arch/arm/lib/cache-cp15.c
@@ -109,6 +109,15 @@ __weak void dram_bank_mmu_setup(int bank)
set_section_dcache(i, DCACHE_DEFAULT_OPTION);
}
+void _mmu_helper_lpae_hyp(unsigned long reg, unsigned long addr,
+ unsigned long attr);
+void _mmu_helper_lpae_nohyp(unsigned long reg, unsigned long addr,
+ unsigned long attr);
+void _mmu_helper_nolpae_hyp(unsigned long reg);
+void _mmu_helper_nolpae_nohyp(unsigned long reg);
+void _mmu_helper_pt(unsigned long addr);
+void _set_dacr(unsigned long val);
+
/* to activate the MMU we need to set up virtual memory: use 1M areas */
static inline void mmu_setup(void)
{
@@ -141,42 +150,13 @@ static inline void mmu_setup(void)
reg |= TTBCR_ORGN0_WBNWA | TTBCR_IRGN0_WBNWA;
#endif
- if (is_hyp()) {
- /* Set HTCR to enable LPAE */
- asm volatile("mcr p15, 4, %0, c2, c0, 2"
- : : "r" (reg) : "memory");
- /* Set HTTBR0 */
- asm volatile("mcrr p15, 4, %0, %1, c2"
- :
- : "r"(gd->arch.tlb_addr + (4096 * 4)), "r"(0)
- : "memory");
- /* Set HMAIR */
- asm volatile("mcr p15, 4, %0, c10, c2, 0"
- : : "r" (MEMORY_ATTRIBUTES) : "memory");
- } else {
- /* Set TTBCR to enable LPAE */
- asm volatile("mcr p15, 0, %0, c2, c0, 2"
- : : "r" (reg) : "memory");
- /* Set 64-bit TTBR0 */
- asm volatile("mcrr p15, 0, %0, %1, c2"
- :
- : "r"(gd->arch.tlb_addr + (4096 * 4)), "r"(0)
- : "memory");
- /* Set MAIR */
- asm volatile("mcr p15, 0, %0, c10, c2, 0"
- : : "r" (MEMORY_ATTRIBUTES) : "memory");
- }
+ if (is_hyp())
+ _mmu_helper_lpae_hyp(reg, gd->arch.tlb_addr + (4096 * 4),
+ MEMORY_ATTRIBUTES);
+ else
+ _mmu_helper_lpae_nohyp(reg, gd->arch.tlb_addr + (4096 * 4),
+ MEMORY_ATTRIBUTES);
#elif defined(CONFIG_CPU_V7A)
- if (is_hyp()) {
- /* Set HTCR to disable LPAE */
- asm volatile("mcr p15, 4, %0, c2, c0, 2"
- : : "r" (0) : "memory");
- } else {
- /* Set TTBCR to disable LPAE */
- asm volatile("mcr p15, 0, %0, c2, c0, 2"
- : : "r" (0) : "memory");
- }
- /* Set TTBR0 */
reg = gd->arch.tlb_addr & TTBR0_BASE_ADDR_MASK;
#if defined(CONFIG_SYS_ARM_CACHE_WRITETHROUGH)
reg |= TTBR0_RGN_WT | TTBR0_IRGN_WT;
@@ -185,19 +165,19 @@ static inline void mmu_setup(void)
#else
reg |= TTBR0_RGN_WB | TTBR0_IRGN_WB;
#endif
- asm volatile("mcr p15, 0, %0, c2, c0, 0"
- : : "r" (reg) : "memory");
+ if (is_hyp())
+ _mmu_helper_nolpae_hyp(reg);
+ else
+ _mmu_helper_nolpae_nohyp(reg);
#else
/* Copy the page table address to cp15 */
- asm volatile("mcr p15, 0, %0, c2, c0, 0"
- : : "r" (gd->arch.tlb_addr) : "memory");
+ _mmu_helper_pt(gd->arch.tlb_addr);
#endif
/*
* initial value of Domain Access Control Register (DACR)
* Set the access control to client (1U) for each of the 16 domains
*/
- asm volatile("mcr p15, 0, %0, c3, c0, 0"
- : : "r" (0x55555555));
+ _set_dacr(0x55555555);
/* and enable the mmu */
reg = get_cr(); /* get control reg. */
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
index dd19bd3e4fb..be3a0b42bff 100644
--- a/arch/arm/lib/cache.c
+++ b/arch/arm/lib/cache.c
@@ -122,12 +122,10 @@ phys_addr_t noncached_alloc(size_t size, size_t align)
#endif /* CONFIG_SYS_NONCACHED_MEMORY */
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
+void _invalidate_l2_cache(void);
void invalidate_l2_cache(void)
{
- unsigned int val = 0;
-
- asm volatile("mcr p15, 1, %0, c15, c11, 0 @ invl l2 cache"
- : : "r" (val) : "cc");
+ _invalidate_l2_cache();
isb();
}
#endif
diff --git a/arch/arm/lib/cp15.S b/arch/arm/lib/cp15.S
new file mode 100644
index 00000000000..c402d998d64
--- /dev/null
+++ b/arch/arm/lib/cp15.S
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+
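+/*
+ * Out-of-line CP15 accessors: keeping the mrc/mcr instructions in their
+ * own .S file lets the Makefile drop the Thumb flags for this object,
+ * and assembly objects are not subject to LTO.
+ */
+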
+ENTRY(_get_cr_hyp)
+ mrc p15, 4, r0, c1, c0, 0 @ get CR
+ bx lr
+ENDPROC(_get_cr_hyp)
+
+ENTRY(_get_cr_nohyp)
+ mrc p15, 0, r0, c1, c0, 0 @ get CR
+ bx lr
+ENDPROC(_get_cr_nohyp)
+
+ENTRY(_set_cr_hyp)
+ mcr p15, 4, r0, c1, c0, 0 @ set CR
+ bx lr
+ENDPROC(_set_cr_hyp)
+
+ENTRY(_set_cr_nohyp)
+ mcr p15, 0, r0, c1, c0, 0 @ set CR
+ bx lr
+ENDPROC(_set_cr_nohyp)
+
+ENTRY(_invalidate_l2_cache)
+ mcr p15, 1, r0, c15, c11, 0 @ invl l2 cache
+ bx lr
+ENDPROC(_invalidate_l2_cache)
+
+#if defined(CONFIG_ARMV7_LPAE) && __LINUX_ARM_ARCH__ != 4
+
+ENTRY(_mmu_helper_lpae_hyp)
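+ /* AAPCS: r0 = HTCR value, r1 = low word of HTTBR, r2 = HMAIR value; r3 is free scratch */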
+ /* Set HTCR to enable LPAE */
+ mcr p15, 4, r0, c2, c0, 2
+ /* Set HTTBR0 */
+ mov r3, #0
+ mcrr p15, 4, r1, r3, c2
+ /* Set HMAIR */
+ mcr p15, 4, r2, c10, c2, 0
+ bx lr
+ENDPROC(_mmu_helper_lpae_hyp)
+
+ENTRY(_mmu_helper_lpae_nohyp)
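+ /* AAPCS: r0 = TTBCR value, r1 = low word of TTBR0, r2 = MAIR value; r3 is free scratch */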
+ /* Set TTBCR to enable LPAE */
+ mcr p15, 0, r0, c2, c0, 2
+ /* Set 64-bit TTBR0 */
+ mov r3, #0
+ mcrr p15, 0, r1, r3, c2
+ /* Set MAIR */
+ mcr p15, 0, r2, c10, c2, 0
+ bx lr
+ENDPROC(_mmu_helper_lpae_nohyp)
+
+#elif defined(CONFIG_CPU_V7A)
+
+ENTRY(_mmu_helper_nolpae_hyp)
+ /* Set HTCR to disable LPAE */
+ mov r1, #0
+ mcr p15, 4, r1, c2, c0, 2
+ /* Set TTBR0 */
+ mcr p15, 0, r0, c2, c0, 0
+ bx lr
+ENDPROC(_mmu_helper_nolpae_hyp)
+
+ENTRY(_mmu_helper_nolpae_nohyp)
+ /* Set TTBCR to disable LPAE */
+ mov r1, #0
+ mcr p15, 0, r1, c2, c0, 2
+ /* Set TTBR0 */
+ mcr p15, 0, r0, c2, c0, 0
+ bx lr
+ENDPROC(_mmu_helper_nolpae_nohyp)
+
+#else
+
+ENTRY(_mmu_helper_pt)
+ mcr p15, 0, r0, c2, c0, 0
+ bx lr
+ENDPROC(_mmu_helper_pt)
+
+#endif
+
+#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) && \
+ defined(CONFIG_SYS_ARM_MMU)
+
+ENTRY(_set_dacr)
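+ /* r0: DACR value (caller passes 0x55555555, i.e. client access for all 16 domains) */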
+ mcr p15, 0, r0, c3, c0, 0
+ bx lr
+ENDPROC(_set_dacr)
+
+#endif
diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile
index 0fb5a2326f5..9581c315af8 100644
--- a/arch/arm/mach-kirkwood/Makefile
+++ b/arch/arm/mach-kirkwood/Makefile
@@ -3,14 +3,17 @@
# (C) Copyright 2009
# Marvell Semiconductor <www.marvell.com>
# Written-by: Prafulla Wadaskar <prafulla at marvell.com>
+# Copyright (C) 2025 Linaro Ltd.
obj-y = cpu.o
+obj-y += cp15.o
obj-y += cache.o
obj-y += lowlevel.o
obj-y += mpp.o
-# cpu.o and cache.o contain CP15 instructions which cannot be run in
+# cpu.o, cp15.o and cache.o contain CP15 instructions which cannot be run in
# Thumb state, so build them for ARM state even with CONFIG_SYS_THUMB_BUILD
CFLAGS_cpu.o := -marm
+AFLAGS_REMOVE_cp15.o := -mthumb -mthumb-interwork
CFLAGS_cache.o := -marm
diff --git a/arch/arm/mach-kirkwood/cp15.S b/arch/arm/mach-kirkwood/cp15.S
new file mode 100644
index 00000000000..088db9895f7
--- /dev/null
+++ b/arch/arm/mach-kirkwood/cp15.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <linux/linkage.h>
+
+ENTRY(readfr_extra_feature_reg)
+ mrc p15, 1, r0, c15, c1, 0 @ readfr exfr
+ bx lr
+ENDPROC(readfr_extra_feature_reg)
+
+ENTRY(_writefr_extra_feature_reg)
+ mcr p15, 1, r0, c15, c1, 0 @ writefr exfr
+ bx lr
+ENDPROC(_writefr_extra_feature_reg)
diff --git a/arch/arm/mach-kirkwood/include/mach/cpu.h b/arch/arm/mach-kirkwood/include/mach/cpu.h
index 9eec786fe8f..54487d2af85 100644
--- a/arch/arm/mach-kirkwood/include/mach/cpu.h
+++ b/arch/arm/mach-kirkwood/include/mach/cpu.h
@@ -82,22 +82,16 @@ struct mbus_win {
* read feroceon/sheeva core extra feature register
* using co-proc instruction
*/
-static inline unsigned int readfr_extra_feature_reg(void)
-{
- unsigned int val;
- asm volatile ("mrc p15, 1, %0, c15, c1, 0 @ readfr exfr":"=r"
- (val)::"cc");
- return val;
-}
+unsigned int readfr_extra_feature_reg(void);
/*
* write feroceon/sheeva core extra feature register
* using co-proc instruction
*/
+void _writefr_extra_feature_reg(unsigned int val);
static inline void writefr_extra_feature_reg(unsigned int val)
{
- asm volatile ("mcr p15, 1, %0, c15, c1, 0 @ writefr exfr"::"r"
- (val):"cc");
+ _writefr_extra_feature_reg(val);
isb();
}
--
2.43.0