[PATCH v3 08/12] riscv: p8700: Add software emulation for AMO* instructions
Uros Stajic
uros.stajic at htecgroup.com
Tue Jul 29 18:24:17 CEST 2025
From: Chao-ying Fu <cfu at mips.com>
This patch adds software emulation for atomic memory operation (AMO)
instructions that may not be supported in hardware.
The `emu-amo.S` file provides assembly implementations of the
aforementioned operations. Corresponding handler logic is integrated
into the illegal instruction trap to catch and emulate unsupported
AMO* instructions at runtime.
Signed-off-by: Chao-ying Fu <cfu at mips.com>
Signed-off-by: Uros Stajic <uros.stajic at htecgroup.com>
---
arch/riscv/cpu/p8700/Makefile | 1 +
arch/riscv/cpu/p8700/emu-amo.S | 254 ++++++++++++++++++++++++++++
arch/riscv/lib/interrupts.c | 299 +++++++++++++++++++++++++++++++++
include/interrupt.h | 19 +++
4 files changed, 573 insertions(+)
create mode 100644 arch/riscv/cpu/p8700/emu-amo.S
diff --git a/arch/riscv/cpu/p8700/Makefile b/arch/riscv/cpu/p8700/Makefile
index 4dfbddc5cba..22f96401640 100644
--- a/arch/riscv/cpu/p8700/Makefile
+++ b/arch/riscv/cpu/p8700/Makefile
@@ -5,5 +5,6 @@
obj-y += cache.o
obj-y += cpu.o
obj-y += dram.o
+obj-y += emu-amo.o
obj-$(CONFIG_P8700_RISCV) += p8700_platform_setup.o
diff --git a/arch/riscv/cpu/p8700/emu-amo.S b/arch/riscv/cpu/p8700/emu-amo.S
new file mode 100644
index 00000000000..b7005339939
--- /dev/null
+++ b/arch/riscv/cpu/p8700/emu-amo.S
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2021, Chao-ying Fu <cfu at mips.com>
+ */
+
+ .text
+
+ /*
+  * atomic_swap_w - software AMOSWAP.W built from an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = value to store.
+  * Out: a0 = value that was in memory before the store.
+  * Scratch: a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_swap_w
+atomic_swap_w:
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed since it was sampled: restart with the new value */
+ sc.w a6,a1,(a4) /* try to store a1; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_swap_d - software AMOSWAP.D built from an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = value to store.
+  * Out: a0 = value that was in memory before the store.
+  * Scratch: a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_swap_d
+atomic_swap_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed since it was sampled: restart with the new value */
+ sc.d a6,a1,(a4) /* try to store a1; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_add_w - software AMOADD.W built from an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = addend.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_add_w
+atomic_add_w:
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ addw a3,a5,a1 /* a3 = new value = old + a1 (32-bit add) */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_add_d - software AMOADD.D built from an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = addend.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_add_d
+atomic_add_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ add a3,a5,a1 /* a3 = new value = old + a1 */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_and_w - software AMOAND.W built from an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = mask to AND in.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_and_w
+atomic_and_w:
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ and a3,a5,a1 /* a3 = new value = old & a1 */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_and_d - software AMOAND.D built from an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = mask to AND in.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_and_d
+atomic_and_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ and a3,a5,a1 /* a3 = new value = old & a1 */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_or_w - software AMOOR.W built from an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = bits to OR in.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_or_w
+atomic_or_w:
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ or a3,a5,a1 /* a3 = new value = old | a1 */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_or_d - software AMOOR.D built from an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = bits to OR in.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_or_d
+atomic_or_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ or a3,a5,a1 /* a3 = new value = old | a1 */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_xor_w - software AMOXOR.W built from an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = bits to XOR in.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_xor_w
+atomic_xor_w:
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ xor a3,a5,a1 /* a3 = new value = old ^ a1 */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_xor_d - software AMOXOR.D built from an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = bits to XOR in.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_xor_d
+atomic_xor_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ xor a3,a5,a1 /* a3 = new value = old ^ a1 */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_max_w - software AMOMAX.W (signed 32-bit maximum) built from an
+  * LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = operand (only the low 32 bits
+  *      are significant).
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6; a1 is rewritten in sign-extended form.
+  */
+ .align 3
+ .globl atomic_max_w
+atomic_max_w:
+ sext.w a1,a1 /* a 32-bit AMO ignores the upper 32 bits of its operand */
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the maximum ... */
+ bge a5,a1,1f /* ... and keep it when old >= a1 (signed) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_max_d - software AMOMAX.D (signed 64-bit maximum) built from an
+  * LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = operand.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_max_d
+atomic_max_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the maximum ... */
+ bge a5,a1,1f /* ... and keep it when old >= a1 (signed) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_maxu_w - software AMOMAXU.W (unsigned 32-bit maximum) built from
+  * an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = operand (only the low 32 bits
+  *      are significant).
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6; a1 is rewritten in sign-extended form.
+  */
+ .align 3
+ .globl atomic_maxu_w
+atomic_maxu_w:
+ /*
+  * Drop the upper 32 bits of the operand and bring it into the same
+  * sign-extended form that lw/lr.w produce. Sign extension preserves the
+  * unsigned ordering of 32-bit values, so bgeu below compares correctly.
+  */
+ sext.w a1,a1
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the maximum ... */
+ bgeu a5,a1,1f /* ... and keep it when old >= a1 (unsigned) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_maxu_d - software AMOMAXU.D (unsigned 64-bit maximum) built from
+  * an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = operand.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_maxu_d
+atomic_maxu_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the maximum ... */
+ bgeu a5,a1,1f /* ... and keep it when old >= a1 (unsigned) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_min_w - software AMOMIN.W (signed 32-bit minimum) built from an
+  * LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = operand (only the low 32 bits
+  *      are significant).
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6; a1 is rewritten in sign-extended form.
+  */
+ .align 3
+ .globl atomic_min_w
+atomic_min_w:
+ sext.w a1,a1 /* a 32-bit AMO ignores the upper 32 bits of its operand */
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the minimum ... */
+ bge a1,a5,1f /* ... and keep it when a1 >= old (signed) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_min_d - software AMOMIN.D (signed 64-bit minimum) built from an
+  * LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = operand.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_min_d
+atomic_min_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the minimum ... */
+ bge a1,a5,1f /* ... and keep it when a1 >= old (signed) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_minu_w - software AMOMINU.W (unsigned 32-bit minimum) built from
+  * an LR/SC retry loop.
+  * In:  a0 = address of the 32-bit word, a1 = operand (only the low 32 bits
+  *      are significant).
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6; a1 is rewritten in sign-extended form.
+  */
+ .align 3
+ .globl atomic_minu_w
+atomic_minu_w:
+ /*
+  * Drop the upper 32 bits of the operand and bring it into the same
+  * sign-extended form that lw/lr.w produce. Sign extension preserves the
+  * unsigned ordering of 32-bit values, so bgeu below compares correctly.
+  */
+ sext.w a1,a1
+ lw a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the minimum ... */
+ bgeu a1,a5,1f /* ... and keep it when a1 >= old (unsigned) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.w a5,(a4) /* load-reserved read of the word */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.w a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
+
+ /*
+  * atomic_minu_d - software AMOMINU.D (unsigned 64-bit minimum) built from
+  * an LR/SC retry loop.
+  * In:  a0 = address of the 64-bit doubleword, a1 = operand.
+  * Out: a0 = value that was in memory before the update.
+  * Scratch: a3, a4, a5, a6.
+  */
+ .align 3
+ .globl atomic_minu_d
+atomic_minu_d:
+ ld a5,0(a0) /* a5 = current memory value (plain load) */
+ mv a4,a0 /* keep the address in a4; a0 is reused for the old value */
+2: mv a0,a5 /* a0 = value expected to still be in memory */
+ mv a3,a5 /* assume the old value is the minimum ... */
+ bgeu a1,a5,1f /* ... and keep it when a1 >= old (unsigned) */
+ mv a3,a1 /* otherwise the operand becomes the new value */
+1: lr.d a5,(a4) /* load-reserved read of the doubleword */
+ bne a5,a0,2b /* memory changed: recompute the new value from the fresh one */
+ sc.d a6,a3,(a4) /* try to store a3; a6 != 0 means the store-conditional failed */
+ bnez a6,1b /* retry the LR/SC pair on failure */
+ ret
diff --git a/arch/riscv/lib/interrupts.c b/arch/riscv/lib/interrupts.c
index ef1056eeb6f..906916c762f 100644
--- a/arch/riscv/lib/interrupts.c
+++ b/arch/riscv/lib/interrupts.c
@@ -22,6 +22,36 @@
DECLARE_GLOBAL_DATA_PTR;
+/* Trap cause code that _exit_trap() treats as an illegal-instruction exception */
+#define ILLEGAL_INSTRUCTION 2
+/*
+ * Field masks/shifts for decoding a 32-bit AMO instruction word.
+ * AMO_MASK keeps bits 31:27, 14:12 and 6:0, i.e. the bits that identify
+ * the operation and its width; the result is compared against *_MATCH.
+ */
+#define AMO_MASK 0xf800707f
+/* Bits 26:25 (aq/rl ordering bits) */
+#define AQRL_MASK 0x06000000
+#define AQRL_SHIFT 25
+/* Bits 24:20: source register holding the operand value */
+#define RS2_MASK 0x01f00000
+#define RS2_SHIFT 20
+/* Bits 19:15: source register holding the memory address */
+#define RS1_MASK 0x000f8000
+#define RS1_SHIFT 15
+/* Bits 11:7: destination register receiving the old memory value */
+#define RD_MASK 0x00000f80
+#define RD_SHIFT 7
+
+/*
+ * Expected value of (instruction & AMO_MASK) for each emulated AMO.
+ * The _D/_W pairs differ only in bits 14:12 (0x3000 vs 0x2000) and select
+ * the 64-bit and 32-bit emulation routine respectively.
+ */
+#define AMOADD_D_MATCH 0x0000302f
+#define AMOADD_W_MATCH 0x0000202f
+#define AMOAND_D_MATCH 0x6000302f
+#define AMOAND_W_MATCH 0x6000202f
+#define AMOMAX_D_MATCH 0xa000302f
+#define AMOMAX_W_MATCH 0xa000202f
+#define AMOMAXU_D_MATCH 0xe000302f
+#define AMOMAXU_W_MATCH 0xe000202f
+#define AMOMIN_D_MATCH 0x8000302f
+#define AMOMIN_W_MATCH 0x8000202f
+#define AMOMINU_D_MATCH 0xc000302f
+#define AMOMINU_W_MATCH 0xc000202f
+#define AMOOR_D_MATCH 0x4000302f
+#define AMOOR_W_MATCH 0x4000202f
+#define AMOSWAP_D_MATCH 0x0800302f
+#define AMOSWAP_W_MATCH 0x0800202f
+#define AMOXOR_D_MATCH 0x2000302f
+#define AMOXOR_W_MATCH 0x2000202f
+
void set_resume(struct resume_data *data)
{
gd->arch.resume = data;
@@ -115,6 +145,184 @@ static void show_code(ulong epc)
printf("%04x%s", pos[i], i + 1 == len ? ")\n" : " ");
}
+/*
+ * Read integer register x<reg_num> from the saved register state @regs.
+ *
+ * Register 0 always reads as zero. A register number outside 0..31 is
+ * reported on the console and also yields zero.
+ */
+static ulong get_reg(struct pt_regs *regs, int reg_num)
+{
+	switch (reg_num) {
+	case 0:  return 0;
+	case 1:  return regs->ra;
+	case 2:  return regs->sp;
+	case 3:  return regs->gp;
+	case 4:  return regs->tp;
+	case 5:  return regs->t0;
+	case 6:  return regs->t1;
+	case 7:  return regs->t2;
+	case 8:  return regs->s0;
+	case 9:  return regs->s1;
+	case 10: return regs->a0;
+	case 11: return regs->a1;
+	case 12: return regs->a2;
+	case 13: return regs->a3;
+	case 14: return regs->a4;
+	case 15: return regs->a5;
+	case 16: return regs->a6;
+	case 17: return regs->a7;
+	case 18: return regs->s2;
+	case 19: return regs->s3;
+	case 20: return regs->s4;
+	case 21: return regs->s5;
+	case 22: return regs->s6;
+	case 23: return regs->s7;
+	case 24: return regs->s8;
+	case 25: return regs->s9;
+	case 26: return regs->s10;
+	case 27: return regs->s11;
+	case 28: return regs->t3;
+	case 29: return regs->t4;
+	case 30: return regs->t5;
+	case 31: return regs->t6;
+	default:
+		break;
+	}
+
+	/* Only reached for a register number that does not exist */
+	printf("Error reg_num=%d for %s\n", reg_num, __func__);
+	return 0;
+}
+
+/*
+ * Write @reg_value to integer register x<reg_num> in the saved register
+ * state @regs.
+ *
+ * Writes to register 0 are silently discarded. A register number outside
+ * 0..31 is reported on the console and nothing is written.
+ */
+static void set_reg(struct pt_regs *regs, int reg_num, ulong reg_value)
+{
+	/* x0 has no storage slot: nothing to do */
+	if (reg_num == 0)
+		return;
+
+	switch (reg_num) {
+	case 1:  regs->ra = reg_value;  return;
+	case 2:  regs->sp = reg_value;  return;
+	case 3:  regs->gp = reg_value;  return;
+	case 4:  regs->tp = reg_value;  return;
+	case 5:  regs->t0 = reg_value;  return;
+	case 6:  regs->t1 = reg_value;  return;
+	case 7:  regs->t2 = reg_value;  return;
+	case 8:  regs->s0 = reg_value;  return;
+	case 9:  regs->s1 = reg_value;  return;
+	case 10: regs->a0 = reg_value;  return;
+	case 11: regs->a1 = reg_value;  return;
+	case 12: regs->a2 = reg_value;  return;
+	case 13: regs->a3 = reg_value;  return;
+	case 14: regs->a4 = reg_value;  return;
+	case 15: regs->a5 = reg_value;  return;
+	case 16: regs->a6 = reg_value;  return;
+	case 17: regs->a7 = reg_value;  return;
+	case 18: regs->s2 = reg_value;  return;
+	case 19: regs->s3 = reg_value;  return;
+	case 20: regs->s4 = reg_value;  return;
+	case 21: regs->s5 = reg_value;  return;
+	case 22: regs->s6 = reg_value;  return;
+	case 23: regs->s7 = reg_value;  return;
+	case 24: regs->s8 = reg_value;  return;
+	case 25: regs->s9 = reg_value;  return;
+	case 26: regs->s10 = reg_value; return;
+	case 27: regs->s11 = reg_value; return;
+	case 28: regs->t3 = reg_value;  return;
+	case 29: regs->t4 = reg_value;  return;
+	case 30: regs->t5 = reg_value;  return;
+	case 31: regs->t6 = reg_value;  return;
+	default:
+		break;
+	}
+
+	/* Only reached for a register number that does not exist */
+	printf("Error reg_num=%d for %s\n", reg_num, __func__);
+}
+
static void _exit_trap(ulong code, ulong epc, ulong tval, struct pt_regs *regs)
{
static const char * const exception_code[] = {
@@ -140,6 +348,97 @@ static void _exit_trap(ulong code, ulong epc, ulong tval, struct pt_regs *regs)
gd->arch.resume->code = code;
longjmp(gd->arch.resume->jump, 1);
}
+	/*
+	 * Software emulation of AMO instructions that raised an
+	 * illegal-instruction exception: decode the faulting instruction, run
+	 * the matching atomic_*() routine on the saved operands and write the
+	 * previous memory value to the destination register.
+	 */
+	if (IS_ENABLED(CONFIG_P8700_RISCV) && code == ILLEGAL_INSTRUCTION) {
+		/*
+		 * Fetch one 16-bit op at a time to deal with 16-bit alignment.
+		 * FIXME! For the big-endian mode, we need to swap bytes.
+		 */
+		unsigned short op0 = *(unsigned short *)epc;
+		unsigned short op1 = *((unsigned short *)epc + 1);
+		/*
+		 * Widen op1 before shifting: as an unsigned short it would be
+		 * promoted to int, and shifting a set bit 15 into the sign bit
+		 * is undefined behaviour.
+		 */
+		unsigned int opcode = ((unsigned int)op1 << 16) | op0;
+		/* The aq/rl bits (AQRL_MASK) are not decoded */
+		int rs2 = (opcode & RS2_MASK) >> RS2_SHIFT;
+		int rs1 = (opcode & RS1_MASK) >> RS1_SHIFT;
+		int rd = (opcode & RD_MASK) >> RD_SHIFT;
+		ulong rs2_value = get_reg(regs, rs2);	/* operand value */
+		ulong rs1_value = get_reg(regs, rs1);	/* memory address */
+		ulong rd_value = 0;
+		int emulated = 1;
+
+		/* Exactly one routine runs per instruction: every case breaks */
+		switch (opcode & AMO_MASK) {
+		case AMOADD_D_MATCH:
+			rd_value = atomic_add_d(rs1_value, rs2_value);
+			break;
+		case AMOADD_W_MATCH:
+			rd_value = atomic_add_w(rs1_value, rs2_value);
+			break;
+		case AMOAND_D_MATCH:
+			rd_value = atomic_and_d(rs1_value, rs2_value);
+			break;
+		case AMOAND_W_MATCH:
+			rd_value = atomic_and_w(rs1_value, rs2_value);
+			break;
+		case AMOMAX_D_MATCH:
+			rd_value = atomic_max_d(rs1_value, rs2_value);
+			break;
+		case AMOMAX_W_MATCH:
+			rd_value = atomic_max_w(rs1_value, rs2_value);
+			break;
+		case AMOMAXU_D_MATCH:
+			rd_value = atomic_maxu_d(rs1_value, rs2_value);
+			break;
+		case AMOMAXU_W_MATCH:
+			rd_value = atomic_maxu_w(rs1_value, rs2_value);
+			break;
+		case AMOMIN_D_MATCH:
+			rd_value = atomic_min_d(rs1_value, rs2_value);
+			break;
+		case AMOMIN_W_MATCH:
+			rd_value = atomic_min_w(rs1_value, rs2_value);
+			break;
+		case AMOMINU_D_MATCH:
+			rd_value = atomic_minu_d(rs1_value, rs2_value);
+			break;
+		case AMOMINU_W_MATCH:
+			rd_value = atomic_minu_w(rs1_value, rs2_value);
+			break;
+		case AMOOR_D_MATCH:
+			rd_value = atomic_or_d(rs1_value, rs2_value);
+			break;
+		case AMOOR_W_MATCH:
+			rd_value = atomic_or_w(rs1_value, rs2_value);
+			break;
+		case AMOSWAP_D_MATCH:
+			rd_value = atomic_swap_d(rs1_value, rs2_value);
+			break;
+		case AMOSWAP_W_MATCH:
+			rd_value = atomic_swap_w(rs1_value, rs2_value);
+			break;
+		case AMOXOR_D_MATCH:
+			rd_value = atomic_xor_d(rs1_value, rs2_value);
+			break;
+		case AMOXOR_W_MATCH:
+			rd_value = atomic_xor_w(rs1_value, rs2_value);
+			break;
+		default:
+			/* Not an AMO we emulate: report it as unhandled below */
+			emulated = 0;
+			break;
+		}
+
+		if (emulated) {
+			set_reg(regs, rd, rd_value);
+			/*
+			 * The instruction has been handled, so do not fall
+			 * into the "Unhandled exception" report.
+			 * NOTE(review): the caller must resume at epc + 4,
+			 * otherwise the same AMO traps again - confirm that
+			 * the trap return path advances epc.
+			 */
+			return;
+		}
+	}
if (code < ARRAY_SIZE(exception_code))
printf("Unhandled exception: %s\n", exception_code[code]);
diff --git a/include/interrupt.h b/include/interrupt.h
index 6ea28b54a56..5fc983afccb 100644
--- a/include/interrupt.h
+++ b/include/interrupt.h
@@ -43,3 +43,22 @@ struct resume_data {
* Return: 0 before an exception, 1 after an exception occurred
*/
void set_resume(struct resume_data *data);
+
+/*
+ * Software emulation of the RISC-V AMO instructions, implemented in
+ * arch/riscv/cpu/p8700/emu-amo.S.
+ *
+ * Each routine atomically applies its operation to the 32-bit (_w) or
+ * 64-bit (_d) value stored at @addr, using @val as the second operand,
+ * and returns the value that was in memory before the update.
+ *
+ * The first argument is the memory address and the second is the operand:
+ * the assembly dereferences its first argument, and the trap handler
+ * passes the rs1 (address) value first and the rs2 (operand) value second.
+ */
+ulong atomic_swap_w(ulong addr, ulong val);
+ulong atomic_swap_d(ulong addr, ulong val);
+ulong atomic_add_w(ulong addr, ulong val);
+ulong atomic_add_d(ulong addr, ulong val);
+ulong atomic_and_w(ulong addr, ulong val);
+ulong atomic_and_d(ulong addr, ulong val);
+ulong atomic_or_w(ulong addr, ulong val);
+ulong atomic_or_d(ulong addr, ulong val);
+ulong atomic_xor_w(ulong addr, ulong val);
+ulong atomic_xor_d(ulong addr, ulong val);
+ulong atomic_max_w(ulong addr, ulong val);
+ulong atomic_max_d(ulong addr, ulong val);
+ulong atomic_maxu_w(ulong addr, ulong val);
+ulong atomic_maxu_d(ulong addr, ulong val);
+ulong atomic_min_w(ulong addr, ulong val);
+ulong atomic_min_d(ulong addr, ulong val);
+ulong atomic_minu_w(ulong addr, ulong val);
+ulong atomic_minu_d(ulong addr, ulong val);
--
2.34.1
More information about the U-Boot
mailing list