[U-Boot] [PATCH] arm: add 64-64 bit divider
Che-Liang Chiou
clchiou at chromium.org
Wed Aug 31 12:38:50 CEST 2011
This patch adds a 64-64 bit divider that supports ARMv4 and above.
Because the clz (count leading zeros) instruction was not added until ARMv5,
the divider implements a clz function for ARMv4 targets.
The divider was tested with the following test driver code, run under
qemu-arm:
/*
 * Test driver: reads tuples of four hex values (a, b, q, r) from stdin and
 * prints a, b, a / b and a % b for each tuple.
 * The q and r values read from the input are not used by the program itself;
 * presumably they are reference results and the output is compared against
 * the input externally -- TODO confirm.
 * NOTE(review): b == 0 is not guarded against here.
 */
int main(void)
{
uint64_t a, b, q, r;
while (scanf("%llx %llx %llx %llx", &a, &b, &q, &r) > 0)
printf("%016llx %016llx %016llx %016llx\n", a, b, a / b, a % b);
return 0;
}
Signed-off-by: Che-Liang Chiou <clchiou at chromium.org>
Cc: Albert Aribaud <albert.u.boot at aribaud.net>
---
This patch is also tested with `MAKEALL -a arm`
arch/arm/lib/Makefile | 1 +
arch/arm/lib/_uldivmod.S | 266 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 267 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/lib/_uldivmod.S
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 300c8fa..31770dd 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -33,6 +33,7 @@ GLSOBJS += _divsi3.o
GLSOBJS += _lshrdi3.o
GLSOBJS += _modsi3.o
GLSOBJS += _udivsi3.o
+GLSOBJS += _uldivmod.o
GLSOBJS += _umodsi3.o
GLCOBJS += div0.o
diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/_uldivmod.S
new file mode 100644
index 0000000..9e3a5e6
--- /dev/null
+++ b/arch/arm/lib/_uldivmod.S
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2011 The Chromium OS Authors.
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/*
+ * A, Q = r0 + (r1 << 32)
+ * B, R = r2 + (r3 << 32)
+ * A / B = Q ... R
+ */
+
+ .text
+ .global __aeabi_uldivmod
+ .type __aeabi_uldivmod, function
+ .align 0
+
+/*
+ * CLZ(dst, src): dst = number of leading zero bits in src.
+ * CLZEQ(dst, src): same, but executed only when the Z flag is set.
+ *
+ * armv4 does not support clz (count leading zero) instruction. When
+ * __LINUX_ARM_ARCH__ <= 4 the macros expand to a (conditional) call to a
+ * stub named L_clz_<dst>_<src>, so a stub has to exist for every (dst, src)
+ * pair that is used. That call overwrites lr, and the software clz routine
+ * changes the condition flags, so code that needs the flags afterwards has
+ * to set them again.
+ *
+ * NOTE(review): if __LINUX_ARM_ARCH__ is not defined at all, the
+ * preprocessor evaluates it as 0 and the software path is selected.
+ */
+#if __LINUX_ARM_ARCH__ <= 4
+# define CLZ(dst, src) bl L_clz_ ## dst ## _ ## src
+# define CLZEQ(dst, src) bleq L_clz_ ## dst ## _ ## src
+#else
+# define CLZ(dst, src) clz dst, src
+# define CLZEQ(dst, src) clzeq dst, src
+#endif
+
+@ Register aliases; suffix _0 is the low word, _1 the high word.
+@ A (dividend) and B (divisor) live in r0-r3.
+@ C and D are 64-bit work values in r4-r7 (saved on entry, restored on exit).
+A_0 .req r0
+A_1 .req r1
+B_0 .req r2
+B_1 .req r3
+C_0 .req r4
+C_1 .req r5
+D_0 .req r6
+D_1 .req r7
+
+@ Q (quotient) and R (remainder) name the same registers as A and B.
+Q_0 .req r0
+Q_1 .req r1
+R_0 .req r2
+R_1 .req r3
+
+/*
+ * __aeabi_uldivmod: unsigned 64-bit division with remainder.
+ *
+ * In:  A = r1:r0 (dividend), B = r3:r2 (divisor)
+ * Out: Q = r1:r0 (quotient), R = r3:r2 (remainder)
+ *
+ * r4-r7 and lr are pushed here and popped by every exit path. The code
+ * below dispatches to one of four paths: B == 0, B a power of two, both
+ * high words zero (32-bit divide), or the general 64-bit loop.
+ */
+__aeabi_uldivmod:
+ stmfd sp!, {r4, r5, r6, r7, lr}
+ @ Test if B == 0
+ orrs ip, B_0, B_1 @ Z set -> B == 0
+ beq L_div_by_0
+ @ Test if B is power of 2: (B & (B - 1)) == 0
+ @ C = B - 1 stays live: L_pow2 uses it as the remainder mask
+ subs C_0, B_0, #1
+ sbc C_1, B_1, #0
+ tst C_0, B_0
+ @ high words are only tested when the low words share no bits
+ tsteq B_1, C_1
+ beq L_pow2
+ @ Test if A_1 == B_1 == 0
+ orrs ip, A_1, B_1
+ beq L_div_32_32
+
+@ General case: binary long division (shift and subtract).
+@ B is first shifted left until its top set bit lines up with that of A and
+@ C is set to the matching quotient bit; the loop then compares, subtracts
+@ and shifts B and C right by one bit per iteration, collecting the
+@ quotient in D. On exit A holds the remainder.
+L_div_64_64:
+ mov C_0, #1
+ mov C_1, #0
+ @ D_0 = clz A
+ @ (clz of the high word, plus clz of the low word when the high word is 0)
+ CLZ(D_0, A_1)
+ teq A_1, #0
+ CLZEQ(ip, A_0)
+ @ teq is repeated because the ARMv4 CLZEQ stub changes the flags
+ teq A_1, #0
+ addeq D_0, D_0, ip
+ @ D_1 = clz B
+ CLZ(D_1, B_1)
+ teq B_1, #0
+ CLZEQ(ip, B_0)
+ @ flags are re-established for the same reason as above
+ teq B_1, #0
+ addeq D_1, D_1, ip
+ @ if clz B - clz A <= 0: goto L_done_shift
+ @ (both counts are in 0..64, so the unsigned bls test is sufficient)
+ subs D_0, D_1, D_0
+ bls L_done_shift
+ @ D_0 = shift count n, D_1 = n - 32, ip = 32 - n
+ @ N set (mi) -> n < 32; N clear (pl) -> n >= 32
+ @ none of the shifts below update the flags, so mi/pl stay valid
+ subs D_1, D_0, #32
+ rsb ip, D_0, #32
+ @ B <<= (clz B - clz A)
+ movmi B_1, B_1, lsl D_0
+ orrmi B_1, B_1, B_0, lsr ip
+ movpl B_1, B_0, lsl D_1
+ mov B_0, B_0, lsl D_0
+ @ C = 1 << (clz B - clz A)
+ movmi C_1, C_1, lsl D_0
+ orrmi C_1, C_1, C_0, lsr ip
+ movpl C_1, C_0, lsl D_1
+ mov C_0, C_0, lsl D_0
+L_done_shift:
+ @ the clz values are no longer needed; D is reused as the quotient
+ mov D_0, #0
+ mov D_1, #0
+ @ C: current bit; D: result
+L_subtract:
+ @ if A >= B
+ @ (low words are compared only when the high words are equal)
+ cmp A_1, B_1
+ cmpeq A_0, B_0
+ bcc L_update
+ @ A -= B
+ subs A_0, A_0, B_0
+ sbc A_1, A_1, B_1
+ @ D |= C
+ orr D_0, D_0, C_0
+ orr D_1, D_1, C_1
+L_update:
+ @ if A == 0: break
+ orrs ip, A_1, A_0
+ beq L_exit
+ @ C >>= 1
+ @ (the bit shifted out of the high word enters the low word via carry/rrx)
+ movs C_1, C_1, lsr #1
+ movs C_0, C_0, rrx
+ @ if C == 0: break
+ orrs ip, C_1, C_0
+ beq L_exit
+ @ B >>= 1
+ movs B_1, B_1, lsr #1
+ mov B_0, B_0, rrx
+ b L_subtract
+L_exit:
+ @ Note: A, B & Q, R are aliases
+ @ R is written first because Q_0/Q_1 are the registers that hold A
+ mov R_0, A_0
+ mov R_1, A_1
+ mov Q_0, D_0
+ mov Q_1, D_1
+ ldmfd sp!, {r4, r5, r6, r7, pc}
+
+@ Both high words are zero: defer to the 32-bit divider.
+@ __aeabi_uidivmod takes numerator in r0 and denominator in r1 and returns
+@ the quotient in r0 and the remainder in r1.
+L_div_32_32:
+ @ Note: A_0 & r0 are aliases, so the dividend is already in place
+ @ Q_1 r1
+ mov r1, B_0
+ @ The entry code pushed five words (20 bytes), so sp is only 4-byte
+ @ aligned at this point. The AAPCS requires 8-byte stack alignment at
+ @ public interfaces, so pad by one word around the call.
+ sub sp, sp, #4
+ bl __aeabi_uidivmod
+ add sp, sp, #4
+ @ R = remainder (zero-extended); Q_0 already holds the quotient
+ mov R_0, r1
+ mov R_1, #0
+ mov Q_1, #0
+ ldmfd sp!, {r4, r5, r6, r7, pc}
+
+@ B is a power of two: Q = A >> log2(B), R = A & (B - 1).
+@ On entry C holds B - 1, left there by the power-of-two test.
+L_pow2:
+ @ Note: A, B and Q, R are aliases
+ @ R = A & (B - 1)
+ @ (kept in C for now because R_0/R_1 are the registers that hold B)
+ and C_0, A_0, C_0
+ and C_1, A_1, C_1
+ @ Q = A >> log2(B)
+ @ Note: B must not be 0 here!
+ CLZ(D_0, B_0)
+ @ D_1 = clz(B_0) + 1 = 32 - log2(B_0): left shift that moves the low
+ @ bits of A_1 into the top of A_0
+ add D_1, D_0, #1
+ @ D_0 = 31 - clz(B_0) = log2(B_0); negative (N set) when B_0 == 0
+ rsbs D_0, D_0, #31
+ movpl A_0, A_0, lsr D_0
+ orrpl A_0, A_0, A_1, lsl D_1
+ bpl L_1
+ @ B_0 == 0: the set bit is in B_1, so the shift is 32 + log2(B_1)
+ CLZ(D_0, B_1)
+ rsb D_0, D_0, #31
+ mov A_0, A_1, lsr D_0
+ @ make the count >= 32 so that the shift at L_1 clears A_1
+ add D_0, D_0, #32
+L_1:
+ mov A_1, A_1, lsr D_0
+ @ Mov back C to R
+ mov R_0, C_0
+ mov R_1, C_1
+ ldmfd sp!, {r4, r5, r6, r7, pc}
+
+@ Division by zero: report it through __div0. If that call returns, hand
+@ back Q = 0 and R = 0.
+L_div_by_0:
+ @ The entry code pushed five words (20 bytes), so sp is only 4-byte
+ @ aligned at this point. The AAPCS requires 8-byte stack alignment at
+ @ public interfaces, so pad by one word around the call.
+ sub sp, sp, #4
+ bl __div0
+ add sp, sp, #4
+ @ As wrong as it could be
+ mov Q_0, #0
+ mov Q_1, #0
+ mov R_0, #0
+ mov R_1, #0
+ ldmfd sp!, {r4, r5, r6, r7, pc}
+
+#if __LINUX_ARM_ARCH__ <= 4
+/*
+ * Software count-leading-zeros for builds that do not use the clz
+ * instruction. The word is scanned one nibble at a time from the top; a
+ * 16-entry table gives the number of leading zero bits of a 4-bit value.
+ *
+ * input : r0
+ * output : r0 (0..32)
+ * destroy : r1, r2, r3, r4, r5, flags
+ */
+L_clz:
+ adr r4, L_clz_table
+ mov r3, #28 @ shift that brings the current nibble to bits 3..0
+ mov r2, #0xf0000000 @ mask selecting the current nibble
+ mov r1, #0 @ running count
+L_clz_nibble:
+ ands r5, r0, r2 @ Z set -> this nibble is all zero
+ mov r5, r5, lsr r3
+ ldrsb r5, [r4, r5]
+ add r1, r1, r5
+ bne L_clz_done @ nibble had a set bit: the count is complete
+ sub r3, r3, #4
+ movs r2, r2, lsr #4 @ Z set -> all eight nibbles have been seen
+ bne L_clz_nibble
+L_clz_done:
+ mov r0, r1
+ mov pc, lr
+L_clz_table:
+ .byte 4, 3, 2, 2, 1, 1, 1, 1
+ .byte 0, 0, 0, 0, 0, 0, 0, 0
+
+/*
+ * Stubs reached through CLZ()/CLZEQ(): L_clz_<dst>_<src> sets dst to the
+ * number of leading zero bits in src. r0-r5 are saved and restored around
+ * the call to L_clz; dst is never one of them (r6, r7 or ip here). The
+ * condition flags are not preserved.
+ */
+ .macro CLZ_STUB dst, src
+L_clz_\dst\()_\src:
+ stmfd sp!, {r0-r5, lr}
+ mov r0, \src
+ bl L_clz
+ mov \dst, r0
+ ldmfd sp!, {r0-r5, pc}
+ .endm
+
+ CLZ_STUB D_0, A_1
+ CLZ_STUB ip, A_0
+ CLZ_STUB D_1, B_1
+ CLZ_STUB ip, B_0
+ CLZ_STUB D_0, B_0
+ CLZ_STUB D_0, B_1
+#endif /* __LINUX_ARM_ARCH__ */
--
1.7.3.1
More information about the U-Boot
mailing list