[U-Boot] [PATCH 2/2 u-boot-next] ARM: Don't include libgcc anymore
Jean-Christophe PLAGNIOL-VILLARD
plagnioj at jcrosoft.com
Wed Jun 10 13:59:14 CEST 2009
This patch removes the inclusion of libgcc functions into U-Boot on the ARM
architecture. Only the really needed functions are provided in the lib_arm
directory. Those implementations are copied from Linux where they are well
proven related to reliably, performance.
Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj at jcrosoft.com>
Cc: Stefan Roese <sr at denx.de>
---
Makefile | 11 +-
arm_config.mk | 2 +-
board/trab/u-boot.lds | 2 +-
include/asm-arm/assembler.h | 116 ++++++++++++
include/asm-arm/linkage.h | 11 ++
include/linux/linkage.h | 95 ++++++++++
lib_arm/Makefile | 10 +-
lib_arm/_divsi3.S | 140 ---------------
lib_arm/_modsi3.S | 99 -----------
lib_arm/_udivsi3.S | 77 --------
lib_arm/_umodsi3.S | 88 ----------
lib_arm/{_ashldi3.S => ashldi3.S} | 9 +-
lib_arm/{_ashrdi3.S => ashrdi3.S} | 9 +-
lib_arm/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++++
lib_arm/{_ashrdi3.S => lshrdi3.S} | 13 +-
15 files changed, 607 insertions(+), 423 deletions(-)
create mode 100644 include/asm-arm/assembler.h
create mode 100644 include/asm-arm/linkage.h
create mode 100644 include/linux/linkage.h
delete mode 100644 lib_arm/_divsi3.S
delete mode 100644 lib_arm/_modsi3.S
delete mode 100644 lib_arm/_udivsi3.S
delete mode 100644 lib_arm/_umodsi3.S
rename lib_arm/{_ashldi3.S => ashldi3.S} (93%)
copy lib_arm/{_ashrdi3.S => ashrdi3.S} (93%)
create mode 100644 lib_arm/lib1funcs.S
rename lib_arm/{_ashrdi3.S => lshrdi3.S} (90%)
diff --git a/Makefile b/Makefile
index be12164..977ed5a 100644
--- a/Makefile
+++ b/Makefile
@@ -220,7 +220,6 @@ endif
ifeq ($(CPU),ixp)
LIBS += cpu/ixp/npe/libnpe.a
endif
-LIBS += lib_$(ARCH)/lib$(ARCH).a
LIBS += fs/cramfs/libcramfs.a fs/fat/libfat.a fs/fdos/libfdos.a fs/jffs2/libjffs2.a \
fs/reiserfs/libreiserfs.a fs/ext2/libext2fs.a fs/yaffs2/libyaffs2.a \
fs/ubifs/libubifs.a
@@ -279,6 +278,9 @@ LIBS := $(addprefix $(obj),$(LIBS))
LIBBOARD = board/$(BOARDDIR)/lib$(BOARD).a
LIBBOARD := $(addprefix $(obj),$(LIBBOARD))
+LIBARCH = lib_$(ARCH)/lib$(ARCH).a
+LIBARCH := $(addprefix $(obj),$(LIBARCH))
+
# Add GCC lib
PLATFORM_LIBS += $(PLATFORM_LIBGCC)
@@ -301,7 +303,7 @@ U_BOOT_ONENAND = $(obj)u-boot-onenand.bin
endif
__OBJS := $(subst $(obj),,$(OBJS))
-__LIBS := $(subst $(obj),,$(LIBS)) $(subst $(obj),,$(LIBBOARD))
+__LIBS := $(subst $(obj),,$(LIBS)) $(subst $(obj),,$(LIBBOARD)) $(subst $(obj),,$(LIBARCH))
#########################################################################
#########################################################################
@@ -354,13 +356,16 @@ $(obj)u-boot: depend $(SUBDIRS) $(OBJS) $(LIBBOARD) $(LIBS) $(LDSCRIPT)
$(OBJS): depend
$(MAKE) -C cpu/$(CPU) $(if $(REMOTE_BUILD),$@,$(notdir $@))
+$(LIBARCH): depend
+ $(MAKE) -C $(dir $(subst $(obj),,$@))
+
$(LIBS): depend $(SUBDIRS)
$(MAKE) -C $(dir $(subst $(obj),,$@))
$(LIBBOARD): depend $(LIBS)
$(MAKE) -C $(dir $(subst $(obj),,$@))
-$(SUBDIRS): depend
+$(SUBDIRS): depend $(LIBARCH)
$(MAKE) -C $@ all
$(LDSCRIPT): depend
diff --git a/arm_config.mk b/arm_config.mk
index b88a3f2..0d6cd24 100644
--- a/arm_config.mk
+++ b/arm_config.mk
@@ -25,4 +25,4 @@ PLATFORM_CPPFLAGS += -DCONFIG_ARM -D__ARM__
LDSCRIPT := $(SRCTREE)/cpu/$(CPU)/u-boot.lds
-PLATFORM_LIBGCC += -L $(shell dirname `$(CC) $(CFLAGS) -print-libgcc-file-name`) -lgcc
+PLATFORM_LIBGCC += -L$(OBJTREE)/lib_arm -larm
diff --git a/board/trab/u-boot.lds b/board/trab/u-boot.lds
index d8bcfa4..a83853e 100644
--- a/board/trab/u-boot.lds
+++ b/board/trab/u-boot.lds
@@ -33,7 +33,7 @@ SECTIONS
.text :
{
cpu/arm920t/start.o (.text)
- lib_arm/_umodsi3.o (.text)
+ lib_arm/lib1funcs.o (.text)
lib_generic/zlib.o (.text)
lib_generic/crc32.o (.text)
lib_generic/string.o (.text)
diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h
new file mode 100644
index 0000000..6116e48
--- /dev/null
+++ b/include/asm-arm/assembler.h
@@ -0,0 +1,116 @@
+/*
+ * arch/arm/include/asm/assembler.h
+ *
+ * Copyright (C) 1996-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This file contains arm architecture specific defines
+ * for the different processors.
+ *
+ * Do not include any C declarations in this file - it is included by
+ * assembler source.
+ */
+#ifndef __ASSEMBLY__
+#error "Only include this from assembly code"
+#endif
+
+#include <asm/ptrace.h>
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull lsr
+#define push lsl
+#define get_byte_0 lsl #0
+#define get_byte_1 lsr #8
+#define get_byte_2 lsr #16
+#define get_byte_3 lsr #24
+#define put_byte_0 lsl #0
+#define put_byte_1 lsl #8
+#define put_byte_2 lsl #16
+#define put_byte_3 lsl #24
+#else
+#define pull lsl
+#define push lsr
+#define get_byte_0 lsr #24
+#define get_byte_1 lsr #16
+#define get_byte_2 lsr #8
+#define get_byte_3 lsl #0
+#define put_byte_0 lsl #24
+#define put_byte_1 lsl #16
+#define put_byte_2 lsl #8
+#define put_byte_3 lsl #0
+#endif
+
+/*
+ * Data preload for architectures that support it
+ */
+#if __LINUX_ARM_ARCH__ >= 5
+#define PLD(code...) code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory. Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
+/*
+ * Enable and disable interrupts
+ */
+#if __LINUX_ARM_ARCH__ >= 6
+ .macro disable_irq
+ cpsid i
+ .endm
+
+ .macro enable_irq
+ cpsie i
+ .endm
+#else
+ .macro disable_irq
+ msr cpsr_c, #PSR_I_BIT | SVC_MODE
+ .endm
+
+ .macro enable_irq
+ msr cpsr_c, #SVC_MODE
+ .endm
+#endif
+
+/*
+ * Save the current IRQ state and disable IRQs. Note that this macro
+ * assumes FIQs are enabled, and that the processor is in SVC mode.
+ */
+ .macro save_and_disable_irqs, oldcpsr
+ mrs \oldcpsr, cpsr
+ disable_irq
+ .endm
+
+/*
+ * Restore interrupt state previously stored in a register. We don't
+ * guarantee that this will preserve the flags.
+ */
+ .macro restore_irqs, oldcpsr
+ msr cpsr_c, \oldcpsr
+ .endm
+
+#define USER(x...) \
+9999: x; \
+ .section __ex_table,"a"; \
+ .align 3; \
+ .long 9999b,9001f; \
+ .previous
diff --git a/include/asm-arm/linkage.h b/include/asm-arm/linkage.h
new file mode 100644
index 0000000..5a25632
--- /dev/null
+++ b/include/asm-arm/linkage.h
@@ -0,0 +1,11 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .align 0
+#define __ALIGN_STR ".align 0"
+
+#define ENDPROC(name) \
+ .type name, %function; \
+ END(name)
+
+#endif
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
new file mode 100644
index 0000000..b25624e
--- /dev/null
+++ b/include/linux/linkage.h
@@ -0,0 +1,95 @@
+#ifndef _LINUX_LINKAGE_H
+#define _LINUX_LINKAGE_H
+
+#include <asm/linkage.h>
+
+#ifdef __cplusplus
+#define CPP_ASMLINKAGE extern "C"
+#else
+#define CPP_ASMLINKAGE
+#endif
+
+#ifndef asmlinkage
+#define asmlinkage CPP_ASMLINKAGE
+#endif
+
+#ifndef asmregparm
+# define asmregparm
+#endif
+
+#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE)
+#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE)
+
+/*
+ * This is used by architectures to keep arguments on the stack
+ * untouched by the compiler by keeping them live until the end.
+ * The argument stack may be owned by the assembly-language
+ * caller, not the callee, and gcc doesn't always understand
+ * that.
+ *
+ * We have the return value, and a maximum of six arguments.
+ *
+ * This should always be followed by a "return ret" for the
+ * protection to work (ie no more work that the compiler might
+ * end up needing stack temporaries for).
+ */
+/* Assembly files may be compiled with -traditional .. */
+#ifndef __ASSEMBLY__
+#ifndef asmlinkage_protect
+# define asmlinkage_protect(n, ret, args...) do { } while (0)
+#endif
+#endif
+
+#ifndef __ALIGN
+#define __ALIGN .align 4,0x90
+#define __ALIGN_STR ".align 4,0x90"
+#endif
+
+#ifdef __ASSEMBLY__
+
+#define ALIGN __ALIGN
+#define ALIGN_STR __ALIGN_STR
+
+#ifndef ENTRY
+#define ENTRY(name) \
+ .globl name; \
+ ALIGN; \
+ name:
+#endif
+
+#ifndef WEAK
+#define WEAK(name) \
+ .weak name; \
+ name:
+#endif
+
+#define KPROBE_ENTRY(name) \
+ .pushsection .kprobes.text, "ax"; \
+ ENTRY(name)
+
+#define KPROBE_END(name) \
+ END(name); \
+ .popsection
+
+#ifndef END
+#define END(name) \
+ .size name, .-name
+#endif
+
+/* If symbol 'name' is treated as a subroutine (gets called, and returns)
+ * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of
+ * static analysis tools such as stack depth analyzer.
+ */
+#ifndef ENDPROC
+#define ENDPROC(name) \
+ .type name, @function; \
+ END(name)
+#endif
+
+#endif
+
+#define NORET_TYPE /**/
+#define ATTRIB_NORET __attribute__((noreturn))
+#define NORET_AND noreturn,
+
+#endif
diff --git a/lib_arm/Makefile b/lib_arm/Makefile
index 4469361..756643f 100644
--- a/lib_arm/Makefile
+++ b/lib_arm/Makefile
@@ -25,12 +25,10 @@ include $(TOPDIR)/config.mk
LIB = $(obj)lib$(ARCH).a
-SOBJS-y += _ashldi3.o
-SOBJS-y += _ashrdi3.o
-SOBJS-y += _divsi3.o
-SOBJS-y += _modsi3.o
-SOBJS-y += _udivsi3.o
-SOBJS-y += _umodsi3.o
+SOBJS-y += lib1funcs.o
+SOBJS-y += ashldi3.o
+SOBJS-y += ashrdi3.o
+SOBJS-y += lshrdi3.o
COBJS-y += board.o
COBJS-y += bootm.o
diff --git a/lib_arm/_divsi3.S b/lib_arm/_divsi3.S
deleted file mode 100644
index 9dc15f6..0000000
--- a/lib_arm/_divsi3.S
+++ /dev/null
@@ -1,140 +0,0 @@
-
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
-
-#if __LINUX_ARM_ARCH__ >= 5
-
- clz \curbit, \divisor
- clz \result, \dividend
- sub \result, \curbit, \result
- mov \curbit, #1
- mov \divisor, \divisor, lsl \result
- mov \curbit, \curbit, lsl \result
- mov \result, #0
-
-#else
-
- @ Initially shift the divisor left 3 bits if possible,
- @ set curbit accordingly. This allows for curbit to be located
- @ at the left end of each 4 bit nibbles in the division loop
- @ to save one loop in most cases.
- tst \divisor, #0xe0000000
- moveq \divisor, \divisor, lsl #3
- moveq \curbit, #8
- movne \curbit, #1
-
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
-1: cmp \divisor, #0x10000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #4
- movlo \curbit, \curbit, lsl #4
- blo 1b
-
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
-1: cmp \divisor, #0x80000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #1
- movlo \curbit, \curbit, lsl #1
- blo 1b
-
- mov \result, #0
-
-#endif
-
- @ Division loop
-1: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- orrhs \result, \result, \curbit
- cmp \dividend, \divisor, lsr #1
- subhs \dividend, \dividend, \divisor, lsr #1
- orrhs \result, \result, \curbit, lsr #1
- cmp \dividend, \divisor, lsr #2
- subhs \dividend, \dividend, \divisor, lsr #2
- orrhs \result, \result, \curbit, lsr #2
- cmp \dividend, \divisor, lsr #3
- subhs \dividend, \dividend, \divisor, lsr #3
- orrhs \result, \result, \curbit, lsr #3
- cmp \dividend, #0 @ Early termination?
- movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
- movne \divisor, \divisor, lsr #4
- bne 1b
-
-.endm
-
-.macro ARM_DIV2_ORDER divisor, order
-
-#if __LINUX_ARM_ARCH__ >= 5
-
- clz \order, \divisor
- rsb \order, \order, #31
-
-#else
-
- cmp \divisor, #(1 << 16)
- movhs \divisor, \divisor, lsr #16
- movhs \order, #16
- movlo \order, #0
-
- cmp \divisor, #(1 << 8)
- movhs \divisor, \divisor, lsr #8
- addhs \order, \order, #8
-
- cmp \divisor, #(1 << 4)
- movhs \divisor, \divisor, lsr #4
- addhs \order, \order, #4
-
- cmp \divisor, #(1 << 2)
- addhi \order, \order, #3
- addls \order, \order, \divisor, lsr #1
-
-#endif
-
-.endm
-
- .align 5
-.globl __divsi3
-__divsi3:
- cmp r1, #0
- eor ip, r0, r1 @ save the sign of the result.
- beq Ldiv0
- rsbmi r1, r1, #0 @ loops below use unsigned.
- subs r2, r1, #1 @ division by 1 or -1 ?
- beq 10f
- movs r3, r0
- rsbmi r3, r0, #0 @ positive dividend value
- cmp r3, r1
- bls 11f
- tst r1, r2 @ divisor is power of 2 ?
- beq 12f
-
- ARM_DIV_BODY r3, r1, r0, r2
-
- cmp ip, #0
- rsbmi r0, r0, #0
- mov pc, lr
-
-10: teq ip, r0 @ same sign ?
- rsbmi r0, r0, #0
- mov pc, lr
-
-11: movlo r0, #0
- moveq r0, ip, asr #31
- orreq r0, r0, #1
- mov pc, lr
-
-12: ARM_DIV2_ORDER r1, r2
-
- cmp ip, #0
- mov r0, r3, lsr r2
- rsbmi r0, r0, #0
- mov pc, lr
-
-Ldiv0:
-
- str lr, [sp, #-4]!
- bl __div0
- mov r0, #0 @ About as wrong as it could be.
- ldr pc, [sp], #4
diff --git a/lib_arm/_modsi3.S b/lib_arm/_modsi3.S
deleted file mode 100644
index 539c584..0000000
--- a/lib_arm/_modsi3.S
+++ /dev/null
@@ -1,99 +0,0 @@
-
-.macro ARM_MOD_BODY dividend, divisor, order, spare
-
-#if __LINUX_ARM_ARCH__ >= 5
-
- clz \order, \divisor
- clz \spare, \dividend
- sub \order, \order, \spare
- mov \divisor, \divisor, lsl \order
-
-#else
-
- mov \order, #0
-
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
-1: cmp \divisor, #0x10000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #4
- addlo \order, \order, #4
- blo 1b
-
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
-1: cmp \divisor, #0x80000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #1
- addlo \order, \order, #1
- blo 1b
-
-#endif
-
- @ Perform all needed substractions to keep only the reminder.
- @ Do comparisons in batch of 4 first.
- subs \order, \order, #3 @ yes, 3 is intended here
- blt 2f
-
-1: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- cmp \dividend, \divisor, lsr #1
- subhs \dividend, \dividend, \divisor, lsr #1
- cmp \dividend, \divisor, lsr #2
- subhs \dividend, \dividend, \divisor, lsr #2
- cmp \dividend, \divisor, lsr #3
- subhs \dividend, \dividend, \divisor, lsr #3
- cmp \dividend, #1
- mov \divisor, \divisor, lsr #4
- subges \order, \order, #4
- bge 1b
-
- tst \order, #3
- teqne \dividend, #0
- beq 5f
-
- @ Either 1, 2 or 3 comparison/substractions are left.
-2: cmn \order, #2
- blt 4f
- beq 3f
- cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- mov \divisor, \divisor, lsr #1
-3: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- mov \divisor, \divisor, lsr #1
-4: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
-5:
-.endm
-
- .align 5
-.globl __modsi3
-__modsi3:
- cmp r1, #0
- beq Ldiv0
- rsbmi r1, r1, #0 @ loops below use unsigned.
- movs ip, r0 @ preserve sign of dividend
- rsbmi r0, r0, #0 @ if negative make positive
- subs r2, r1, #1 @ compare divisor with 1
- cmpne r0, r1 @ compare dividend with divisor
- moveq r0, #0
- tsthi r1, r2 @ see if divisor is power of 2
- andeq r0, r0, r2
- bls 10f
-
- ARM_MOD_BODY r0, r1, r2, r3
-
-10: cmp ip, #0
- rsbmi r0, r0, #0
- mov pc, lr
-
-
-Ldiv0:
-
- str lr, [sp, #-4]!
- bl __div0
- mov r0, #0 @ About as wrong as it could be.
- ldr pc, [sp], #4
diff --git a/lib_arm/_udivsi3.S b/lib_arm/_udivsi3.S
deleted file mode 100644
index a3f9b59..0000000
--- a/lib_arm/_udivsi3.S
+++ /dev/null
@@ -1,77 +0,0 @@
-/* # 1 "libgcc1.S" */
-@ libgcc1 routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha at armltd.co.uk)
-dividend .req r0
-divisor .req r1
-result .req r2
-curbit .req r3
-/* ip .req r12 */
-/* sp .req r13 */
-/* lr .req r14 */
-/* pc .req r15 */
- .text
- .globl __udivsi3
- .type __udivsi3 ,function
- .align 0
- __udivsi3 :
- cmp divisor, #0
- beq Ldiv0
- mov curbit, #1
- mov result, #0
- cmp dividend, divisor
- bcc Lgot_result
-Loop1:
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
- cmp divisor, #0x10000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #4
- movcc curbit, curbit, lsl #4
- bcc Loop1
-Lbignum:
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
- cmp divisor, #0x80000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #1
- movcc curbit, curbit, lsl #1
- bcc Lbignum
-Loop3:
- @ Test for possible subtractions, and note which bits
- @ are done in the result. On the final pass, this may subtract
- @ too much from the dividend, but the result will be ok, since the
- @ "bit" will have been shifted out at the bottom.
- cmp dividend, divisor
- subcs dividend, dividend, divisor
- orrcs result, result, curbit
- cmp dividend, divisor, lsr #1
- subcs dividend, dividend, divisor, lsr #1
- orrcs result, result, curbit, lsr #1
- cmp dividend, divisor, lsr #2
- subcs dividend, dividend, divisor, lsr #2
- orrcs result, result, curbit, lsr #2
- cmp dividend, divisor, lsr #3
- subcs dividend, dividend, divisor, lsr #3
- orrcs result, result, curbit, lsr #3
- cmp dividend, #0 @ Early termination?
- movnes curbit, curbit, lsr #4 @ No, any more bits to do?
- movne divisor, divisor, lsr #4
- bne Loop3
-Lgot_result:
- mov r0, result
- mov pc, lr
-Ldiv0:
- str lr, [sp, #-4]!
- bl __div0 (PLT)
- mov r0, #0 @ about as wrong as it could be
- ldmia sp!, {pc}
- .size __udivsi3 , . - __udivsi3
-/* # 235 "libgcc1.S" */
-/* # 320 "libgcc1.S" */
-/* # 421 "libgcc1.S" */
-/* # 433 "libgcc1.S" */
-/* # 456 "libgcc1.S" */
-/* # 500 "libgcc1.S" */
-/* # 580 "libgcc1.S" */
diff --git a/lib_arm/_umodsi3.S b/lib_arm/_umodsi3.S
deleted file mode 100644
index 8465ef0..0000000
--- a/lib_arm/_umodsi3.S
+++ /dev/null
@@ -1,88 +0,0 @@
-/* # 1 "libgcc1.S" */
-@ libgcc1 routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha at armltd.co.uk)
-/* # 145 "libgcc1.S" */
-dividend .req r0
-divisor .req r1
-overdone .req r2
-curbit .req r3
-/* ip .req r12 */
-/* sp .req r13 */
-/* lr .req r14 */
-/* pc .req r15 */
- .text
- .globl __umodsi3
- .type __umodsi3 ,function
- .align 0
- __umodsi3 :
- cmp divisor, #0
- beq Ldiv0
- mov curbit, #1
- cmp dividend, divisor
- movcc pc, lr
-Loop1:
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
- cmp divisor, #0x10000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #4
- movcc curbit, curbit, lsl #4
- bcc Loop1
-Lbignum:
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
- cmp divisor, #0x80000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #1
- movcc curbit, curbit, lsl #1
- bcc Lbignum
-Loop3:
- @ Test for possible subtractions. On the final pass, this may
- @ subtract too much from the dividend, so keep track of which
- @ subtractions are done, we can fix them up afterwards...
- mov overdone, #0
- cmp dividend, divisor
- subcs dividend, dividend, divisor
- cmp dividend, divisor, lsr #1
- subcs dividend, dividend, divisor, lsr #1
- orrcs overdone, overdone, curbit, ror #1
- cmp dividend, divisor, lsr #2
- subcs dividend, dividend, divisor, lsr #2
- orrcs overdone, overdone, curbit, ror #2
- cmp dividend, divisor, lsr #3
- subcs dividend, dividend, divisor, lsr #3
- orrcs overdone, overdone, curbit, ror #3
- mov ip, curbit
- cmp dividend, #0 @ Early termination?
- movnes curbit, curbit, lsr #4 @ No, any more bits to do?
- movne divisor, divisor, lsr #4
- bne Loop3
- @ Any subtractions that we should not have done will be recorded in
- @ the top three bits of "overdone". Exactly which were not needed
- @ are governed by the position of the bit, stored in ip.
- @ If we terminated early, because dividend became zero,
- @ then none of the below will match, since the bit in ip will not be
- @ in the bottom nibble.
- ands overdone, overdone, #0xe0000000
- moveq pc, lr @ No fixups needed
- tst overdone, ip, ror #3
- addne dividend, dividend, divisor, lsr #3
- tst overdone, ip, ror #2
- addne dividend, dividend, divisor, lsr #2
- tst overdone, ip, ror #1
- addne dividend, dividend, divisor, lsr #1
- mov pc, lr
-Ldiv0:
- str lr, [sp, #-4]!
- bl __div0 (PLT)
- mov r0, #0 @ about as wrong as it could be
- ldmia sp!, {pc}
- .size __umodsi3 , . - __umodsi3
-/* # 320 "libgcc1.S" */
-/* # 421 "libgcc1.S" */
-/* # 433 "libgcc1.S" */
-/* # 456 "libgcc1.S" */
-/* # 500 "libgcc1.S" */
-/* # 580 "libgcc1.S" */
diff --git a/lib_arm/_ashldi3.S b/lib_arm/ashldi3.S
similarity index 93%
rename from lib_arm/_ashldi3.S
rename to lib_arm/ashldi3.S
index de4403d..1154d92 100644
--- a/lib_arm/_ashldi3.S
+++ b/lib_arm/ashldi3.S
@@ -26,6 +26,8 @@ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
+#include <linux/linkage.h>
+
#ifdef __ARMEB__
#define al r1
#define ah r0
@@ -34,8 +36,8 @@ Boston, MA 02110-1301, USA. */
#define ah r1
#endif
-.globl __ashldi3
-__ashldi3:
+ENTRY(__ashldi3)
+ENTRY(__aeabi_llsl)
subs r3, r2, #32
rsb ip, r2, #32
@@ -44,3 +46,6 @@ __ashldi3:
orrmi ah, ah, al, lsr ip
mov al, al, lsl r2
mov pc, lr
+
+ENDPROC(__ashldi3)
+ENDPROC(__aeabi_llsl)
diff --git a/lib_arm/_ashrdi3.S b/lib_arm/ashrdi3.S
similarity index 93%
copy from lib_arm/_ashrdi3.S
copy to lib_arm/ashrdi3.S
index 5edbcb3..9f8b355 100644
--- a/lib_arm/_ashrdi3.S
+++ b/lib_arm/ashrdi3.S
@@ -26,6 +26,8 @@ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
+#include <linux/linkage.h>
+
#ifdef __ARMEB__
#define al r1
#define ah r0
@@ -34,8 +36,8 @@ Boston, MA 02110-1301, USA. */
#define ah r1
#endif
-.globl __ashrdi3
-__ashrdi3:
+ENTRY(__ashrdi3)
+ENTRY(__aeabi_lasr)
subs r3, r2, #32
rsb ip, r2, #32
@@ -44,3 +46,6 @@ __ashrdi3:
orrmi al, al, ah, lsl ip
mov ah, ah, asr r2
mov pc, lr
+
+ENDPROC(__ashrdi3)
+ENDPROC(__aeabi_lasr)
diff --git a/lib_arm/lib1funcs.S b/lib_arm/lib1funcs.S
new file mode 100644
index 0000000..67964bc
--- /dev/null
+++ b/lib_arm/lib1funcs.S
@@ -0,0 +1,348 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico at cam.org>
+ * - contributed to gcc-3.4 on Sep 30, 2003
+ * - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
+#else
+
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4 bit nibbles in the division loop
+ @ to save one loop in most cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif
+
+ @ Perform all needed substractions to keep only the reminder.
+ @ Do comparisons in batch of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+ @ Either 1, 2 or 3 comparison/substractions are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
+.endm
+
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+
+ subs r2, r1, #1
+ moveq pc, lr
+ bcc Ldiv0
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
+
+ ARM_DIV_BODY r0, r1, r2, r3
+
+ mov r0, r2
+ mov pc, lr
+
+11: moveq r0, #1
+ movne r0, #0
+ mov pc, lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ mov r0, r0, lsr r2
+ mov pc, lr
+
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc Ldiv0
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ movls pc, lr
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+ mov pc, lr
+
+ENDPROC(__umodsi3)
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+
+ cmp r1, #0
+ eor ip, r0, r1 @ save the sign of the result.
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
+
+ cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+10: teq ip, r0 @ same sign ?
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+11: movlo r0, #0
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ mov pc, lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+
+ cmp r1, #0
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
+
+ENDPROC(__modsi3)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_uidivmod)
+
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_uidiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_idiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+
+ENDPROC(__aeabi_idivmod)
+
+#endif
+
+Ldiv0:
+
+ str lr, [sp, #-8]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #8
+
+
diff --git a/lib_arm/_ashrdi3.S b/lib_arm/lshrdi3.S
similarity index 90%
rename from lib_arm/_ashrdi3.S
rename to lib_arm/lshrdi3.S
index 5edbcb3..99ea338 100644
--- a/lib_arm/_ashrdi3.S
+++ b/lib_arm/lshrdi3.S
@@ -26,6 +26,8 @@ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
+#include <linux/linkage.h>
+
#ifdef __ARMEB__
#define al r1
#define ah r0
@@ -34,13 +36,16 @@ Boston, MA 02110-1301, USA. */
#define ah r1
#endif
-.globl __ashrdi3
-__ashrdi3:
+ENTRY(__lshrdi3)
+ENTRY(__aeabi_llsr)
subs r3, r2, #32
rsb ip, r2, #32
movmi al, al, lsr r2
- movpl al, ah, asr r3
+ movpl al, ah, lsr r3
orrmi al, al, ah, lsl ip
- mov ah, ah, asr r2
+ mov ah, ah, lsr r2
mov pc, lr
+
+ENDPROC(__lshrdi3)
+ENDPROC(__aeabi_llsr)
--
1.6.3.1
More information about the U-Boot
mailing list