[U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux.
Jason McMullan
mcmullan at netapp.com
Wed Jun 4 21:44:22 CEST 2008
This commit pulls over the memset() MIPS routine from Linux 2.6.26,
which provides a 10x to 20x speedup over the generic byte-at-a-time
routine. This is especially useful on platforms with manual ECC
scrubbing, that require all of memory to be written at least once
after a power cycle.
---
include/asm-mips/string.h | 2 +-
lib_mips/Makefile | 2 +-
lib_mips/memset.S | 174 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 176 insertions(+), 2 deletions(-)
create mode 100644 lib_mips/memset.S
diff --git a/include/asm-mips/string.h b/include/asm-mips/string.h
index 579a591..0df1463 100644
--- a/include/asm-mips/string.h
+++ b/include/asm-mips/string.h
@@ -27,7 +27,7 @@ extern int strcmp(__const__ char *__cs, __const__ char *__ct);
#undef __HAVE_ARCH_STRNCMP
extern int strncmp(__const__ char *__cs, __const__ char *__ct, __kernel_size_t __count);
-#undef __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMSET
extern void *memset(void *__s, int __c, __kernel_size_t __count);
#undef __HAVE_ARCH_MEMCPY
diff --git a/lib_mips/Makefile b/lib_mips/Makefile
index 8176437..9149039 100644
--- a/lib_mips/Makefile
+++ b/lib_mips/Makefile
@@ -25,7 +25,7 @@ include $(TOPDIR)/config.mk
LIB = $(obj)lib$(ARCH).a
-SOBJS-y +=
+SOBJS-y += memset.o
COBJS-y += board.o
COBJS-y += bootm.o
diff --git a/lib_mips/memset.S b/lib_mips/memset.S
new file mode 100644
index 0000000..f1c07d7
--- /dev/null
+++ b/lib_mips/memset.S
@@ -0,0 +1,174 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
+ */
+#include <asm/asm.h>
+//#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+
+#if LONGSIZE == 4
+#define LONG_S_L swl
+#define LONG_S_R swr
+#else
+#define LONG_S_L sdl
+#define LONG_S_R sdr
+#endif
+
+#define EX(insn,reg,addr,handler) \
+9: insn reg, addr; \
+ .section __ex_table,"a"; \
+ PTR 9b, handler; \
+ .previous
+
+ .macro f_fill64 dst, offset, val, fixup
+ EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup)
+#if LONGSIZE == 4
+ EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
+ EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
+#endif
+ .endm
+
+/*
+ * memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to clear
+ * a1: char to fill with
+ * a2: size of area to clear
+ */
+ .set noreorder
+ .align 5
+LEAF(memset)
+ beqz a1, 1f
+ move v0, a0 /* result */
+
+ andi a1, 0xff /* spread fillword */
+ LONG_SLL t1, a1, 8
+ or a1, t1
+ LONG_SLL t1, a1, 16
+#if LONGSIZE == 8
+ or a1, t1
+ LONG_SLL t1, a1, 32
+#endif
+ or a1, t1
+1:
+
+FEXPORT(__bzero)
+ sltiu t0, a2, LONGSIZE /* very small region? */
+ bnez t0, .Lsmall_memset
+ andi t0, a0, LONGMASK /* aligned? */
+
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+ beqz t0, 1f
+ PTR_SUBU t0, LONGSIZE /* alignment in bytes */
+#else
+ .set noat
+ li AT, LONGSIZE
+ beqz t0, 1f
+ PTR_SUBU t0, AT /* alignment in bytes */
+ .set at
+#endif
+
+ R10KCBARRIER(0(ra))
+#ifdef __MIPSEB__
+ EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */
+#endif
+#ifdef __MIPSEL__
+ EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */
+#endif
+ PTR_SUBU a0, t0 /* long align ptr */
+ PTR_ADDU a2, t0 /* correct size */
+
+1: ori t1, a2, 0x3f /* # of full blocks */
+ xori t1, 0x3f
+ beqz t1, .Lmemset_partial /* no block to fill */
+ andi t0, a2, 0x40-LONGSIZE
+
+ PTR_ADDU t1, a0 /* end address */
+ .set reorder
+1: PTR_ADDIU a0, 64
+ R10KCBARRIER(0(ra))
+ f_fill64 a0, -64, a1, .Lfwd_fixup
+ bne t1, a0, 1b
+ .set noreorder
+
+.Lmemset_partial:
+ R10KCBARRIER(0(ra))
+ PTR_LA t1, 2f /* where to start */
+#if LONGSIZE == 4
+ PTR_SUBU t1, t0
+#else
+ .set noat
+ LONG_SRL AT, t0, 1
+ PTR_SUBU t1, AT
+ .set at
+#endif
+ jr t1
+ PTR_ADDU a0, t0 /* dest ptr */
+
+ .set push
+ .set noreorder
+ .set nomacro
+ f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */
+2: .set pop
+ andi a2, LONGMASK /* At most one long to go */
+
+ beqz a2, 1f
+ PTR_ADDU a0, a2 /* What's left */
+ R10KCBARRIER(0(ra))
+#ifdef __MIPSEB__
+ EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
+#endif
+#ifdef __MIPSEL__
+ EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
+#endif
+1: jr ra
+ move a2, zero
+
+.Lsmall_memset:
+ beqz a2, 2f
+ PTR_ADDU t1, a0, a2
+
+1: PTR_ADDIU a0, 1 /* fill bytewise */
+ R10KCBARRIER(0(ra))
+ bne t1, a0, 1b
+ sb a1, -1(a0)
+
+2: jr ra /* done */
+ move a2, zero
+ END(memset)
+
+.Lfirst_fixup:
+ jr ra
+ nop
+
+.Lfwd_fixup:
+ andi a2, 0x3f
+ jr ra
+ LONG_ADDU a2, t1
+
+.Lpartial_fixup:
+ andi a2, LONGMASK
+ jr ra
+ LONG_ADDU a2, t1
+
+.Llast_fixup:
+ jr ra
+ andi v1, a2, LONGMASK
--
1.5.4.3
More information about the U-Boot
mailing list