[U-Boot] [PATCH v2 3/9] arc: add library functions

Alexey Brodkin Alexey.Brodkin at synopsys.com
Wed Jan 29 17:08:59 CET 2014


These are library functions used by ARC700 architecture.

Following files were borrowed from Linux kernel sources,
commit 5ee54f38171b9b3541c5e9cf9c3a9e53455fd8b4 (Linux 3.11.10):

 * memcmp.S
 * memcpy-700.S
 * memset.S
 * strchr-700.S
 * strcmp.S
 * strcpy-700.S
 * strlen.S

Signed-off-by: Alexey Brodkin <abrodkin at synopsys.com>

Cc: Mischa Jonker <mjonker at synopsys.com>
Cc: Francois Bedard <fbedard at synopsys.com>

Changes for v2:

 * Added commit message
 * Added borrowed from Linux optimized string routines
 * Added explicit mention of files borrowed from Linux sources with
   reference to versoin/commit in Linux git repository
---
 arch/arc/lib/Makefile     |  16 ++++++
 arch/arc/lib/bootm.c      | 106 ++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcmp.S     | 122 ++++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcpy-700.S |  64 ++++++++++++++++++++++
 arch/arc/lib/memset.S     |  59 ++++++++++++++++++++
 arch/arc/lib/relocate.c   |  74 ++++++++++++++++++++++++++
 arch/arc/lib/sections.c   |  21 ++++++++
 arch/arc/lib/strchr-700.S | 133 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/strcmp.S     |  96 +++++++++++++++++++++++++++++++++
 arch/arc/lib/strcpy-700.S |  70 ++++++++++++++++++++++++
 arch/arc/lib/strlen.S     |  83 +++++++++++++++++++++++++++++
 11 files changed, 844 insertions(+)
 create mode 100644 arch/arc/lib/Makefile
 create mode 100644 arch/arc/lib/bootm.c
 create mode 100644 arch/arc/lib/memcmp.S
 create mode 100644 arch/arc/lib/memcpy-700.S
 create mode 100644 arch/arc/lib/memset.S
 create mode 100644 arch/arc/lib/relocate.c
 create mode 100644 arch/arc/lib/sections.c
 create mode 100644 arch/arc/lib/strchr-700.S
 create mode 100644 arch/arc/lib/strcmp.S
 create mode 100644 arch/arc/lib/strcpy-700.S
 create mode 100644 arch/arc/lib/strlen.S

diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
new file mode 100644
index 0000000..7675f85
--- /dev/null
+++ b/arch/arc/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+
+obj-y += sections.o
+obj-y += relocate.o
+obj-y += strchr-700.o
+obj-y += strcmp.o
+obj-y += strcpy-700.o
+obj-y += strlen.o
+obj-y += memcmp.o
+obj-y += memcpy-700.o
+obj-y += memset.o
+obj-$(CONFIG_CMD_BOOTM) += bootm.o
diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c
new file mode 100644
index 0000000..d185a50
--- /dev/null
+++ b/arch/arc/lib/bootm.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+static ulong get_sp(void)
+{
+	ulong ret;
+
+	asm("mov %0, sp" : "=r"(ret) : );
+	return ret;
+}
+
+void arch_lmb_reserve(struct lmb *lmb)
+{
+	ulong sp;
+
+	/*
+	 * Booting a (Linux) kernel image
+	 *
+	 * Allocate space for command line and board info - the
+	 * address should be as high as possible within the reach of
+	 * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
+	 * memory, which means far enough below the current stack
+	 * pointer.
+	 */
+	sp = get_sp();
+	debug("## Current stack ends at 0x%08lx ", sp);
+
+	/* adjust sp by 4K to be safe */
+	sp -= 4096;
+	lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
+}
+
+static int cleanup_before_linux(void)
+{
+	disable_interrupts();
+	flush_dcache_all();
+	invalidate_icache_all();
+
+	return 0;
+}
+
+/* Subcommand: PREP */
+static void boot_prep_linux(bootm_headers_t *images)
+{
+	if (image_setup_linux(images))
+		hang();
+}
+
+/* Subcommand: GO */
+static void boot_jump_linux(bootm_headers_t *images, int flag)
+{
+	void (*kernel_entry)(int zero, int arch, uint params);
+	unsigned int r0, r2;
+	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+	kernel_entry = (void (*)(int, int, uint))images->ep;
+
+	debug("## Transferring control to Linux (at address %08lx)...\n",
+	      (ulong) kernel_entry);
+	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+	printf("\nStarting kernel ...%s\n\n", fake ?
+	       "(fake run for tracing)" : "");
+	bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
+
+	cleanup_before_linux();
+
+	if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
+		r0 = 2;
+		r2 = (unsigned int)images->ft_addr;
+	} else {
+		r0 = 1;
+		r2 = (unsigned int)getenv("bootargs");
+	}
+
+	if (!fake)
+		kernel_entry(r0, 0, r2);
+}
+
+int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
+{
+	/* No need for those on ARC */
+	if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
+		return -1;
+
+	if (flag & BOOTM_STATE_OS_PREP) {
+		boot_prep_linux(images);
+		return 0;
+	}
+
+	if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
+		boot_jump_linux(images, flag);
+		return 0;
+	}
+
+	boot_prep_linux(images);
+	boot_jump_linux(images, flag);
+	return 0;
+}
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 0000000..c47a271
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* BIG ENDIAN */
+#define WORD2 r3
+#define SHIFT r2
+#endif
+
+ARC_ENTRY memcmp
+	or	r12,r0,r1
+	asl_s	r12,r12,30
+	sub	r3,r2,1
+	brls	r2,r12,.Lbytewise
+	ld	r4,[r0,0]
+	ld	r5,[r1,0]
+	lsr.f	lp_count,r3,3
+	lpne	.Loop_end
+	ld_s	WORD2,[r0,4]
+	ld_s	r12,[r1,4]
+	brne	r4,r5,.Leven
+	ld.a	r4,[r0,8]
+	ld.a	r5,[r1,8]
+	brne	WORD2,r12,.Lodd
+.Loop_end:
+	asl_s	SHIFT,SHIFT,3
+	bhs_s	.Last_cmp
+	brne	r4,r5,.Leven
+	ld	r4,[r0,4]
+	ld	r5,[r1,4]
+#ifdef __LITTLE_ENDIAN__
+	nop_s
+	; one more load latency cycle
+.Last_cmp:
+	xor	r0,r4,r5
+	bset	r0,r0,SHIFT
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	b.d	.Leven_cmp
+	and	r1,r1,24
+.Leven:
+	xor	r0,r4,r5
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	; slow track insn
+	and	r1,r1,24
+.Leven_cmp:
+	asl	r2,r4,r1
+	asl	r12,r5,r1
+	lsr_s	r2,r2,1
+	lsr_s	r12,r12,1
+	j_s.d	[blink]
+	sub	r0,r2,r12
+	.balign	4
+.Lodd:
+	xor	r0,WORD2,r12
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	; slow track insn
+	and	r1,r1,24
+	asl_s	r2,r2,r1
+	asl_s	r12,r12,r1
+	lsr_s	r2,r2,1
+	lsr_s	r12,r12,1
+	j_s.d	[blink]
+	sub	r0,r2,r12
+#else /* BIG ENDIAN */
+.Last_cmp:
+	neg_s	SHIFT,SHIFT
+	lsr	r4,r4,SHIFT
+	lsr	r5,r5,SHIFT
+	; slow track insn
+.Leven:
+	sub.f	r0,r4,r5
+	mov.ne	r0,1
+	j_s.d	[blink]
+	bset.cs	r0,r0,31
+.Lodd:
+	cmp_s	WORD2,r12
+
+	mov_s	r0,1
+	j_s.d	[blink]
+	bset.cs	r0,r0,31
+#endif /* ENDIAN */
+	.balign	4
+.Lbytewise:
+	breq	r2,0,.Lnil
+	ldb	r4,[r0,0]
+	ldb	r5,[r1,0]
+	lsr.f	lp_count,r3
+	lpne	.Lbyte_end
+	ldb_s	r3,[r0,1]
+	ldb	r12,[r1,1]
+	brne	r4,r5,.Lbyte_even
+	ldb.a	r4,[r0,2]
+	ldb.a	r5,[r1,2]
+	brne	r3,r12,.Lbyte_odd
+.Lbyte_end:
+	bcc	.Lbyte_even
+	brne	r4,r5,.Lbyte_even
+	ldb_s	r3,[r0,1]
+	ldb_s	r12,[r1,1]
+.Lbyte_odd:
+	j_s.d	[blink]
+	sub	r0,r3,r12
+.Lbyte_even:
+	j_s.d	[blink]
+	sub	r0,r4,r5
+.Lnil:
+	j_s.d	[blink]
+	mov	r0,0
+ARC_EXIT memcmp
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
new file mode 100644
index 0000000..b5f6151
--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY memcpy
+	or	r3,r0,r1
+	asl_s	r3,r3,30
+	mov_s	r5,r0
+	brls.d	r2,r3,.Lcopy_bytewise
+	sub.f	r3,r2,1
+	ld_s	r12,[r1,0]
+	asr.f	lp_count,r3,3
+	bbit0.d	r3,2,.Lnox4
+	bmsk_s	r2,r2,1
+	st.ab	r12,[r5,4]
+	ld.a	r12,[r1,4]
+.Lnox4:
+	lppnz	.Lendloop
+	ld_s	r3,[r1,4]
+	st.ab	r12,[r5,4]
+	ld.a	r12,[r1,8]
+	st.ab	r3,[r5,4]
+.Lendloop:
+	breq	r2,0,.Last_store
+	ld	r3,[r5,0]
+#ifdef __LITTLE_ENDIAN__
+	add3	r2,-1,r2
+	; uses long immediate
+	xor_s	r12,r12,r3
+	bmsk	r12,r12,r2
+    xor_s	r12,r12,r3
+#else /* BIG ENDIAN */
+	sub3	r2,31,r2
+	; uses long immediate
+        xor_s	r3,r3,r12
+        bmsk	r3,r3,r2
+        xor_s	r12,r12,r3
+#endif /* ENDIAN */
+.Last_store:
+	j_s.d	[blink]
+	st	r12,[r5,0]
+
+	.balign	4
+.Lcopy_bytewise:
+	jcs	[blink]
+	ldb_s	r12,[r1,0]
+	lsr.f	lp_count,r3
+	bhs_s	.Lnox1
+	stb.ab	r12,[r5,1]
+	ldb.a	r12,[r1,1]
+.Lnox1:
+	lppnz	.Lendbloop
+	ldb_s	r3,[r1,1]
+	stb.ab	r12,[r5,1]
+	ldb.a	r12,[r1,2]
+	stb.ab	r3,[r5,1]
+.Lendbloop:
+	j_s.d	[blink]
+	stb	r12,[r5,0]
+ARC_EXIT memcpy
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
new file mode 100644
index 0000000..9b2d88d
--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
+ARC_ENTRY memset
+	mov_s	r4,r0
+	or	r12,r0,r2
+	bmsk.f	r12,r12,1
+	extb_s	r1,r1
+	asl	r3,r1,8
+	beq.d	.Laligned
+	or_s	r1,r1,r3
+	brls	r2,SMALL,.Ltiny
+	add	r3,r2,r0
+	stb	r1,[r3,-1]
+	bclr_s	r3,r3,0
+	stw	r1,[r3,-2]
+	bmsk.f	r12,r0,1
+	add_s	r2,r2,r12
+	sub.ne	r2,r2,4
+	stb.ab	r1,[r4,1]
+	and	r4,r4,-2
+	stw.ab	r1,[r4,2]
+	and	r4,r4,-4
+.Laligned:	; This code address should be aligned for speed.
+	asl	r3,r1,16
+	lsr.f	lp_count,r2,2
+	or_s	r1,r1,r3
+	lpne	.Loop_end
+	st.ab	r1,[r4,4]
+.Loop_end:
+	j_s	[blink]
+
+	.balign	4
+.Ltiny:
+	mov.f	lp_count,r2
+	lpne	.Ltiny_end
+	stb.ab	r1,[r4,1]
+.Ltiny_end:
+	j_s	[blink]
+ARC_EXIT memset
+
+; memzero: @r0 = mem, @r1 = size_t
+; memset:  @r0 = mem, @r1 = char, @r2 = size_t
+
+ARC_ENTRY memzero
+    ; adjust bzero args to memset args
+    mov r2, r1
+    mov r1, 0
+    b  memset    ;tail call so need to tinker with blink
+ARC_EXIT memzero
diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c
new file mode 100644
index 0000000..710b792
--- /dev/null
+++ b/arch/arc/lib/relocate.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <elf.h>
+#include <asm/sections.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+extern char __text_end[];
+
+/*
+ * Base functionality is taken from x86 version with added ARC-specifics
+ */
+int do_elf_reloc_fixups(void)
+{
+	Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
+	Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
+
+	Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
+	Elf32_Addr *offset_ptr_ram;
+
+	do {
+		/* Get the location from the relocation entry */
+		offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;
+
+		/* Check that the location of the relocation is in .text */
+		if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
+		    offset_ptr_rom > last_offset) {
+			unsigned int val;
+			/* Switch to the in-RAM version */
+			offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
+							gd->reloc_off);
+
+			/*
+			 * Use "memcpy" because target location might be
+			 * 16-bit aligned on ARC so we may need to read
+			 * byte-by-byte. On attempt to read entire word by
+			 * CPU throws an exception
+			 */
+			memcpy(&val, offset_ptr_ram, sizeof(int));
+
+			/* If location in ".text" section swap value */
+			if ((unsigned int)offset_ptr_rom <
+			    (unsigned int)&__text_end)
+				val = (val << 16) | (val >> 16);
+
+			/* Check that the target points into .text */
+			if (val >= CONFIG_SYS_TEXT_BASE && val <=
+			    (unsigned int)&__bss_end) {
+				val += gd->reloc_off;
+				/* If location in ".text" section swap value */
+				if ((unsigned int)offset_ptr_rom <
+				    (unsigned int)&__text_end)
+					val = (val << 16) | (val >> 16);
+				memcpy(offset_ptr_ram, &val, sizeof(int));
+			} else {
+				debug("   %p: rom reloc %x, ram %p, value %x, limit %x\n",
+				      re_src, re_src->r_offset, offset_ptr_ram,
+				      val, (unsigned int)&__bss_end);
+			}
+		} else {
+			debug("   %p: rom reloc %x, last %p\n", re_src,
+			      re_src->r_offset, last_offset);
+		}
+		last_offset = offset_ptr_rom;
+
+	} while (++re_src < re_end);
+
+	return 0;
+}
diff --git a/arch/arc/lib/sections.c b/arch/arc/lib/sections.c
new file mode 100644
index 0000000..b0b46a4
--- /dev/null
+++ b/arch/arc/lib/sections.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * For some reason linker sets linker-generated symbols to zero in PIE mode.
+ * A work-around is substitution of linker-generated symbols with
+ * compiler-generated symbols which are properly handled by linker in PAE mode.
+ */
+
+char __bss_start[0] __attribute__((section(".__bss_start")));
+char __bss_end[0] __attribute__((section(".__bss_end")));
+char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
+char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
+char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
+char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
+char __text_start[0] __attribute__((section(".__text_start")));
+char __text_end[0] __attribute__((section(".__text_end")));
+char __init_end[0] __attribute__((section(".__init_end")));
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 0000000..9c548c7
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* ARC700 has a relatively long pipeline and branch prediction, so we want
+   to avoid branches that are hard to predict.  On the other hand, the
+   presence of the norm instruction makes it easier to operate on whole
+   words branch-free.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strchr
+	extb_s	r1,r1
+	asl	r5,r1,8
+	bmsk	r2,r0,1
+	or	r5,r5,r1
+	mov_s	r3,0x01010101
+	breq.d	r2,r0,.Laligned
+	asl	r4,r5,16
+	sub_s	r0,r0,r2
+	asl	r7,r2,3
+	ld_s	r2,[r0]
+#ifdef __LITTLE_ENDIAN__
+	asl	r7,r3,r7
+#else
+	lsr	r7,r3,r7
+#endif
+	or	r5,r5,r4
+	ror	r4,r3
+	sub	r12,r2,r7
+	bic_s	r12,r12,r2
+	and	r12,r12,r4
+	brne.d	r12,0,.Lfound0_ua
+	xor	r6,r2,r5
+	ld.a	r2,[r0,4]
+	sub	r12,r6,r7
+	bic	r12,r12,r6
+#ifdef __LITTLE_ENDIAN__
+	and	r7,r12,r4
+	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
+	b	.Lfound_char ; Likewise this one.
+#else
+	and	r12,r12,r4
+	breq	r12,0,.Loop ; For speed, we want this branch to be unaligned.
+	lsr_s	r12,r12,7
+	bic 	r2,r7,r6
+	b.d	.Lfound_char_b
+	and_s	r2,r2,r12
+#endif
+; /* We require this code address to be unaligned for speed...  */
+.Laligned:
+	ld_s	r2,[r0]
+	or	r5,r5,r4
+	ror	r4,r3
+; /* ... so that this code address is aligned, for itself and ...  */
+.Loop:
+	sub	r12,r2,r3
+	bic_s	r12,r12,r2
+	and	r12,r12,r4
+	brne.d	r12,0,.Lfound0
+	xor	r6,r2,r5
+	ld.a	r2,[r0,4]
+	sub	r12,r6,r3
+	bic	r12,r12,r6
+	and	r7,r12,r4
+	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
+	; Found searched-for character.  r0 has already advanced to next word.
+#ifdef __LITTLE_ENDIAN__
+/* We only need the information about the first matching byte
+   (i.e. the least significant matching byte) to be exact,
+   hence there is no problem with carry effects.  */
+.Lfound_char:
+	sub	r3,r7,1
+	bic	r3,r3,r7
+	norm	r2,r3
+	sub_s	r0,r0,1
+	asr_s	r2,r2,3
+	j.d	[blink]
+	sub_s	r0,r0,r2
+
+	.balign	4
+.Lfound0_ua:
+	mov	r3,r7
+.Lfound0:
+	sub	r3,r6,r3
+	bic	r3,r3,r6
+	and	r2,r3,r4
+	or_s	r12,r12,r2
+	sub_s	r3,r12,1
+	bic_s	r3,r3,r12
+	norm	r3,r3
+	add_s	r0,r0,3
+	asr_s	r12,r3,3
+	asl.f	0,r2,r3
+	sub_s	r0,r0,r12
+	j_s.d	[blink]
+	mov.pl	r0,0
+#else /* BIG ENDIAN */
+.Lfound_char:
+	lsr	r7,r7,7
+
+	bic	r2,r7,r6
+.Lfound_char_b:
+	norm	r2,r2
+	sub_s	r0,r0,4
+	asr_s	r2,r2,3
+	j.d	[blink]
+	add_s	r0,r0,r2
+
+.Lfound0_ua:
+	mov_s	r3,r7
+.Lfound0:
+	asl_s	r2,r2,7
+	or	r7,r6,r4
+	bic_s	r12,r12,r2
+	sub	r2,r7,r3
+	or	r2,r2,r6
+	bic	r12,r2,r12
+	bic.f	r3,r4,r12
+	norm	r3,r3
+
+	add.pl	r3,r3,1
+	asr_s	r12,r3,3
+	asl.f	0,r2,r3
+	add_s	r0,r0,r12
+	j_s.d	[blink]
+	mov.mi	r0,0
+#endif /* ENDIAN */
+ARC_EXIT strchr
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
new file mode 100644
index 0000000..5dc802b
--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* This is optimized primarily for the ARC700.
+   It would be possible to speed up the loops by one cycle / word
+   respective one cycle / byte by forcing double source 1 alignment, unrolling
+   by a factor of two, and speculatively loading the second word / byte of
+   source 1; however, that would increase the overhead for loop setup / finish,
+   and strcmp might often terminate early.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcmp
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne	r2,0,.Lcharloop
+	mov_s	r12,0x01010101
+	ror	r5,r12
+.Lwordloop:
+	ld.ab	r2,[r0,4]
+	ld.ab	r3,[r1,4]
+	nop_s
+	sub	r4,r2,r12
+	bic	r4,r4,r2
+	and	r4,r4,r5
+	brne	r4,0,.Lfound0
+	breq	r2,r3,.Lwordloop
+#ifdef	__LITTLE_ENDIAN__
+	xor	r0,r2,r3	; mask for difference
+	sub_s	r1,r0,1
+	bic_s	r0,r0,r1	; mask for least significant difference bit
+	sub	r1,r5,r0
+	xor	r0,r5,r1	; mask for least significant difference byte
+	and_s	r2,r2,r0
+	and_s	r3,r3,r0
+#endif /* LITTLE ENDIAN */
+	cmp_s	r2,r3
+	mov_s	r0,1
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+
+	.balign	4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+	xor	r0,r2,r3	; mask for difference
+	or	r0,r0,r4	; or in zero indicator
+	sub_s	r1,r0,1
+	bic_s	r0,r0,r1	; mask for least significant difference bit
+	sub	r1,r5,r0
+	xor	r0,r5,r1	; mask for least significant difference byte
+	and_s	r2,r2,r0
+	and_s	r3,r3,r0
+	sub.f	r0,r2,r3
+	mov.hi	r0,1
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+#else /* BIG ENDIAN */
+	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
+	   because of carry-propagateion from a lower significant zero byte.
+	   We can compensate for this by checking that bit0 is zero.
+	   This compensation is not necessary in the step where we
+	   get a low estimate for r2, because in any affected bytes
+	   we already have 0x00 or 0x01, which will remain unchanged
+	   when bit 7 is cleared.  */
+	.balign	4
+.Lfound0:
+	lsr	r0,r4,8
+	lsr_s	r1,r2
+	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
+	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
+	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
+	cmp_s	r3,r2		; ... be independent of trailing garbage
+	or_s	r2,r2,r0	; likewise for r3 > r2
+	bic_s	r3,r3,r0
+	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
+	cmp_s	r2,r3
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+#endif /* ENDIAN */
+
+	.balign	4
+.Lcharloop:
+	ldb.ab	r2,[r0,1]
+	ldb.ab	r3,[r1,1]
+	nop_s
+	breq	r2,0,.Lcmpend
+	breq	r2,r3,.Lcharloop
+.Lcmpend:
+	j_s.d	[blink]
+	sub	r0,r2,r3
+ARC_EXIT strcmp
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000..b7ca4ae
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+   it 8 byte aligned.  Thus, we can do a little read-ahead, without
+   dereferencing a cache line that we should not touch.
+   Note that short and long instructions have been scheduled to avoid
+   branch stalls.
+   The beq_s to r3z could be made unaligned & long to avoid a stall
+   there, but the it is not likely to be taken often, and it
+   would also be likey to cost an unaligned mispredict at the next call.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcpy
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne.d	r2,0,charloop
+	mov_s	r10,r0
+	ld_s	r3,[r1,0]
+	mov	r8,0x01010101
+	bbit0.d	r1,2,loop_start
+	ror	r12,r8
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne	r3z
+	mov_s	r4,r3
+	.balign 4
+loop:
+	ld.a	r3,[r1,4]
+	st.ab	r4,[r10,4]
+loop_start:
+	ld.a	r4,[r1,4]
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne_s	r3z
+	st.ab	r3,[r10,4]
+	sub	r2,r4,r8
+	bic	r2,r2,r4
+	tst	r2,r12
+	beq	loop
+	mov_s	r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z:	bmsk.f	r1,r3,7
+	lsr_s	r3,r3,8
+#else
+r3z:	lsr.f	r1,r3,24
+	asl_s	r3,r3,8
+#endif
+	bne.d	r3z
+	stb.ab	r1,[r10,1]
+	j_s	[blink]
+
+	.balign	4
+charloop:
+	ldb.ab	r3,[r1,1]
+
+
+	brne.d	r3,0,charloop
+	stb.ab	r3,[r10,1]
+	j	[blink]
+ARC_EXIT strcpy
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
new file mode 100644
index 0000000..39759e0
--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strlen
+	or	r3,r0,7
+	ld	r2,[r3,-7]
+	ld.a	r6,[r3,-3]
+	mov	r4,0x01010101
+	; uses long immediate
+#ifdef __LITTLE_ENDIAN__
+	asl_s	r1,r0,3
+	btst_s	r0,2
+	asl	r7,r4,r1
+	ror	r5,r4
+	sub	r1,r2,r7
+	bic_s	r1,r1,r2
+	mov.eq	r7,r4
+	sub	r12,r6,r7
+	bic	r12,r12,r6
+	or.eq	r12,r12,r1
+	and	r12,r12,r5
+	brne	r12,0,.Learly_end
+#else /* BIG ENDIAN */
+	ror	r5,r4
+	btst_s	r0,2
+	mov_s	r1,31
+	sub3	r7,r1,r0
+	sub	r1,r2,r4
+	bic_s	r1,r1,r2
+	bmsk	r1,r1,r7
+	sub	r12,r6,r4
+	bic	r12,r12,r6
+	bmsk.ne	r12,r12,r7
+	or.eq	r12,r12,r1
+	and	r12,r12,r5
+	brne	r12,0,.Learly_end
+#endif /* ENDIAN */
+
+.Loop:
+	ld_s	r2,[r3,4]
+	ld.a	r6,[r3,8]
+	; stall for load result
+	sub	r1,r2,r4
+	bic_s	r1,r1,r2
+	sub	r12,r6,r4
+	bic	r12,r12,r6
+	or	r12,r12,r1
+	and	r12,r12,r5
+	breq r12,0,.Loop
+.Lend:
+	and.f	r1,r1,r5
+	sub.ne	r3,r3,4
+	mov.eq	r1,r12
+#ifdef __LITTLE_ENDIAN__
+	sub_s	r2,r1,1
+	bic_s	r2,r2,r1
+	norm	r1,r2
+	sub_s	r0,r0,3
+	lsr_s	r1,r1,3
+	sub	    r0,r3,r0
+	j_s.d	[blink]
+	sub	    r0,r0,r1
+#else /* BIG ENDIAN */
+	lsr_s	r1,r1,7
+	mov.eq	r2,r6
+	bic_s	r1,r1,r2
+	norm	r1,r1
+	sub	    r0,r3,r0
+	lsr_s	r1,r1,3
+	j_s.d	[blink]
+	add	    r0,r0,r1
+#endif /* ENDIAN */
+.Learly_end:
+	b.d	.Lend
+	sub_s.ne r1,r1,r1
+ARC_EXIT strlen
-- 
1.8.5.3



More information about the U-Boot mailing list