[U-Boot] [PATCH v4 3/9] arc: add library functions
    Alexey Brodkin 
    Alexey.Brodkin at synopsys.com
       
    Tue Feb  4 09:56:15 CET 2014
    
    
  
These are library functions used by ARC700 architecture.
Signed-off-by: Alexey Brodkin <abrodkin at synopsys.com>
Cc: Vineet Gupta <vgupta at synopsys.com>
Cc: Francois Bedard <fbedard at synopsys.com>
Cc: Wolfgang Denk <wd at denx.de>
Cc: Heiko Schocher <hs at denx.de>
---
No changes for v4.
 arch/arc/lib/Makefile     |  16 ++++++
 arch/arc/lib/bootm.c      | 106 ++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcmp.S     | 121 +++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcpy-700.S |  63 +++++++++++++++++++++
 arch/arc/lib/memset.S     |  62 ++++++++++++++++++++
 arch/arc/lib/relocate.c   |  72 +++++++++++++++++++++++
 arch/arc/lib/sections.c   |  21 +++++++
 arch/arc/lib/strchr-700.S | 141 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/strcmp.S     |  97 +++++++++++++++++++++++++++++++
 arch/arc/lib/strcpy-700.S |  67 ++++++++++++++++++++++
 arch/arc/lib/strlen.S     |  80 ++++++++++++++++++++++++++
 11 files changed, 846 insertions(+)
 create mode 100644 arch/arc/lib/Makefile
 create mode 100644 arch/arc/lib/bootm.c
 create mode 100644 arch/arc/lib/memcmp.S
 create mode 100644 arch/arc/lib/memcpy-700.S
 create mode 100644 arch/arc/lib/memset.S
 create mode 100644 arch/arc/lib/relocate.c
 create mode 100644 arch/arc/lib/sections.c
 create mode 100644 arch/arc/lib/strchr-700.S
 create mode 100644 arch/arc/lib/strcmp.S
 create mode 100644 arch/arc/lib/strcpy-700.S
 create mode 100644 arch/arc/lib/strlen.S
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
new file mode 100644
index 0000000..7675f85
--- /dev/null
+++ b/arch/arc/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+
+obj-y += sections.o
+obj-y += relocate.o
+obj-y += strchr-700.o
+obj-y += strcmp.o
+obj-y += strcpy-700.o
+obj-y += strlen.o
+obj-y += memcmp.o
+obj-y += memcpy-700.o
+obj-y += memset.o
+obj-$(CONFIG_CMD_BOOTM) += bootm.o
diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c
new file mode 100644
index 0000000..d185a50
--- /dev/null
+++ b/arch/arc/lib/bootm.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+static ulong get_sp(void)
+{
+	ulong ret;
+
+	asm("mov %0, sp" : "=r"(ret) : );
+	return ret;
+}
+
+void arch_lmb_reserve(struct lmb *lmb)
+{
+	ulong sp;
+
+	/*
+	 * Booting a (Linux) kernel image
+	 *
+	 * Allocate space for command line and board info - the
+	 * address should be as high as possible within the reach of
+	 * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
+	 * memory, which means far enough below the current stack
+	 * pointer.
+	 */
+	sp = get_sp();
+	debug("## Current stack ends at 0x%08lx ", sp);
+
+	/* adjust sp by 4K to be safe */
+	sp -= 4096;
+	lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
+}
+
+static int cleanup_before_linux(void)
+{
+	disable_interrupts();
+	flush_dcache_all();
+	invalidate_icache_all();
+
+	return 0;
+}
+
+/* Subcommand: PREP */
+static void boot_prep_linux(bootm_headers_t *images)
+{
+	if (image_setup_linux(images))
+		hang();
+}
+
+/* Subcommand: GO */
+static void boot_jump_linux(bootm_headers_t *images, int flag)
+{
+	void (*kernel_entry)(int zero, int arch, uint params);
+	unsigned int r0, r2;
+	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+	kernel_entry = (void (*)(int, int, uint))images->ep;
+
+	debug("## Transferring control to Linux (at address %08lx)...\n",
+	      (ulong) kernel_entry);
+	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+	printf("\nStarting kernel ...%s\n\n", fake ?
+	       "(fake run for tracing)" : "");
+	bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
+
+	cleanup_before_linux();
+
+	if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
+		r0 = 2;
+		r2 = (unsigned int)images->ft_addr;
+	} else {
+		r0 = 1;
+		r2 = (unsigned int)getenv("bootargs");
+	}
+
+	if (!fake)
+		kernel_entry(r0, 0, r2);
+}
+
+int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
+{
+	/* No need for those on ARC */
+	if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
+		return -1;
+
+	if (flag & BOOTM_STATE_OS_PREP) {
+		boot_prep_linux(images);
+		return 0;
+	}
+
+	if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
+		boot_jump_linux(images, flag);
+		return 0;
+	}
+
+	boot_prep_linux(images);
+	boot_jump_linux(images, flag);
+	return 0;
+}
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 0000000..fa5aac5
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* __BIG_ENDIAN__ */
+#define WORD2 r3
+#define SHIFT r2
+#endif /* _ENDIAN__ */
+
+.global memcmp
+.align 4
+memcmp:
+	or	%r12, %r0, %r1
+	asl_s	%r12, %r12, 30
+	sub	%r3, %r2, 1
+	brls	%r2, %r12, .Lbytewise
+	ld	%r4, [%r0, 0]
+	ld	%r5, [%r1, 0]
+	lsr.f	%lp_count, %r3, 3
+	lpne	.Loop_end
+	ld_s	WORD2, [%r0, 4]
+	ld_s	%r12, [%r1, 4]
+	brne	%r4, %r5, .Leven
+	ld.a	%r4, [%r0, 8]
+	ld.a	%r5, [%r1, 8]
+	brne	WORD2, %r12, .Lodd
+.Loop_end:
+	asl_s	SHIFT, SHIFT, 3
+	bhs_s	.Last_cmp
+	brne	%r4, %r5, .Leven
+	ld	%r4, [%r0, 4]
+	ld	%r5, [%r1, 4]
+#ifdef __LITTLE_ENDIAN__
+	nop_s
+	/* one more load latency cycle */
+.Last_cmp:
+	xor	%r0, %r4, %r5
+	bset	%r0, %r0, SHIFT
+	sub_s	%r1, %r0, 1
+	bic_s	%r1, %r1, %r0
+	norm	%r1, %r1
+	b.d	.Leven_cmp
+	and	%r1, %r1, 24
+.Leven:
+	xor	%r0, %r4, %r5
+	sub_s	%r1, %r0, 1
+	bic_s	%r1, %r1, %r0
+	norm	%r1, %r1
+	/* slow track insn */
+	and	%r1, %r1, 24
+.Leven_cmp:
+	asl	%r2, %r4, %r1
+	asl	%r12, %r5, %r1
+	lsr_s	%r2, %r2, 1
+	lsr_s	%r12, %r12, 1
+	j_s.d	[%blink]
+	sub	%r0, %r2, %r12
+	.balign	4
+.Lodd:
+	xor	%r0, WORD2, %r12
+	sub_s	%r1, %r0, 1
+	bic_s	%r1, %r1, %r0
+	norm	%r1, %r1
+	/* slow track insn */
+	and	%r1, %r1, 24
+	asl_s	%r2, %r2, %r1
+	asl_s	%r12, %r12, %r1
+	lsr_s	%r2, %r2, 1
+	lsr_s	%r12, %r12, 1
+	j_s.d	[%blink]
+	sub	%r0, %r2, %r12
+#else /* __BIG_ENDIAN__ */
+.Last_cmp:
+	neg_s	SHIFT, SHIFT
+	lsr	%r4, %r4, SHIFT
+	lsr	%r5, %r5, SHIFT
+	/* slow track insn */
+.Leven:
+	sub.f	%r0, %r4, %r5
+	mov.ne	%r0, 1
+	j_s.d	[%blink]
+	bset.cs	%r0, %r0, 31
+.Lodd:
+	cmp_s	WORD2, %r12
+
+	mov_s	%r0, 1
+	j_s.d	[%blink]
+	bset.cs	%r0, %r0, 31
+#endif /* _ENDIAN__ */
+	.balign	4
+.Lbytewise:
+	breq	%r2, 0, .Lnil
+	ldb	%r4, [%r0, 0]
+	ldb	%r5, [%r1, 0]
+	lsr.f	%lp_count, %r3
+	lpne	.Lbyte_end
+	ldb_s	%r3, [%r0, 1]
+	ldb	%r12, [%r1, 1]
+	brne	%r4, %r5, .Lbyte_even
+	ldb.a	%r4, [%r0, 2]
+	ldb.a	%r5, [%r1, 2]
+	brne	%r3, %r12, .Lbyte_odd
+.Lbyte_end:
+	bcc	.Lbyte_even
+	brne	%r4, %r5, .Lbyte_even
+	ldb_s	%r3, [%r0, 1]
+	ldb_s	%r12, [%r1, 1]
+.Lbyte_odd:
+	j_s.d	[%blink]
+	sub	%r0, %r3, %r12
+.Lbyte_even:
+	j_s.d	[%blink]
+	sub	%r0, %r4, %r5
+.Lnil:
+	j_s.d	[%blink]
+	mov	%r0, 0
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
new file mode 100644
index 0000000..51dd73a
--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+.global memcpy
+.align 4
+memcpy:
+	or	%r3, %r0, %r1
+	asl_s	%r3, %r3, 30
+	mov_s	%r5, %r0
+	brls.d	%r2, %r3, .Lcopy_bytewise
+	sub.f	%r3, %r2, 1
+	ld_s	%r12, [%r1, 0]
+	asr.f	%lp_count, %r3, 3
+	bbit0.d	%r3, 2, .Lnox4
+	bmsk_s	%r2, %r2, 1
+	st.ab	%r12, [%r5, 4]
+	ld.a	%r12, [%r1, 4]
+.Lnox4:
+	lppnz	.Lendloop
+	ld_s	%r3, [%r1, 4]
+	st.ab	%r12, [%r5, 4]
+	ld.a	%r12, [%r1, 8]
+	st.ab	%r3, [%r5, 4]
+.Lendloop:
+	breq	%r2, 0, .Last_store
+	ld	%r3, [%r5, 0]
+#ifdef __LITTLE_ENDIAN__
+	add3	%r2, -1, %r2
+	/* uses long immediate */
+	xor_s	%r12, %r12, %r3
+	bmsk	%r12, %r12, %r2
+	xor_s	%r12, %r12, %r3
+#else /* __BIG_ENDIAN__ */
+	sub3	%r2, 31, %r2
+	/* uses long immediate */
+	xor_s	%r3, %r3, %r12
+	bmsk	%r3, %r3, %r2
+	xor_s	%r12, %r12, %r3
+#endif /* _ENDIAN__ */
+.Last_store:
+	j_s.d	[%blink]
+	st	%r12, [%r5, 0]
+
+	.balign	4
+.Lcopy_bytewise:
+	jcs	[%blink]
+	ldb_s	%r12, [%r1, 0]
+	lsr.f	%lp_count, %r3
+	bhs_s	.Lnox1
+	stb.ab	%r12, [%r5, 1]
+	ldb.a	%r12, [%r1, 1]
+.Lnox1:
+	lppnz	.Lendbloop
+	ldb_s	%r3, [%r1, 1]
+	stb.ab	%r12, [%r5, 1]
+	ldb.a	%r12, [%r1, 2]
+	stb.ab	%r3, [%r5, 1]
+.Lendbloop:
+	j_s.d	[%blink]
+	stb	%r12, [%r5, 0]
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
new file mode 100644
index 0000000..017e8af
--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
+.global memset
+.align 4
+memset:
+	mov_s	%r4, %r0
+	or	%r12, %r0, %r2
+	bmsk.f	%r12, %r12, 1
+	extb_s	%r1, %r1
+	asl	%r3, %r1, 8
+	beq.d	.Laligned
+	or_s	%r1, %r1, %r3
+	brls	%r2, SMALL, .Ltiny
+	add	%r3, %r2, %r0
+	stb	%r1, [%r3, -1]
+	bclr_s	%r3, %r3, 0
+	stw	%r1, [%r3, -2]
+	bmsk.f	%r12, %r0, 1
+	add_s	%r2, %r2, %r12
+	sub.ne	%r2, %r2, 4
+	stb.ab	%r1, [%r4, 1]
+	and	%r4, %r4, -2
+	stw.ab	%r1, [%r4, 2]
+	and	%r4, %r4, -4
+
+	.balign	4
+.Laligned:
+	asl	%r3, %r1, 16
+	lsr.f	%lp_count, %r2, 2
+	or_s	%r1, %r1, %r3
+	lpne	.Loop_end
+	st.ab	%r1, [%r4, 4]
+.Loop_end:
+	j_s	[%blink]
+
+	.balign	4
+.Ltiny:
+	mov.f	%lp_count, %r2
+	lpne	.Ltiny_end
+	stb.ab	%r1, [%r4, 1]
+.Ltiny_end:
+	j_s	[%blink]
+
+/*
+ * memzero: @r0 = mem, @r1 = size_t
+ * memset:  @r0 = mem, @r1 = char, @r2 = size_t
+ */
+
+.global memzero
+.align 4
+memzero:
+	/* adjust bzero args to memset args */
+	mov	%r2, %r1
+	mov	%r1, 0
+	/* tail call so need to tinker with blink */
+	b	memset
diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c
new file mode 100644
index 0000000..956aa14
--- /dev/null
+++ b/arch/arc/lib/relocate.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <elf.h>
+#include <asm/sections.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/*
+ * Base functionality is taken from x86 version with added ARC-specifics
+ */
+int do_elf_reloc_fixups(void)
+{
+	Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
+	Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
+
+	Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
+	Elf32_Addr *offset_ptr_ram;
+
+	do {
+		/* Get the location from the relocation entry */
+		offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;
+
+		/* Check that the location of the relocation is in .text */
+		if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
+		    offset_ptr_rom > last_offset) {
+			unsigned int val;
+			/* Switch to the in-RAM version */
+			offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
+							gd->reloc_off);
+
+			/*
+			 * Use "memcpy" because target location might be
+			 * 16-bit aligned on ARC so we may need to read
+			 * byte-by-byte. On attempt to read entire word by
+			 * CPU throws an exception
+			 */
+			memcpy(&val, offset_ptr_ram, sizeof(int));
+
+			/* If location in ".text" section swap value */
+			if ((unsigned int)offset_ptr_rom <
+			    (unsigned int)&__text_end)
+				val = (val << 16) | (val >> 16);
+
+			/* Check that the target points into .text */
+			if (val >= CONFIG_SYS_TEXT_BASE && val <=
+			    (unsigned int)&__bss_end) {
+				val += gd->reloc_off;
+				/* If location in ".text" section swap value */
+				if ((unsigned int)offset_ptr_rom <
+				    (unsigned int)&__text_end)
+					val = (val << 16) | (val >> 16);
+				memcpy(offset_ptr_ram, &val, sizeof(int));
+			} else {
+				debug("   %p: rom reloc %x, ram %p, value %x, limit %x\n",
+				      re_src, re_src->r_offset, offset_ptr_ram,
+				      val, (unsigned int)&__bss_end);
+			}
+		} else {
+			debug("   %p: rom reloc %x, last %p\n", re_src,
+			      re_src->r_offset, last_offset);
+		}
+		last_offset = offset_ptr_rom;
+
+	} while (++re_src < re_end);
+
+	return 0;
+}
diff --git a/arch/arc/lib/sections.c b/arch/arc/lib/sections.c
new file mode 100644
index 0000000..b0b46a4
--- /dev/null
+++ b/arch/arc/lib/sections.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * For some reason linker sets linker-generated symbols to zero in PIE mode.
+ * A work-around is substitution of linker-generated symbols with
+ * compiler-generated symbols which are properly handled by linker in PAE mode.
+ */
+
+char __bss_start[0] __attribute__((section(".__bss_start")));
+char __bss_end[0] __attribute__((section(".__bss_end")));
+char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
+char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
+char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
+char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
+char __text_start[0] __attribute__((section(".__text_start")));
+char __text_end[0] __attribute__((section(".__text_end")));
+char __init_end[0] __attribute__((section(".__init_end")));
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 0000000..55fcc9f
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * ARC700 has a relatively long pipeline and branch prediction, so we want
+ * to avoid branches that are hard to predict.  On the other hand, the
+ * presence of the norm instruction makes it easier to operate on whole
+ * words branch-free.
+ */
+
+.global strchr
+.align 4
+strchr:
+	extb_s	%r1, %r1
+	asl	%r5, %r1, 8
+	bmsk	%r2, %r0, 1
+	or	%r5, %r5, %r1
+	mov_s	%r3, 0x01010101
+	breq.d	%r2, %r0, .Laligned
+	asl	%r4, %r5, 16
+	sub_s	%r0, %r0, %r2
+	asl	%r7, %r2, 3
+	ld_s	%r2, [%r0]
+#ifdef __LITTLE_ENDIAN__
+	asl	%r7, %r3, %r7
+#else /* __BIG_ENDIAN__ */
+	lsr	%r7, %r3, %r7
+#endif /* _ENDIAN__ */
+	or	%r5, %r5, %r4
+	ror	%r4, %r3
+	sub	%r12, %r2, %r7
+	bic_s	%r12, %r12, %r2
+	and	%r12, %r12, %r4
+	brne.d	%r12, 0, .Lfound0_ua
+	xor	%r6, %r2, %r5
+	ld.a	%r2, [%r0, 4]
+	sub	%r12, %r6, %r7
+	bic	%r12, %r12, %r6
+#ifdef __LITTLE_ENDIAN__
+	and	%r7, %r12, %r4
+	/* For speed, we want this branch to be unaligned. */
+	breq	%r7, 0, .Loop
+	/* Likewise this one */
+	b	.Lfound_char
+#else /* __BIG_ENDIAN__ */
+	and	%r12, %r12, %r4
+	/* For speed, we want this branch to be unaligned. */
+	breq	%r12, 0, .Loop
+	lsr_s	%r12, %r12, 7
+	bic 	%r2, %r7, %r6
+	b.d	.Lfound_char_b
+	and_s	%r2, %r2, %r12
+#endif /* _ENDIAN__ */
+	/* We require this code address to be unaligned for speed...  */
+.Laligned:
+	ld_s	%r2, [%r0]
+	or	%r5, %r5, %r4
+	ror	%r4, %r3
+	/* ... so that this code address is aligned, for itself and ...  */
+.Loop:
+	sub	%r12, %r2, %r3
+	bic_s	%r12, %r12, %r2
+	and	%r12, %r12, %r4
+	brne.d	%r12, 0, .Lfound0
+	xor	%r6, %r2, %r5
+	ld.a	%r2, [%r0, 4]
+	sub	%r12, %r6, %r3
+	bic	%r12, %r12, %r6
+	and	%r7, %r12, %r4
+	breq	%r7, 0, .Loop
+	/*
+	 *... so that this branch is unaligned.
+	 * Found searched-for character.
+	 * r0 has already advanced to next word.
+	 */
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * We only need the information about the first matching byte
+	 * (i.e. the least significant matching byte) to be exact,
+	 * hence there is no problem with carry effects.
+	 */
+.Lfound_char:
+	sub	%r3, %r7, 1
+	bic	%r3, %r3, %r7
+	norm	%r2, %r3
+	sub_s	%r0, %r0, 1
+	asr_s	%r2, %r2, 3
+	j.d	[%blink]
+	sub_s	%r0, %r0, %r2
+
+	.balign	4
+.Lfound0_ua:
+	mov	%r3, %r7
+.Lfound0:
+	sub	%r3, %r6, %r3
+	bic	%r3, %r3, %r6
+	and	%r2, %r3, %r4
+	or_s	%r12, %r12, %r2
+	sub_s	%r3, %r12, 1
+	bic_s	%r3, %r3, %r12
+	norm	%r3, %r3
+	add_s	%r0, %r0, 3
+	asr_s	%r12, %r3, 3
+	asl.f	0, %r2, %r3
+	sub_s	%r0, %r0, %r12
+	j_s.d	[%blink]
+	mov.pl	%r0, 0
+#else /* __BIG_ENDIAN__ */
+.Lfound_char:
+	lsr	%r7, %r7, 7
+
+	bic	%r2, %r7, %r6
+.Lfound_char_b:
+	norm	%r2, %r2
+	sub_s	%r0, %r0, 4
+	asr_s	%r2, %r2, 3
+	j.d	[%blink]
+	add_s	%r0, %r0, %r2
+
+.Lfound0_ua:
+	mov_s	%r3, %r7
+.Lfound0:
+	asl_s	%r2, %r2, 7
+	or	%r7, %r6, %r4
+	bic_s	%r12, %r12, %r2
+	sub	%r2, %r7, %r3
+	or	%r2, %r2, %r6
+	bic	%r12, %r2, %r12
+	bic.f	%r3, %r4, %r12
+	norm	%r3, %r3
+
+	add.pl	%r3, %r3, 1
+	asr_s	%r12, %r3, 3
+	asl.f	0, %r2, %r3
+	add_s	%r0, %r0, %r12
+	j_s.d	[%blink]
+	mov.mi	%r0, 0
+#endif /* _ENDIAN__ */
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
new file mode 100644
index 0000000..8cb7d2f
--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * This is optimized primarily for the ARC700.
+ * It would be possible to speed up the loops by one cycle / word
+ * respective one cycle / byte by forcing double source 1 alignment, unrolling
+ * by a factor of two, and speculatively loading the second word / byte of
+ * source 1; however, that would increase the overhead for loop setup / finish,
+ * and strcmp might often terminate early.
+ */
+
+.global strcmp
+.align 4
+strcmp:
+	or	%r2, %r0, %r1
+	bmsk_s	%r2, %r2, 1
+	brne	%r2, 0, .Lcharloop
+	mov_s	%r12, 0x01010101
+	ror	%r5, %r12
+.Lwordloop:
+	ld.ab	%r2, [%r0, 4]
+	ld.ab	%r3, [%r1, 4]
+	nop_s
+	sub	%r4, %r2, %r12
+	bic	%r4, %r4, %r2
+	and	%r4, %r4, %r5
+	brne	%r4, 0, .Lfound0
+	breq	%r2 ,%r3, .Lwordloop
+#ifdef	__LITTLE_ENDIAN__
+	xor	%r0, %r2, %r3	/* mask for difference */
+	sub_s	%r1, %r0, 1
+	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
+	sub	%r1, %r5, %r0
+	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
+	and_s	%r2, %r2, %r0
+	and_s	%r3, %r3, %r0
+#endif /* _ENDIAN__ */
+	cmp_s	%r2, %r3
+	mov_s	%r0, 1
+	j_s.d	[%blink]
+	bset.lo	%r0, %r0, 31
+
+	.balign	4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+	xor	%r0, %r2, %r3	/* mask for difference */
+	or	%r0, %r0, %r4	/* or in zero indicator */
+	sub_s	%r1, %r0, 1
+	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
+	sub	%r1, %r5, %r0
+	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
+	and_s	%r2, %r2, %r0
+	and_s	%r3, %r3, %r0
+	sub.f	%r0, %r2, %r3
+	mov.hi	%r0, 1
+	j_s.d	[%blink]
+	bset.lo	%r0, %r0, 31
+#else /* __BIG_ENDIAN__ */
+	/*
+	 * The zero-detection above can mis-detect 0x01 bytes as zeroes
+	 * because of carry-propagateion from a lower significant zero byte.
+	 * We can compensate for this by checking that bit0 is zero.
+	 * This compensation is not necessary in the step where we
+	 * get a low estimate for r2, because in any affected bytes
+	 * we already have 0x00 or 0x01, which will remain unchanged
+	 * when bit 7 is cleared.
+	 */
+	.balign	4
+.Lfound0:
+	lsr	%r0, %r4, 8
+	lsr_s	%r1, %r2
+	bic_s	%r2, %r2, %r0	/* get low estimate for r2 and get ... */
+	bic_s	%r0, %r0, %r1	/* <this is the adjusted mask for zeros> */
+	or_s	%r3, %r3, %r0	/* ... high estimate r3 so that r2 > r3 will */
+	cmp_s	%r3, %r2	/* ... be independent of trailing garbage */
+	or_s	%r2, %r2, %r0	/* likewise for r3 > r2 */
+	bic_s	%r3, %r3, %r0
+	rlc	%r0, 0		/* r0 := r2 > r3 ? 1 : 0 */
+	cmp_s	%r2, %r3
+	j_s.d	[%blink]
+	bset.lo	%r0, %r0, 31
+#endif /* _ENDIAN__ */
+
+	.balign	4
+.Lcharloop:
+	ldb.ab	%r2,[%r0,1]
+	ldb.ab	%r3,[%r1,1]
+	nop_s
+	breq	%r2, 0, .Lcmpend
+	breq	%r2, %r3, .Lcharloop
+.Lcmpend:
+	j_s.d	[%blink]
+	sub	%r0, %r2, %r3
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000..41bb53e
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * If dst and src are 4 byte aligned, copy 8 bytes at a time.
+ * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+ * it 8 byte aligned.  Thus, we can do a little read-ahead, without
+ * dereferencing a cache line that we should not touch.
+ * Note that short and long instructions have been scheduled to avoid
+ * branch stalls.
+ * The beq_s to r3z could be made unaligned & long to avoid a stall
+ * there, but it is not likely to be taken often, and it would also be likely
+ * to cost an unaligned mispredict at the next call.
+ */
+
+.global strcpy
+.align 4
+strcpy:
+	or	%r2, %r0, %r1
+	bmsk_s	%r2, %r2, 1
+	brne.d	%r2, 0, charloop
+	mov_s	%r10, %r0
+	ld_s	%r3, [%r1, 0]
+	mov	%r8, 0x01010101
+	bbit0.d	%r1, 2, loop_start
+	ror	%r12, %r8
+	sub	%r2, %r3, %r8
+	bic_s	%r2, %r2, %r3
+	tst_s	%r2,%r12
+	bne	r3z
+	mov_s	%r4,%r3
+	.balign 4
+loop:
+	ld.a	%r3, [%r1, 4]
+	st.ab	%r4, [%r10, 4]
+loop_start:
+	ld.a	%r4, [%r1, 4]
+	sub	%r2, %r3, %r8
+	bic_s	%r2, %r2, %r3
+	tst_s	%r2, %r12
+	bne_s	r3z
+	st.ab	%r3, [%r10, 4]
+	sub	%r2, %r4, %r8
+	bic	%r2, %r2, %r4
+	tst	%r2, %r12
+	beq	loop
+	mov_s	%r3, %r4
+#ifdef __LITTLE_ENDIAN__
+r3z:	bmsk.f	%r1, %r3, 7
+	lsr_s	%r3, %r3, 8
+#else /* __BIG_ENDIAN__ */
+r3z:	lsr.f	%r1, %r3, 24
+	asl_s	%r3, %r3, 8
+#endif /* _ENDIAN__ */
+	bne.d	r3z
+	stb.ab	%r1, [%r10, 1]
+	j_s	[%blink]
+
+	.balign	4
+charloop:
+	ldb.ab	%r3, [%r1, 1]
+	brne.d	%r3, 0, charloop
+	stb.ab	%r3, [%r10, 1]
+	j	[%blink]
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
new file mode 100644
index 0000000..666e22c
--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+.global strlen
+.align 4
+strlen:
+	or	%r3, %r0, 7
+	ld	%r2, [%r3, -7]
+	ld.a	%r6, [%r3, -3]
+	mov	%r4, 0x01010101
+	/* uses long immediate */
+#ifdef __LITTLE_ENDIAN__
+	asl_s	%r1, %r0, 3
+	btst_s	%r0, 2
+	asl	%r7, %r4, %r1
+	ror	%r5, %r4
+	sub	%r1, %r2, %r7
+	bic_s	%r1, %r1, %r2
+	mov.eq	%r7, %r4
+	sub	%r12, %r6, %r7
+	bic	%r12, %r12, %r6
+	or.eq	%r12, %r12, %r1
+	and	%r12, %r12, %r5
+	brne	%r12, 0, .Learly_end
+#else /* __BIG_ENDIAN__ */
+	ror	%r5, %r4
+	btst_s	%r0, 2
+	mov_s	%r1, 31
+	sub3	%r7, %r1, %r0
+	sub	%r1, %r2, %r4
+	bic_s	%r1, %r1, %r2
+	bmsk	%r1, %r1, %r7
+	sub	%r12, %r6, %r4
+	bic	%r12, %r12, %r6
+	bmsk.ne	%r12, %r12, %r7
+	or.eq	%r12, %r12, %r1
+	and	%r12, %r12, %r5
+	brne	%r12, 0, .Learly_end
+#endif /* _ENDIAN__ */
+
+.Loop:
+	ld_s	%r2, [%r3, 4]
+	ld.a	%r6, [%r3, 8]
+	/* stall for load result */
+	sub	%r1, %r2, %r4
+	bic_s	%r1, %r1, %r2
+	sub	%r12, %r6, %r4
+	bic	%r12, %r12, %r6
+	or	%r12, %r12, %r1
+	and	%r12, %r12, %r5
+	breq	%r12, 0, .Loop
+.Lend:
+	and.f	%r1, %r1, %r5
+	sub.ne	%r3, %r3, 4
+	mov.eq	%r1, %r12
+#ifdef __LITTLE_ENDIAN__
+	sub_s	%r2, %r1, 1
+	bic_s	%r2, %r2, %r1
+	norm	%r1, %r2
+	sub_s	%r0, %r0, 3
+	lsr_s	%r1, %r1, 3
+	sub	%r0, %r3, %r0
+	j_s.d	[%blink]
+	sub	%r0, %r0, %r1
+#else /* __BIG_ENDIAN__ */
+	lsr_s	%r1, %r1, 7
+	mov.eq	%r2, %r6
+	bic_s	%r1, %r1, %r2
+	norm	%r1, %r1
+	sub	%r0, %r3, %r0
+	lsr_s	%r1, %r1, 3
+	j_s.d	[%blink]
+	add	%r0, %r0, %r1
+#endif /* _ENDIAN */
+.Learly_end:
+	b.d	.Lend
+	sub_s.ne %r1, %r1, %r1
-- 
1.8.5.3
    
    
More information about the U-Boot
mailing list