[U-Boot] [PATCH V2 1/3] memcpy: copy one word at a time if possible

Alessandro Rubini rubini-list at gnudd.com
Thu Oct 8 20:23:37 CEST 2009


>> That's true, but I think the most important case is lcd scrolling,
>> where it's usually a big power of two -- that's where we had the #ifdef,
>> so the problem was known, I suppose.
> 
> I think the most important case for *you* is lcd scrolling, but for 99%
> of everyone else, it isn't at all:)

Well, it's a big memcpy, and it has a direct effect on the user. Every
other copy is smaller, or has no interactive value.

> memcpy() and memset() are used 100 times more often in non-lcd
> related code and most boards don't even have LCDs.

That's true. But it's only a boot loader (I just looked at what Nicolas
Pitre did in the kernel for ARM strcpy and, well....).

So I took some measurements (it's one of Pike's rules of programming:

     * Rule 2. Measure. Don't tune for speed until you've measured, and even
       then don't unless one part of the code overwhelms the rest.

)

I booted U-Boot, typed "setenv stdout serial" and then "boot", which goes
over Ethernet. I stopped the system after U-Boot handed control over to
the kernel. Result: 10412 memcpy calls, broken down as follows (count, length):

   3941 4
   1583 6
    772 20
      1 46
      1 47
      3 60
   1024 64
      1 815
      1 888
    770 1148
   1543 1480
      1 2283
      1 3836
    770 4096

So I dare say lengths that are not a multiple of 4 are a minority anyway:
1587 calls, 12689 bytes, i.e. 15.2% of the calls and 0.2% of the data.

The data was collected in memory with the patch below, and post-processed with the following command:

od -An -t d4 logfile | awk '{print $4}' | sort -n | uniq -c

diff --git a/include/configs/nhk8815.h b/include/configs/nhk8815.h
index edd698e..a390f28 100644
--- a/include/configs/nhk8815.h
+++ b/include/configs/nhk8815.h
@@ -28,6 +28,8 @@
 
 #include <nomadik.h>
 
+#define CONFIG_MCLOGSIZE (16*1024)
+
 #define CONFIG_ARM926EJS
 #define CONFIG_NOMADIK
 #define CONFIG_NOMADIK_8815	/* cpu variant */
diff --git a/lib_generic/string.c b/lib_generic/string.c
index 5f7aff9..5afa11e 100644
--- a/lib_generic/string.c
+++ b/lib_generic/string.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <malloc.h>
+#include <common.h>
 
 
 #if 0 /* not used - was: #ifndef __HAVE_ARCH_STRNICMP */
@@ -461,11 +462,29 @@ char * bcopy(const char * src, char * dest, int count)
  * You should not use this function to access IO space, use memcpy_toio()
  * or memcpy_fromio() instead.
  */
+
+#ifndef CONFIG_MCLOGSIZE /* if you want to log the memcpy calls, define it */
+#define CONFIG_MCLOGSIZE 0
+#endif
+struct mclog {int idx; void *dst; const void *src; int cnt;};
+static struct mclog mclog[CONFIG_MCLOGSIZE];
+
 void * memcpy(void *dest, const void *src, size_t count)
 {
 	char *d8 = (char *)dest, *s8 = (char *)src;
 	unsigned long *dl = (unsigned long *)dest, *sl = (unsigned long *)src;
 
+	if (CONFIG_MCLOGSIZE) {
+		static int idx;
+		struct mclog *p = mclog + (idx % (CONFIG_MCLOGSIZE ?: 1));
+		if (!idx) printf("memcpy log at %p, size 0x%x\n",
+				 mclog, sizeof(mclog));
+		p->idx = idx++;
+		p->dst = dest;
+		p->src = src;
+		p->cnt = count;
+	}
+
 	/* if all data is aligned (common case), copy a word at a time */
 	if ( (((int)dest | (int)src | count) & (sizeof(long) - 1)) == 0) {
 		count /= sizeof(unsigned long);


More information about the U-Boot mailing list