[U-Boot-Users] [PATCH] MPC83xx platform memory access performance improved
Liu Dave-r63238
DaveLiu at freescale.com
Tue May 30 07:06:21 CEST 2006
Hi Wolfgang,
Here is a patch that improves DMA performance by 6x and
memory write access performance by 2.5x, and fixes some bugs.
The changes are:
1. Bus pipeline
Set the CSB bus pipeline depth to 4, which allows 4 outstanding
transactions on the CSB bus.
2. Memory write
When ECC is enabled, the CPU writes 64 bits at a time, which makes DDR bus usage more efficient.
3. DMA performance
DMA transfers now use cache-line burst reads and burst writes.
4. ECC test
I fixed some bugs in the ECC test code and enhanced it.
5. Platform cleanup and bug fixes
I cleaned up the start.S file and changed the comments that were
carried over from 826x and 74xx.
The write performance is:
ECC on
write duration: 646 ms
ECC off
write duration: 646 ms
DMA ECC on
DDR init duration: 1338 ms
DMA ECC off
DDR init duration: 968 ms
Best Regards,
Dave
==========
diff -Naur u-boot/board/mpc8349emds/mpc8349emds.c u-boot-new/board/mpc8349emds/mpc8349emds.c
--- u-boot/board/mpc8349emds/mpc8349emds.c 2006-05-26 13:06:44.000000000 +0800
+++ u-boot-new/board/mpc8349emds/mpc8349emds.c 2006-05-30 11:07:00.000000000 +0800
@@ -235,7 +235,7 @@
#else
void sdram_init(void)
{
- put("SDRAM on Local Bus is NOT available!\n");
+ puts("SDRAM on Local Bus is NOT available!\n");
}
#endif
@@ -323,14 +323,24 @@
volatile immap_t *immap = (immap_t *)CFG_IMMRBAR;
volatile ddr8349_t *ddr = &immap->ddr;
volatile u32 val;
- u64 *addr, count, val64;
+ u64 *addr;
+ u32 count;
register u64 *i;
+ u32 ret[2];
+ u32 pattern[2];
+ u32 writeback[2];
+
+ pattern[0] = 0xfedcba98UL;
+ pattern[1] = 0x76543210UL;
+
+ writeback[0] = 0x01234567UL;
+ writeback[1] = 0x89abcdefUL;
if (argc > 4) {
printf ("Usage:\n%s\n", cmdtp->usage);
return 1;
}
-
+
if (argc == 2) {
if (strcmp(argv[1], "status") == 0) {
ecc_print_status();
@@ -343,8 +353,8 @@
ddr->capture_attributes = 0;
return 0;
}
- }
-
+ }
+
if (argc == 3) {
if (strcmp(argv[1], "sbecnt") == 0) {
val = simple_strtoul(argv[2], NULL, 10);
@@ -380,8 +390,8 @@
} else if (strcmp(argv[2], "+mse") == 0) {
val |= ECC_ERROR_DISABLE_MSED;
} else if (strcmp(argv[2], "+all") == 0) {
- val |= (ECC_ERROR_DISABLE_SBED |
- ECC_ERROR_DISABLE_MBED |
+ val |= (ECC_ERROR_DISABLE_SBED |
+ ECC_ERROR_DISABLE_MBED |
ECC_ERROR_DISABLE_MSED);
} else if (strcmp(argv[2], "-sbe") == 0) {
val &= ~ECC_ERROR_DISABLE_SBED;
@@ -390,8 +400,8 @@
} else if (strcmp(argv[2], "-mse") == 0) {
val &= ~ECC_ERROR_DISABLE_MSED;
} else if (strcmp(argv[2], "-all") == 0) {
- val &= ~(ECC_ERROR_DISABLE_SBED |
- ECC_ERROR_DISABLE_MBED |
+ val &= ~(ECC_ERROR_DISABLE_SBED |
+ ECC_ERROR_DISABLE_MBED |
ECC_ERROR_DISABLE_MSED);
} else {
printf("Incorrect err_disable field\n");
@@ -475,7 +485,7 @@
}
if (argc == 4) {
- if (strcmp(argv[1], "test") == 0) {
+ if (strcmp(argv[1], "testdw") == 0) {
addr = (u64 *)simple_strtoul(argv[2], NULL, 16);
count = simple_strtoul(argv[3], NULL, 16);
@@ -485,16 +495,16 @@
}
disable_interrupts();
- icache_disable();
for (i = addr; i < addr + count; i++) {
+
/* enable injects */
ddr->ecc_err_inject |= ECC_ERR_INJECT_EIEN;
__asm__ __volatile__ ("sync");
__asm__ __volatile__ ("isync");
/* write memory location injecting errors */
- *i = 0x1122334455667788ULL;
+ ppcDWstore((u32*)i, pattern);
__asm__ __volatile__ ("sync");
/* disable injects */
@@ -503,30 +513,65 @@
__asm__ __volatile__ ("isync");
/* read data, this generates ECC error */
- val64 = *i;
+ ppcDWload((u32*)i, ret);
__asm__ __volatile__ ("sync");
- /* disable errors for ECC */
- ddr->err_disable |= ~ECC_ERROR_ENABLE;
+ /* re-initialize memory, double word write the location again,
+ * generates new ECC code this time */
+ ppcDWstore((u32*)i, writeback);
+ __asm__ __volatile__ ("sync");
+
+ }
+
+ enable_interrupts();
+
+ return 0;
+ }
+
+ if (strcmp(argv[1], "testword") == 0) {
+ addr = (u64 *)simple_strtoul(argv[2], NULL, 16);
+ count = simple_strtoul(argv[3], NULL, 16);
+
+ if ((u32)addr % 8) {
+ printf("Address not alligned on double word boundary\n");
+ return 1;
+ }
+
+ disable_interrupts();
+
+ for (i = addr; i < addr + count; i++) {
+
+ /* enable injects */
+ ddr->ecc_err_inject |= ECC_ERR_INJECT_EIEN;
__asm__ __volatile__ ("sync");
__asm__ __volatile__ ("isync");
- /* re-initialize memory, write the location again
- * NOT injecting errors this time */
- *i = 0xcafecafecafecafeULL;
+ /* write memory location injecting errors */
+ *(u32*)i = 0xfedcba98UL;
+ __asm__ __volatile__ ("sync");
+
+ /* sub double word write, bus will read-modify-write, generates ECC error */
+ *((u32*)i+1) = 0x76543210UL;
__asm__ __volatile__ ("sync");
- /* enable errors for ECC */
- ddr->err_disable &= ECC_ERROR_ENABLE;
+ /* disable injects */
+ ddr->ecc_err_inject &= ~ECC_ERR_INJECT_EIEN;
__asm__ __volatile__ ("sync");
__asm__ __volatile__ ("isync");
- }
- icache_enable();
+ /* re-initialize memory, double word write the location again,
+ * generates new ECC code this time */
+ ppcDWstore((u32*)i, writeback);
+ __asm__ __volatile__ ("sync");
+
+ }
+
enable_interrupts();
return 0;
}
+
+
}
printf ("Usage:\n%s\n", cmdtp->usage);
@@ -556,11 +601,17 @@
"ecc injectecc <ecc> - set ECC Error Injection Mask\n"
"ecc inject <en|dis> - enable/disable error injection\n"
"ecc mirror <en|dis> - enable/disable mirror byte\n"
- "ecc test <addr> <cnt> - test mem region:\n"
+ "ecc testdw <addr> <cnt> - test mem region with double word access:\n"
+ " - enables injects\n"
+ " - writes pattern injecting errors with double word access\n"
+ " - disables injects\n"
+ " - reads pattern back with double word access, generates error\n"
+ " - re-inits memory\n"
+ "ecc testword <addr> <cnt> - test mem region with word access:\n"
" - enables injects\n"
- " - writes pattern injecting errors\n"
+ " - writes pattern injecting errors with word access\n"
+ " - writes pattern with word access, generates error\n"
" - disables injects\n"
- " - reads pattern back, generates error\n"
" - re-inits memory"
);
#endif /* if defined(CONFIG_DDR_ECC) && defined(CONFIG_DDR_ECC_CMD) */
diff -Naur u-boot/cpu/mpc83xx/cpu.c u-boot-new/cpu/mpc83xx/cpu.c
--- u-boot/cpu/mpc83xx/cpu.c 2006-05-26 13:06:48.000000000 +0800
+++ u-boot-new/cpu/mpc83xx/cpu.c 2006-05-29 19:10:08.000000000 +0800
@@ -257,10 +257,7 @@
__asm__ __volatile__ ("isync");
/* init direct transfer, clear CS bit */
- dmamr0 = (DMA_CHANNEL_TRANSFER_MODE_DIRECT |
- DMA_CHANNEL_SOURCE_ADDRESS_HOLD_8B |
- DMA_CHANNEL_SOURCE_ADRESSS_HOLD_EN);
-
+ dmamr0 = DMA_CHANNEL_TRANSFER_MODE_DIRECT;
dma->dmamr0 = swab32(dmamr0);
__asm__ __volatile__ ("sync");
diff -Naur u-boot/cpu/mpc83xx/cpu_init.c u-boot-new/cpu/mpc83xx/cpu_init.c
--- u-boot/cpu/mpc83xx/cpu_init.c 2006-05-26 13:06:48.000000000 +0800
+++ u-boot-new/cpu/mpc83xx/cpu_init.c 2006-05-29 19:20:23.000000000 +0800
@@ -56,6 +56,9 @@
*/
im->reset.rmr = (RMR_CSRE & (1<<RMR_CSRE_SHIFT));
+ /* Set the CSB bus pipeline depth as 4 */
+ im->arbiter.acr |= ACR_PIPE_DEP_4;
+
/* LCRR - Clock Ratio Register (10.3.1.16) */
im->lbus.lcrr = CFG_LCRR;
diff -Naur u-boot/cpu/mpc83xx/spd_sdram.c u-boot-new/cpu/mpc83xx/spd_sdram.c
--- u-boot/cpu/mpc83xx/spd_sdram.c 2006-05-26 13:06:48.000000000 +0800
+++ u-boot-new/cpu/mpc83xx/spd_sdram.c 2006-05-30 11:40:35.000000000 +0800
@@ -429,54 +429,44 @@
/* #define CONFIG_DDR_ECC_INIT_VIA_DMA */
void ddr_enable_ecc(unsigned int dram_size)
{
- uint *p;
volatile immap_t *immap = (immap_t *)CFG_IMMRBAR;
volatile ddr8349_t *ddr = &immap->ddr;
unsigned long t_start, t_end;
+
+ register u64 *p;
+ register uint size;
+ unsigned int pattern[2];
+
#if defined(CONFIG_DDR_ECC_INIT_VIA_DMA)
uint i;
#endif
- debug("Initialize a Cachline in DRAM\n");
icache_enable();
-#if defined(CONFIG_DDR_ECC_INIT_VIA_DMA)
- /* Initialise DMA for direct Transfers */
- dma_init();
-#endif
+ pattern[0] = 0xdeadbeef;
+ pattern[1] = 0xbeefdead;
t_start = get_tbms();
#if !defined(CONFIG_DDR_ECC_INIT_VIA_DMA)
- debug("DDR init: Cache flush method\n");
- for (p = 0; p < (uint *)(dram_size); p++) {
- if (((unsigned int)p & 0x1f) == 0) {
- ppcDcbz((unsigned long) p);
- }
-
- /* write pattern to cache and flush */
- *p = (unsigned int)0xdeadbeef;
-
- if (((unsigned int)p & 0x1c) == 0x1c) {
- ppcDcbf((unsigned long) p);
- }
+ debug("DDR init: CPU 64bit write method\n");
+ size = dram_size;
+
+ for (p = 0; p < (u64 *)(size); p++) {
+ ppcDWstore((u32*)p, pattern);
}
+ __asm__ __volatile__ ("sync");
#else
printf("DDR init: DMA method\n");
- for (p = 0; p < (uint *)(8 * 1024); p++) {
- /* zero one data cache line */
- if (((unsigned int)p & 0x1f) == 0) {
- ppcDcbz((unsigned long)p);
- }
-
- /* write pattern to it and flush */
- *p = (unsigned int)0xdeadbeef;
-
- if (((unsigned int)p & 0x1c) == 0x1c) {
- ppcDcbf((unsigned long)p);
- }
+ size = 0x2000;
+
+ for (p = 0; p < (u64 *)(size); p++) {
+ ppcDWstore((u32*)p, pattern);
}
+ __asm__ __volatile__ ("sync");
+ /* Initialise DMA for direct Transfers */
+ dma_init();
/* 8K */
dma_xfer((uint *)0x2000, 0x2000, (uint *)0);
/* 16K */
diff -Naur u-boot/cpu/mpc83xx/speed.c u-boot-new/cpu/mpc83xx/speed.c
--- u-boot/cpu/mpc83xx/speed.c 2006-05-26 13:06:48.000000000 +0800
+++ u-boot-new/cpu/mpc83xx/speed.c 2006-05-29 19:11:01.000000000 +0800
@@ -333,7 +333,7 @@
gd->pci_clk = pci_sync_in;
gd->cpu_clk = gd->core_clk;
- gd->bus_clk = gd->lbiu_clk;
+ gd->bus_clk = gd->csb_clk;
return 0;
}
diff -Naur u-boot/cpu/mpc83xx/start.S u-boot-new/cpu/mpc83xx/start.S
--- u-boot/cpu/mpc83xx/start.S 2006-05-26 13:06:48.000000000 +0800
+++ u-boot-new/cpu/mpc83xx/start.S 2006-05-30 11:27:17.000000000 +0800
@@ -31,7 +31,7 @@
#include <mpc83xx.h>
#include <version.h>
-#define CONFIG_83XX 1 /* needed for Linux kernel header files*/
+#define CONFIG_83XX 1 /* needed for Linux kernel header files*/
#define _LINUX_CONFIG_H 1 /* avoid reading Linux autoconf.h file */
#include <ppc_asm.tmpl>
@@ -113,7 +113,7 @@
* vector at offset 0x100 relative to the base set by MSR[IP]. If
* MSR[IP] is 0, the base address is 0x00000000. If MSR[IP] is 1, the
* base address is 0xfff00000. In the case of a Power On Reset or Hard
- * Reset, the value of MSR[IP] is determined by the CIP field in the
+ * Reset, the value of MSR[IP] is determined by the BMS field in the
* HRCW.
*
* Other bits in the HRCW set up the Base Address and Port Size in BR0.
@@ -124,17 +124,17 @@
* not matter.
*
* Once we have got here, the address mask in OR0 is cleared so that the
- * bottom 32K of the boot ROM is effectively repeated all throughout the
- * processor's address space, after which we can jump to the absolute
- * address at which the boot ROM was linked at compile time, and proceed
- * to initialise the memory controller without worrying if the rug will
- * be pulled out from under us, so to speak (it will be fine as long as
- * we configure BR0 with the same boot ROM link address).
+ * boot ROM appears everywhere of the processor's address space, after
+ * which we can jump to the absolute address at which the boot ROM was
+ * linked at compile time, and proceed to initialise the memory controller
+ * without worrying if the rug will be pulled out from under us, so to
+ * speak (it will be fine as long as we configure BR0 with the same
+ * boot ROM link address).
*/
. = EXC_OFF_SYS_RESET
.globl _start
-_start: /* time t 0 */
+_start:
li r21, BOOTFLAG_COLD /* Normal Power-On: Boot from FLASH*/
nop
b boot_cold
@@ -147,14 +147,15 @@
b boot_warm
-boot_cold: /* time t 3 */
+boot_cold:
lis r4, CONFIG_DEFAULT_IMMR at h
nop
-boot_warm: /* time t 5 */
+boot_warm:
mfmsr r5 /* save msr contents */
lis r3, CFG_IMMRBAR at h
ori r3, r3, CFG_IMMRBAR at l
stw r3, IMMRBAR(r4)
+ mtspr 311, r3 /* SPR311-MBAR */
/* Initialise the E300 processor core */
/*------------------------------------------*/
@@ -163,39 +164,32 @@
#ifndef CFG_RAMBOOT
- /* Inflate flash location so it appears everywhere, calculate */
- /* the absolute address in final location of the FLASH, jump */
- /* there and deflate the flash size back to minimal size */
- /*------------------------------------------------------------*/
+ /* Inflate flash location so it appears everywhere, and setup */
+ /* LAW1 for ROM flash space backup, that is LCS0 space. */
+
bl map_flash_by_law1
+
+ /* Calculate the absolute address of in_flash, change PC to */
+ /* jump there, later the remap_flash_by_law0 will change ROM */
+ /* flash base address */
+
lis r4, (CFG_MONITOR_BASE)@h
ori r4, r4, (CFG_MONITOR_BASE)@l
addi r5, r4, in_flash - _start + EXC_OFF_SYS_RESET
mtlr r5
blr
in_flash:
-#if 1 /* Remapping flash with LAW0. */
+ /* Change BR0 to CFG_FLASH_BASE, and remapping ROM flash with LAW0 */
+
bl remap_flash_by_law0
-#endif
+
+ /* Change ROM flash base address and LAW0 is done! */
#endif /* CFG_RAMBOOT */
/* setup the bats */
bl setup_bats
sync
- /*
- * Cache must be enabled here for stack-in-cache trick.
- * This means we need to enable the BATS.
- * This means:
- * 1) for the EVB, original gt regs need to be mapped
- * 2) need to have an IBAT for the 0xf region,
- * we are running there!
- * Cache should be turned on after BATs, since by default
- * everything is write-through.
- * The init-mem BAT can be reused after reloc. The old
- * gt-regs BAT can be reused after board_init_f calls
- * board_early_init_f (EVB only).
- */
/* enable address translation */
bl enable_addr_trans
sync
@@ -431,19 +425,19 @@
* Note: expects original MSR contents to be in r5.
*/
.globl init_e300_core
-init_e300_core: /* time t 10 */
+init_e300_core:
/* Initialize machine status; enable machine check interrupt */
/*-----------------------------------------------------------*/
- li r3, MSR_KERNEL /* Set ME and RI flags */
+ li r3, MSR_KERNEL /* Set FP, ME and RI flags */
rlwimi r3, r5, 0, 25, 25 /* preserve IP bit set by HRCW */
#ifdef DEBUG
- rlwimi r3, r5, 0, 21, 22 /* debugger might set SE & BE bits */
+ rlwimi r3, r5, 0, 21, 22 /* debugger might set SE & BE bits */
#endif
- SYNC /* Some chip revs need this... */
+ SYNC /* Some chip revs need this... */
mtmsr r3
SYNC
- mtspr SRR1, r3 /* Make SRR1 match MSR */
+ mtspr SRR1, r3 /* Make SRR1 match MSR */
lis r3, CFG_IMMRBAR at h
@@ -796,7 +790,7 @@
.globl icache_status
icache_status:
mfspr r3, HID0
- rlwinm r3, r3, (31 - HID0_ICE_SHIFT + 1), 31, 31
+ rlwinm r3, r3, (32 - HID0_ICE_SHIFT), 31, 31
blr
.globl dcache_enable
@@ -828,7 +822,7 @@
.globl dcache_status
dcache_status:
mfspr r3, HID0
- rlwinm r3, r3, (31 - HID0_DCE_SHIFT + 1), 31, 31
+ rlwinm r3, r3, (32 - HID0_DCE_SHIFT), 31, 31
blr
.globl get_pvr
@@ -870,6 +864,18 @@
dcbz r0,r3
blr
+ .globl ppcDWstore
+ppcDWstore:
+ lfd 1, 0(r4)
+ stfd 1, 0(r3)
+ blr
+
+ .globl ppcDWload
+ppcDWload:
+ lfd 1, 0(r3)
+ stfd 1, 0(r4)
+ blr
+
/*-------------------------------------------------------------------*/
/*
@@ -1214,9 +1220,9 @@
lis r4, (CFG_FLASH_BASE)@h
ori r4, r4, (CFG_FLASH_BASE)@l
stw r4, LBLAWBAR1(r3) /* LBLAWBAR1 <= CFG_FLASH_BASE */
- lis r4, (0x80000016)@h
- ori r4, r4, (0x80000016)@l
- stw r4, LBLAWAR1(r3) /* LBLAWAR1 <= 8MB Flash Size */
+ lis r4, (0x80000018)@h
+ ori r4, r4, (0x80000018)@l
+ stw r4, LBLAWAR1(r3) /* LBLAWAR1 <= 32MB Flash Size */
blr
/* Though all the LBIU Local Access Windows and LBC Banks will be
@@ -1234,17 +1240,17 @@
stw r5, BR0(r3) /* r5 <= (CFG_FLASH_BASE & 0xFFFF8000) | (BR0 & 0x00007FFF) */
lwz r4, OR0(r3)
- lis r5, 0xFF80 /* 8M */
+ lis r5, 0xFE00 /* 32M */
or r4, r4, r5
- stw r4, OR0(r3) /* OR0 <= OR0 | 0xFF800000 */
+ stw r4, OR0(r3) /* OR0 <= OR0 | 0xFE000000 */
lis r4, (CFG_FLASH_BASE)@h
ori r4, r4, (CFG_FLASH_BASE)@l
stw r4, LBLAWBAR0(r3) /* LBLAWBAR0 <= CFG_FLASH_BASE */
- lis r4, (0x80000016)@h
- ori r4, r4, (0x80000016)@l
- stw r4, LBLAWAR0(r3) /* LBLAWAR0 <= 8MB Flash Size */
+ lis r4, (0x80000018)@h
+ ori r4, r4, (0x80000018)@l
+ stw r4, LBLAWAR0(r3) /* LBLAWAR0 <= 32MB Flash Size */
xor r4, r4, r4
stw r4, LBLAWBAR1(r3)
diff -Naur u-boot/include/asm-ppc/immap_83xx.h u-boot-new/include/asm-ppc/immap_83xx.h
--- u-boot/include/asm-ppc/immap_83xx.h 2006-05-26 13:06:50.000000000 +0800
+++ u-boot-new/include/asm-ppc/immap_83xx.h 2006-05-29 19:16:35.000000000 +0800
@@ -371,6 +371,10 @@
u32 acr; /* Arbiter Configuration Register */
#define ACR_COREDIS 0x10000000 /* Core disable. */
#define ACR_PIPE_DEP 0x00070000 /* Pipeline depth (number of outstanding transactions). */
+#define ACR_PIPE_DEP_1 0x00000000 /* Pipeline depth 1 */
+#define ACR_PIPE_DEP_2 0x00010000 /* Pipeline depth 2 */
+#define ACR_PIPE_DEP_3 0x00020000 /* Pipeline depth 3 */
+#define ACR_PIPE_DEP_4 0x00030000 /* Pipeline depth 4 */
#define ACR_PCI_RPTCNT 0x00007000 /* PCI repeat count. */
#define ACR_RPTCNT 0x00000700 /* Repeat count. */
#define ACR_APARK 0x00000030 /* Address parking. */
diff -Naur u-boot/include/common.h u-boot-new/include/common.h
--- u-boot/include/common.h 2006-05-26 13:06:50.000000000 +0800
+++ u-boot-new/include/common.h 2006-05-29 20:50:28.000000000 +0800
@@ -383,6 +383,11 @@
void ppcDcbz(unsigned long value);
#endif
+#if defined (CONFIG_MPC83XX)
+void ppcDWload(unsigned int *addr, unsigned int *ret);
+void ppcDWstore(unsigned int *addr, unsigned int *value);
+#endif
+
/* $(CPU)/cpu.c */
int checkcpu (void);
int checkicache (void);
diff -Naur u-boot/Makefile u-boot-new/Makefile
--- u-boot/Makefile 2006-05-26 13:06:40.000000000 +0800
+++ u-boot-new/Makefile 2006-05-29 19:08:21.000000000 +0800
@@ -1326,9 +1326,6 @@
## MPC83xx Systems
#########################################################################
-MPC8349ADS_config: unconfig
- @./mkconfig $(@:_config=) ppc mpc83xx mpc8349ads
-
TQM834x_config: unconfig
@./mkconfig $(@:_config=) ppc mpc83xx tqm834x
More information about the U-Boot
mailing list