[U-Boot] [PATCH 3/3] sunxi: H6: use writel_relaxed for DRAM timing register accesses

Andre Przywara andre.przywara at arm.com
Fri Jan 11 00:31:21 UTC 2019


The timing registers in the DRAM controller can be programmed in any
order, as they will only take effect once the controller is eventually
"activated".

Switch the MMIO writes in mctl_set_timing_lpddr3() and
mctl_bit_delay_set() over to use writel_relaxed(), since we don't need
the stronger guarantee of the normal writel(). We satisfy the overall
ordering requirement by ending each function with an explicit DMB
barrier.

In this case we are not interested in the performance benefit this
usually gives, but in the saved instructions, which add up across the
many writes we have in the timing setup.
Due to alignment effects this shrinks our chronically tight H6 SPL by a
whopping 2KB, which brings it into the same region as the other
AArch64 Allwinner SPL builds.

Signed-off-by: Andre Przywara <andre.przywara at arm.com>
---
 arch/arm/mach-sunxi/dram_sun50i_h6.c | 79 +++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c b/arch/arm/mach-sunxi/dram_sun50i_h6.c
index 5da90a2835..84a33a63d6 100644
--- a/arch/arm/mach-sunxi/dram_sun50i_h6.c
+++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c
@@ -241,51 +241,55 @@ static void mctl_set_timing_lpddr3(struct dram_para *para)
 	memcpy(mctl_phy->mr, mr_lpddr3, sizeof(mr_lpddr3));
 
 	/* set DRAM timing */
-	writel((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras,
-	       &mctl_ctl->dramtmg[0]);
-	writel((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]);
-	writel((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd,
-	       &mctl_ctl->dramtmg[2]);
-	writel((tmrw << 20) | (tmrd << 12) | tmod, &mctl_ctl->dramtmg[3]);
-	writel((trcd << 24) | (tccd << 16) | (trrd << 8) | trp,
-	       &mctl_ctl->dramtmg[4]);
-	writel((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke,
-	       &mctl_ctl->dramtmg[5]);
+	writel_relaxed((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras,
+		       &mctl_ctl->dramtmg[0]);
+	writel_relaxed((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]);
+	writel_relaxed((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd,
+		       &mctl_ctl->dramtmg[2]);
+	writel_relaxed((tmrw << 20) | (tmrd << 12) | tmod,
+		       &mctl_ctl->dramtmg[3]);
+	writel_relaxed((trcd << 24) | (tccd << 16) | (trrd << 8) | trp,
+		       &mctl_ctl->dramtmg[4]);
+	writel_relaxed((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke,
+		       &mctl_ctl->dramtmg[5]);
 	/* Value suggested by ZynqMP manual and used by libdram */
-	writel((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]);
-	writel((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs,
-	       &mctl_ctl->dramtmg[8]);
-	writel(txsr, &mctl_ctl->dramtmg[14]);
+	writel_relaxed((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]);
+	writel_relaxed((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs,
+		       &mctl_ctl->dramtmg[8]);
+	writel_relaxed(txsr, &mctl_ctl->dramtmg[14]);
 
 	clrsetbits_le32(&mctl_ctl->init[0], (3 << 30), (1 << 30));
-	writel(0, &mctl_ctl->dfimisc);
+	writel_relaxed(0, &mctl_ctl->dfimisc);
 	clrsetbits_le32(&mctl_ctl->rankctl, 0xff0, 0x660);
 
 	/*
 	 * Set timing registers of the PHY.
 	 * Note: the PHY is clocked 2x from the DRAM frequency.
 	 */
-	writel((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1),
+	writel_relaxed((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1),
 	       &mctl_phy->dtpr[0]);
-	writel((tfaw << 17) | 0x28000400 | (tmrd << 1), &mctl_phy->dtpr[1]);
-	writel(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]);
-	writel(((txsdll << 22) - (0x1 << 16)) | twtr_sa | (tcksrea << 8),
-	       &mctl_phy->dtpr[3]);
-	writel((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]);
-	writel((trc << 17) | (trcd << 9) | (twtr << 1), &mctl_phy->dtpr[5]);
-	writel(0x0505, &mctl_phy->dtpr[6]);
+	writel_relaxed((tfaw << 17) | 0x28000400 | (tmrd << 1),
+		       &mctl_phy->dtpr[1]);
+	writel_relaxed(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]);
+	writel_relaxed(((txsdll << 22) - (0x1 << 16)) | twtr_sa |
+		       (tcksrea << 8), &mctl_phy->dtpr[3]);
+	writel_relaxed((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]);
+	writel_relaxed((trc << 17) | (trcd << 9) | (twtr << 1),
+		       &mctl_phy->dtpr[5]);
+	writel_relaxed(0x0505, &mctl_phy->dtpr[6]);
 
 	/* Configure DFI timing */
-	writel(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000,
-	       &mctl_ctl->dfitmg0);
-	writel(0x040201, &mctl_ctl->dfitmg1);
+	writel_relaxed(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000,
+		       &mctl_ctl->dfitmg0);
+	writel_relaxed(0x040201, &mctl_ctl->dfitmg1);
 
 	/* Configure PHY timing */
-	writel(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]);
-	writel(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]);
+	writel_relaxed(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]);
+	writel_relaxed(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]);
 
 	/* set refresh timing */
-	writel((trefi << 16) | trfc, &mctl_ctl->rfshtmg);
+	writel_relaxed((trefi << 16) | trfc, &mctl_ctl->rfshtmg);
+	DMB;
 }
 
 static void mctl_sys_init(struct dram_para *para)
@@ -476,17 +480,17 @@ static void mctl_bit_delay_set(struct dram_para *para)
 		val = readl(&mctl_phy->dx[i].bdlr0);
 		for (j = 0; j < 4; j++)
 			val += para->dx_write_delays[i][j] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr0);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr0);
 
 		val = readl(&mctl_phy->dx[i].bdlr1);
 		for (j = 0; j < 4; j++)
 			val += para->dx_write_delays[i][j + 4] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr1);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr1);
 
 		val = readl(&mctl_phy->dx[i].bdlr2);
 		for (j = 0; j < 4; j++)
 			val += para->dx_write_delays[i][j + 8] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr2);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr2);
 	}
 	clrbits_le32(&mctl_phy->pgcr[0], BIT(26));
 
@@ -494,22 +498,22 @@ static void mctl_bit_delay_set(struct dram_para *para)
 		val = readl(&mctl_phy->dx[i].bdlr3);
 		for (j = 0; j < 4; j++)
 			val += para->dx_read_delays[i][j] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr3);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr3);
 
 		val = readl(&mctl_phy->dx[i].bdlr4);
 		for (j = 0; j < 4; j++)
 			val += para->dx_read_delays[i][j + 4] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr4);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr4);
 
 		val = readl(&mctl_phy->dx[i].bdlr5);
 		for (j = 0; j < 4; j++)
 			val += para->dx_read_delays[i][j + 8] << (j * 8);
-		writel(val, &mctl_phy->dx[i].bdlr5);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr5);
 
 		val = readl(&mctl_phy->dx[i].bdlr6);
 		val += (para->dx_read_delays[i][12] << 8) |
 		       (para->dx_read_delays[i][13] << 16);
-		writel(val, &mctl_phy->dx[i].bdlr6);
+		writel_relaxed(val, &mctl_phy->dx[i].bdlr6);
 	}
 	setbits_le32(&mctl_phy->pgcr[0], BIT(26));
 	udelay(1);
@@ -517,8 +521,9 @@ static void mctl_bit_delay_set(struct dram_para *para)
 	for (i = 1; i < 14; i++) {
 		val = readl(&mctl_phy->acbdlr[i]);
 		val += 0x0a0a0a0a;
-		writel(val, &mctl_phy->acbdlr[i]);
+		writel_relaxed(val, &mctl_phy->acbdlr[i]);
 	}
+	DMB;
 }
 
 static void mctl_channel_init(struct dram_para *para)
-- 
2.14.5



More information about the U-Boot mailing list