[U-Boot] [PATCH] sunxi: H6: DRAM: Add support for half DQ

Jernej Škrabec jernej.skrabec at siol.net
Wed Aug 21 21:54:16 UTC 2019


Dne sreda, 21. avgust 2019 ob 13:07:13 CEST je Andre Przywara napisal(a):
> On Wed, 21 Aug 2019 08:01:31 +0200
> Jernej Škrabec <jernej.skrabec at siol.net> wrote:
> 
> Hi,
> 
> > Dne sreda, 21. avgust 2019 ob 02:31:04 CEST je André Przywara napisal(a):
> > > On 17/07/2019 23:16, Jernej Skrabec wrote:
> > > > Half DQ configuration seems to be very rare for H6 based boards/STBs,
> > > > but exists nevertheless. Currently the only known product which needs
> > > > this support is Tanix TX6 mini.
> > > > 
> > > > This commit adds support for half DQ configuration. Code was tested
> > > > for regressions on other configurations (OrangePi 3 1 GiB/LPDDR3,
> > > > Tanix
> > > > TX6 4 GiB/DDR3) and none were found.
> > > > 
> > > > Thanks to Icenowy Zheng for help with this code.
> > > > 
> > > > Signed-off-by: Jernej Skrabec <jernej.skrabec at siol.net>
> > > 
> > > I don't have a board with 16-bit DRAM, but can confirm that this still
> > > works on the PineH64 and the Eachlink H6 Mini.
> > 
> > Thanks for testing!
> > 
> > > Aside from the one question below, the code looks alright for me. I
> > > verified the bits set or checked below against the Zynq register
> > > documentation.
> > > I didn't browse through all of the register documentation to find other
> > > bits that possibly affect the bus width. But since the code seems to
> > > work for Jernej, I assume that's fine.
> > 
> > Unfortunately, there is no RSR0 documentation in Zynq register manual, but
> > I
> Ah, I actually mistook RSR1 for RSR0. But ...
> 
> > found this page:
> > https://git.axiom-project.eu/axiom-evi-qemu/raw/
> > 47171dbf860af6d12c9b82997629460b77378496/hw/misc/xilinx-ddr_phy.c
> > 
> > All RSR0 registers are defined as having QSGERR field, but there is no
> > explanation of values it can hold.
> 
> Yeah, it looks like all RSR registers hold some error bits for each 8-bit
> channel. And since RSR1 mentions to be about "one bit per rank", I would
> assume the same to be true for RSR0, just for some other error cause.
> > BTW, I don't have board with such memory combination, but few people
> > confirmed working on Tanix TX6 mini, which it does.
> 
> Do you have an overview on which H6 board uses single rank and which dual
> rank? Do we actually have both varieties, for the full bus width?

I think Icenowy can help you here. But AFAIK we have both varieties for full 
bus width.

> > > > ---
> > > > 
> > > >  .../include/asm/arch-sunxi/dram_sun50i_h6.h   |  1 +
> > > >  arch/arm/mach-sunxi/dram_sun50i_h6.c          | 74
> > > >  ++++++++++++-------
> > > >  2 files changed, 50 insertions(+), 25 deletions(-)
> > > > 
> > > > diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h
> > > > b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h index
> > > > 0a1da02376..49a8a66f7b 100644
> > > > --- a/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h
> > > > +++ b/arch/arm/include/asm/arch-sunxi/dram_sun50i_h6.h
> > > > @@ -315,6 +315,7 @@ struct dram_para {
> > > > 
> > > >  	u8 cols;
> > > >  	u8 rows;
> > > >  	u8 ranks;
> > > > 
> > > > +	u8 bus_full_width;
> > > > 
> > > >  	const u8 dx_read_delays[NR_OF_BYTE_LANES]
[RD_LINES_PER_BYTE_LANE];
> > > >  	const u8 dx_write_delays[NR_OF_BYTE_LANES]
> > 
> > [WR_LINES_PER_BYTE_LANE];
> > 
> > > >  };
> > > > 
> > > > diff --git a/arch/arm/mach-sunxi/dram_sun50i_h6.c
> > > > b/arch/arm/mach-sunxi/dram_sun50i_h6.c index 2a8275da3a..0d65327d35
> > > > 100644
> > > > --- a/arch/arm/mach-sunxi/dram_sun50i_h6.c
> > > > +++ b/arch/arm/mach-sunxi/dram_sun50i_h6.c
> > > > @@ -201,6 +201,9 @@ static void mctl_set_addrmap(struct dram_para
> > > > *para)
> > > > 
> > > >  	u8 rows = para->rows;
> > > >  	u8 ranks = para->ranks;
> > > > 
> > > > +	if (!para->bus_full_width)
> > > > +		cols -= 1;
> > > > +
> > > > 
> > > >  	/* Ranks */
> > > >  	if (ranks == 2)
> > > >  	
> > > >  		mctl_ctl->addrmap[0] = rows + cols - 3;
> > > > 
> > > > @@ -213,6 +216,10 @@ static void mctl_set_addrmap(struct dram_para
> > > > *para)
> > > > 
> > > >  	/* Columns */
> > > >  	mctl_ctl->addrmap[2] = 0;
> > > >  	switch (cols) {
> > > > 
> > > > +	case 7:
> > > > +		mctl_ctl->addrmap[3] = 0x1F1F1F00;
> > > > +		mctl_ctl->addrmap[4] = 0x1F1F;
> > > > +		break;
> > > > 
> > > >  	case 8:
> > > >  		mctl_ctl->addrmap[3] = 0x1F1F0000;
> > > >  		mctl_ctl->addrmap[4] = 0x1F1F;
> > > > 
> > > > @@ -300,13 +307,16 @@ static void mctl_com_init(struct dram_para
> > > > *para)
> > > > 
> > > >  		reg_val = 0x3f00;
> > > >  	
> > > >  	clrsetbits_le32(&mctl_com->unk_0x008, 0x3f00, reg_val);
> > > > 
> > > > -	/* TODO: half DQ, DDR4 */
> > > > -	reg_val = MSTR_BUSWIDTH_FULL | MSTR_BURST_LENGTH(8) |
> > > > -		  MSTR_ACTIVE_RANKS(para->ranks);
> > > > +	/* TODO: DDR4 */
> > > > +	reg_val = MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(para->ranks);
> > > > 
> > > >  	if (para->type == SUNXI_DRAM_TYPE_LPDDR3)
> > > >  	
> > > >  		reg_val |= MSTR_DEVICETYPE_LPDDR3;
> > > >  	
> > > >  	if (para->type == SUNXI_DRAM_TYPE_DDR3)
> > > >  	
> > > >  		reg_val |= MSTR_DEVICETYPE_DDR3 | MSTR_2TMODE;
> > > > 
> > > > +	if (para->bus_full_width)
> > > > +		reg_val |= MSTR_BUSWIDTH_FULL;
> > > > +	else
> > > > +		reg_val |= MSTR_BUSWIDTH_HALF;
> > > > 
> > > >  	writel(reg_val | BIT(31), &mctl_ctl->mstr);
> > > >  	
> > > >  	if (para->type == SUNXI_DRAM_TYPE_LPDDR3)
> > > > 
> > > > @@ -333,7 +343,10 @@ static void mctl_com_init(struct dram_para *para)
> > > > 
> > > >  	}
> > > >  	writel(reg_val, &mctl_ctl->odtcfg);
> > > > 
> > > > -	/* TODO: half DQ */
> > > > +	if (!para->bus_full_width) {
> > > > +		writel(0x0, &mctl_phy->dx[2].gcr[0]);
> > > > +		writel(0x0, &mctl_phy->dx[3].gcr[0]);
> > > > +	}
> > > > 
> > > >  }
> > > >  
> > > >  static void mctl_bit_delay_set(struct dram_para *para)
> > > > 
> > > > @@ -514,22 +527,31 @@ static void mctl_channel_init(struct dram_para
> > > > *para)
> > > > 
> > > >  	if (readl(&mctl_phy->pgsr[0]) & 0x400000)
> > > >  	{
> > > > 
> > > > -		/*
> > > > -		 * Detect single rank.
> > > > -		 * TODO: also detect half DQ.
> > > > -		 */
> > > > +		/* Check for single rank and optionally half DQ. */
> > > > 
> > > >  		if ((readl(&mctl_phy->dx[0].rsr[0]) & 0x3) == 2 &&
> > > > 
> > > > -		    (readl(&mctl_phy->dx[1].rsr[0]) & 0x3) == 2 &&
> > > > -		    (readl(&mctl_phy->dx[2].rsr[0]) & 0x3) == 2 &&
> > > > -		    (readl(&mctl_phy->dx[3].rsr[0]) & 0x3) == 2) {
> > > > +		    (readl(&mctl_phy->dx[1].rsr[0]) & 0x3) == 2) {
> > > > 
> > > >  			para->ranks = 1;
> > > > 
> > > > +
> > > > +			if ((readl(&mctl_phy->dx[2].rsr[0]) & 0x3) 
!
> > 
> > = 2 ||
> > 
> > > > +			    (readl(&mctl_phy->dx[3].rsr[0]) & 0x3) 
!
> > 
> > = 2)
> > 
> > > > +				para->bus_full_width = 0;
> > > > +
> > > > 
> > > >  			/* Restart DRAM initialization from 
scratch.
> > 
> > */
> > 
> > > >  			mctl_core_init(para);
> > > >  			return;
> > > >  		
> > > >  		}
> > > > 
> > > > -		else {
> > > > -			panic("This DRAM setup is currently not
> > 
> > supported.\n");
> > 
> > > > +
> > > > +		/* Check for dual rank and half DQ */
> > > > +		if ((readl(&mctl_phy->dx[0].rsr[0]) & 0x3) == 0 &&
> > > > +		    (readl(&mctl_phy->dx[1].rsr[0]) & 0x3) == 0) {
> > > > +			para->bus_full_width = 0;
> > > 
> > > Mmh, aren't those bits always supposed to be 0 here, regardless of the
> > > bus width? So we just confirm dual rank here? Can we actually say from
> > > those error bits whether no errors in bits 16-31 mean them being masked
> > > or whether that means the upper 16 bits are working fine?
> > > Or is bus_fill_width always 1 at this point? But then we would need to
> > > check dx[2].rsr[0] and dx[3].rsr[0], wouldn't we?
> > 
> > As I already pointed out, I couldn't found useful documentation of RSR0,
> > so I checked what libdram does and put similar check here too. They
> > always check only two bits in all RSR0 registers. I suppose 1 and 3 can
> > be read from registers or register values may not be same. But I really
> > have no idea what they mean. I would be glad if anyone can provide more
> > information.
> Looking at the other registers (particularly RSR1) I assumed that it's one
> error bit per rank. So if just bit 1 is set, it probably means we only have
> one rank, assuming bit 0 is clear. So I would guess the following: DX0RSR0 
> DX1RSR0  DX2RSR0  DX3RSR0
>     00       00       00       00    dual rank, 32 bit
>     10       10       10       10    single rank, 32 bit
>     00       00       11       11    dual rank, 16 bit
>     10       10       11       11    single rank, 16 bit
> (the first line is probably filtered by the PGSR0 check above)

That's actually pretty nice explanation!

> But maybe the zeroing of DX[23]GCR0 also masks errors in the higher
> bytes? So they are always zero, and dual-rank, 32-bit is indistinguishable
> from dual rank, 16 bit? And the last line is actually 10-10-00-00?

Probably, but that's why we start with dual rank, full bus width assumption, 
in order not to miss anything.

> 
> Does that make sense?

Very much.

Slightly off topic - it would be interesting to compare error codes from your 
Eachlink box to above table and check if it make sense from what we know from 
experimentation.

> 
> > That being said, I see what bothers you. I checked libdram logic again and
> > unless I missed anything, this is exactly the check libdram has.
> 
> While it seems wise to follow libdram, I wouldn't give too much on their
> code. IIRC I have seen stupid things in there, so it's probably the usual
> "enterprise-grade" code, where everybody assumes it's super correct and
> well tested, where it actually just followed some "works? ship it!" rule
> ;-)
> > However,
> > because having dual rank and half DQ seems unlikely, we may skip this
> > block
> > altogether, but that would require another round of tests.
> 
> Na, don't bother. You could just add a comment stating that. Given the level
> of documentation we should only claim to support setups we actually tested,
> and leave it up to newer boards to trigger fixes.
> 
> So you could just send a v2 with that comment, and add my:
> 
> Reviewed-by: Andre Przywara <andre.przywara at arm.com>

Ok, thanks. I'll send v2 soon.

Best regards,
Jernej

> 
> Thanks,
> Andre.
> 
> > Best regards,
> > Jernej
> > 
> > > Cheers,
> > > Andre.
> > > 
> > > > +
> > > > +			/* Restart DRAM initialization from 
scratch.
> > 
> > */
> > 
> > > > +			mctl_core_init(para);
> > > > +			return;
> > > > 
> > > >  		}
> > > > 
> > > > +
> > > > +		panic("This DRAM setup is currently not supported.
\n");
> > > > 
> > > >  	}
> > > >  	
> > > >  	if (readl(&mctl_phy->pgsr[0]) & 0xff00000) {
> > > > 
> > > > @@ -557,11 +579,8 @@ static void mctl_channel_init(struct dram_para
> > > > *para)
> > > > 
> > > >  static void mctl_auto_detect_dram_size(struct dram_para *para)
> > > >  {
> > > > 
> > > > -	/* TODO: non-LPDDR3, half DQ */
> > > > -	/*
> > > > -	 * Detect rank number by the code in mctl_channel_init. Furtherly
> > > > -	 * when DQ detection is available it will also be executed there.
> > > > -	 */
> > > > +	/* TODO: non-(LP)DDR3 */
> > > > +	/* Detect rank number and half DQ by the code in
> > 
> > mctl_channel_init. */
> > 
> > > >  	mctl_core_init(para);
> > > >  	
> > > >  	/* detect row address bits */
> > > > 
> > > > @@ -570,8 +589,9 @@ static void mctl_auto_detect_dram_size(struct
> > > > dram_para *para)>
> > > > 
> > > >  	mctl_core_init(para);
> > > >  	
> > > >  	for (para->rows = 13; para->rows < 18; para->rows++) {
> > > > 
> > > > -		/* 8 banks, 8 bit per byte and 32 bit width */
> > > > -		if (mctl_mem_matches((1 << (para->rows + para->cols +
> > 
> > 5))))
> > 
> > > > +		/* 8 banks, 8 bit per byte and 16/32 bit width */
> > > > +		if (mctl_mem_matches((1 << (para->rows + para->cols +
> > > > +					    4 + para-
> > >
> > >bus_full_width))))
> > >
> > > >  			break;
> > > >  	
> > > >  	}
> > > > 
> > > > @@ -580,18 +600,21 @@ static void mctl_auto_detect_dram_size(struct
> > > > dram_para *para)>
> > > > 
> > > >  	mctl_core_init(para);
> > > >  	
> > > >  	for (para->cols = 8; para->cols < 11; para->cols++) {
> > > > 
> > > > -		/* 8 bits per byte and 32 bit width */
> > > > -		if (mctl_mem_matches(1 << (para->cols + 2)))
> > > > +		/* 8 bits per byte and 16/32 bit width */
> > > > +		if (mctl_mem_matches(1 << (para->cols + 1 +
> > > > +					   para-
> > >
> > >bus_full_width)))
> > >
> > > >  			break;
> > > >  	
> > > >  	}
> > > >  
> > > >  }
> > > >  
> > > >  unsigned long mctl_calc_size(struct dram_para *para)
> > > >  {
> > > > 
> > > > -	/* TODO: non-LPDDR3, half DQ */
> > > > +	u8 width = para->bus_full_width ? 4 : 2;
> > > > +
> > > > +	/* TODO: non-(LP)DDR3 */
> > > > 
> > > > -	/* 8 banks, 32-bit (4 byte) data width */
> > > > -	return (1ULL << (para->cols + para->rows + 3)) * 4 * para->ranks;
> > > > +	/* 8 banks */
> > > > +	return (1ULL << (para->cols + para->rows + 3)) * width * para-
> > >
> > >ranks;
> > >
> > > >  }
> > > >  
> > > >  #define SUN50I_H6_LPDDR3_DX_WRITE_DELAYS			
\
> > > > 
> > > > @@ -625,6 +648,7 @@ unsigned long sunxi_dram_init(void)
> > > > 
> > > >  		.ranks = 2,
> > > >  		.cols = 11,
> > > >  		.rows = 14,
> > > > 
> > > > +		.bus_full_width = 1,
> > > > 
> > > >  #ifdef CONFIG_SUNXI_DRAM_H6_LPDDR3
> > > >  
> > > >  		.type = SUNXI_DRAM_TYPE_LPDDR3,
> > > >  		.dx_read_delays  = SUN50I_H6_LPDDR3_DX_READ_DELAYS,






More information about the U-Boot mailing list