[U-Boot] [PATCHv4 2/3] driver/ddr/altera: Add the sdram calibration portion

Tue Jun 9 14:21:45 CEST 2015

In title, you can convert sdram->SDRAM

On Tue 2015-06-02 22:52:49, dinguyen at opensource.altera.com wrote:
> From: Dinh Nguyen <dinguyen at opensource.altera.com>
> 
> This patch adds the DDR calibration portion of the Altera SDRAM driver.
> 
> Signed-off-by: Dinh Nguyen <dinguyen at opensource.altera.com>

> +/*
> + * In order to reduce ROM size, most of the selectable calibration steps are
> + * decided at compile time based on the user's calibration mode selection,
> + * as captured by the STATIC_CALIB_STEPS selection below.
> + *
> + * However, to support simulation-time selection of fast simulation mode, where
> + * we skip everything except the bare minimum, we need a few of the steps to
> + * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
> + * check, which is based on the rtl-supplied value, or we dynamically compute
> + * the value to use based on the dynamically-chosen calibration mode

"." at the end of sentence.

> +
> +#define DLEVEL 0
> +#define STATIC_IN_RTL_SIM 0
> +#define STATIC_SKIP_DELAY_LOOPS 0
> +
> +#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
> +	STATIC_SKIP_DELAY_LOOPS)

Would it make sense to drop simulation-time support for initial merge?

> +/* calibration steps requested by the rtl */
> +uint16_t dyn_calib_steps;

rtl?

> +/*
> + * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
> + * instead of static, we use boolean logic to select between
> + * non-skip and skip values
> + *
> + * The mask is set to include all bits when not-skipping, but is
> + * zero when skipping
> + */

"." at the end of sentence. Twice here :-).

> +
> +uint16_t skip_delay_mask;	/* mask off bits when skipping/not-skipping */
> +
> +#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
> +	((non_skip_value) & skip_delay_mask)
> +
> +struct gbl_type *gbl;
> +struct param_type *param;
> +uint32_t curr_shadow_reg;
> +
> +static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
> +	uint32_t write_group, uint32_t use_dm,
> +	uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);
> +
> +static u32 sdr_get_addr(u32 *base)
> +{
> +	u32 addr = (u32)base & MGR_SELECT_MASK;

You sometimes use uint32_t, and sometimes u32, as seen here. Would it
be more readable to just use u32 everywhere?

And this function would be really helped by taking "u32 base", not
"u32 *", as youd reduce number of typecasts below...

> +	switch (addr) {
> +	case BASE_PHY_MGR:
> +		addr = (((u32)base >> 8) & (1 << 6)) | ((u32)base & 0x3f) |
> +			SDR_PHYGRP_PHYMGRGRP_ADDRESS;
> +		break;
> +	case BASE_RW_MGR:
> +		addr = ((u32)base & 0x1fff) | SDR_PHYGRP_RWMGRGRP_ADDRESS;
> +		break;
> +	case BASE_DATA_MGR:
> +		addr = ((u32)base & 0x7ff) | SDR_PHYGRP_DATAMGRGRP_ADDRESS;
> +		break;
> +	case BASE_SCC_MGR:
> +		addr = ((u32)base & 0xfff) | SDR_PHYGRP_SCCGRP_ADDRESS;
> +		break;
> +	case BASE_REG_FILE:
> +		addr = ((u32)base & 0x7ff) | SDR_PHYGRP_REGFILEGRP_ADDRESS;
> +		break;
> +	case BASE_MMR:
> +		addr = ((u32)base & 0xfff) | SDR_CTRLGRP_ADDRESS;
> +		break;

Or at least introduce temporary variable...

> +static void initialize(void)
> +{
> +	u32 addr = sdr_get_addr(&phy_mgr_cfg->mux_sel);
> +
> +	debug("%s:%d\n", __func__, __LINE__);

Is this debugging neccessary? It will change when you for example
change comment in here...

> +	/* USER calibration has control over path to memory */
> +	/*
> +	 * In Hard PHY this is a 2-bit control:
> +	 * 0: AFI Mux Select
> +	 * 1: DDIO Mux Select
> +	 */
> +	writel(0x3, SOCFPGA_SDR_ADDRESS + addr);
> +
> +	/* USER memory clock is not stable we begin initialization  */

"  */" -> " */"

> +	for (i = 0; i < 16; i++) {
> +		debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u",
> +			   __func__, __LINE__, i);

Ok, custom debugging system. Neccessary?

> +static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
> +{
> +	uint32_t addr = sdr_get_addr((u32 *)SCC_MGR_DQS_EN_DELAY);

Now uint32_t vs. u32 on single line...

> +	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
> +		r += NUM_RANKS_PER_SHADOW_REG) {
> +		scc_mgr_set_dqs_en_delay(read_group, delay);
> +
> +		addr = sdr_get_addr(&sdr_scc_mgr->dqs_ena);
> +		writel(read_group, SOCFPGA_SDR_ADDRESS + addr);
> +		/*
> +		 * In shadow register mode, the T11 settings are stored in
> +		 * registers in the core, which are updated by the DQS_ENA
> +		 * signals. Not issuing the SCC_MGR_UPD command allows us to
> +		 * save lots of rank switching overhead, by calling
> +		 * select_shadow_regs_for_update with update_scan_chains
> +		 * set to 0.
> +		 */
> +		addr = sdr_get_addr(&sdr_scc_mgr->update);
> +		writel(0, SOCFPGA_SDR_ADDRESS + addr);
> +	}
> +	/*
> +	 * In shadow register mode, the T11 settings are stored in
> +	 * registers in the core, which are updated by the DQS_ENA
> +	 * signals. Not issuing the SCC_MGR_UPD command allows us to
> +	 * save lots of rank switching overhead, by calling
> +	 * select_shadow_regs_for_update with update_scan_chains
> +	 * set to 0.
> +	 */
> +	addr = sdr_get_addr(&sdr_scc_mgr->update);
> +	writel(0, SOCFPGA_SDR_ADDRESS + addr);

Umm. Two completely same comments, two same pieces of code. Make it
function?

Does it make sense to run this twice in fact?

> +static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode)
> +{
> +	uint32_t addr;
> +	/* mode = 0 : Do NOT bypass - Half Rate Mode */
> +	/* mode = 1 : Bypass - Full Rate Mode */

Then perhaps the parameter should be named "do_bypass" or "full_rate"?

> +		if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
> +			set_jump_as_return();
> +			addr = sdr_get_addr((u32 *)RW_MGR_RUN_SINGLE_GROUP);
> +			writel(RW_MGR_MRS2_MIRR, SOCFPGA_SDR_ADDRESS + addr);
> +			delay_for_n_mem_clocks(4);
> +			set_jump_as_return();
> +			writel(RW_MGR_MRS3_MIRR, SOCFPGA_SDR_ADDRESS + addr);
> +			delay_for_n_mem_clocks(4);

What is this, some kind of bytecode interpretter? Or does it create a
bytecode for someone?

> +static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
> +			      uint32_t dtaps_per_ptap, uint32_t *work_bgn,
> +			      uint32_t *v, uint32_t *d, uint32_t *p,
> +			      uint32_t *i, uint32_t *max_working_cnt)
> +{

Undocumented, single-letter parameters do not really make reading the
code easy...

Should we have structure for work_bgn, v, d, p, i ... instead of
separate parameters?

> +	uint32_t found_begin = 0;
> +	uint32_t tmp_delay = 0;
> +	uint32_t test_status;
> +
> +	for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
> +		IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
> +		*work_bgn = tmp_delay;
> +		scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
> +
> +		for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
> +			for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
> +				IO_DELAY_PER_OPA_TAP) {

Does this look like c-code to you?

> +	} else {
> +		/* ******************************************************* */
> +		/* * step 3-5b:  Find the right edge of the window using
> +		delay taps   * */
> +		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \
> +			   ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
> +			   v, p, d, work_bgn);
> +
> +		work_end = work_bgn;
> +
> +		/* * The actual increment of dtaps is done outside of the
> +		if/else loop to share code */
> +
> +		/* Only here to counterbalance a subtract later on which is
> +		not needed if this branch of the algorithm is taken */
> +		max_working_cnt++;

Comment style in this block is even stranger than in other pieces of
this patch.

> +		} else {
> +			for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
> +				if (bit_chk & 1) {
> +					/* Remember a passing test as
> +					the right_edge */
> +					right_edge[i] = d;
> +				} else {
> +					if (d != 0) {
> +						/* If a right edge has not been
> +						seen yet, then a future passing
> +						test will mark this edge as the
> +						left edge */
> +						if (right_edge[i] ==
> +						IO_IO_IN_DELAY_MAX + 1) {
> +							left_edge[i] = -(d + 1);
> +						}
> +					} else {
> +						/* d = 0 failed, but it passed
> +						when testing the left edge,
> +						so it must be marginal,
> +						set it to -1 */
> +						if (right_edge[i] ==
> +							IO_IO_IN_DELAY_MAX + 1 &&
> +							left_edge[i] !=
> +							IO_IO_IN_DELAY_MAX
> +							+ 1) {
> +							right_edge[i] = -1;
> +						}

When this happens to your code, you know it is time to split your
function into sub-functions.

> +		if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
> +		    (write_group, read_group, test_bgn)) {
> +				/*
> +				 * USER Read per-bit deskew can be done on a
> +				 * per shadow register basis.
> +				 */
> +				for (rank_bgn = 0, sr = 0;
> +					rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
> +					rank_bgn += NUM_RANKS_PER_SHADOW_REG,
> +					++sr) {
> +					/*
> +					 * Determine if this set of ranks
> +					 * should be skipped entirely.
> +					 */
> +					if (!param->skip_shadow_regs[sr]) {
> +						/*
> +						 * If doing read after write
> +						 * calibration, do not update
> +						 * FOM, now - do it then.
> +						 */
> +					if (!rw_mgr_mem_calibrate_vfifo_center
> +						(rank_bgn, write_group,
> +						read_group, test_bgn, 1, 0)) {
> +							grp_calibrated = 0;
> +							failed_substage =
> +						CAL_SUBSTAGE_VFIFO_CENTER;
> +						}
> +					}

Here too. Notice that indentation is actually wrong/confusing here,
the ifs are nested.

> +/* VFIFO Calibration -- Read Deskew Calibration after write deskew */
> +static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
> +					       uint32_t test_bgn)
> +{
> +	uint32_t rank_bgn, sr;
> +	uint32_t grp_calibrated;
> +	uint32_t write_group;
> +
> +	debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn);
> +
> +	/* update info for sims */
> +
> +	reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
> +	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
> +
> +	write_group = read_group;
> +
> +	/* update info for sims */
> +	reg_file_set_group(read_group);
> +
> +	grp_calibrated = 1;
> +	/* Read per-bit deskew can be done on a per shadow register basis */
> +	for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
> +		rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
> +		/* Determine if this set of ranks should be skipped entirely */
> +		if (!param->skip_shadow_regs[sr]) {

if (param...)
   continue;

Then you get one less level of identation.

> +		/* This is the last calibration round, update FOM here */
> +			if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
> +								write_group,
> +								read_group,
> +								test_bgn, 0,
> +								1)) {
> +				grp_calibrated = 0;
> +			}

..and you'll be able to do something with this.

> +		writel(gbl->curr_read_lat, SOCFPGA_SDR_ADDRESS + addr);
> +		debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
> +			   __func__, __LINE__, gbl->curr_read_lat);
> +
> +		if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
> +							      NUM_READ_TESTS,
> +							      PASS_ALL_BITS,
> +							      &bit_chk, 1)) {
> +			break;
> +		}

If you can't fix it any other way,

	     if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
	     	   NUM_READ_TESTS, PASS_ALL_BITS, &bit_chk, 1)) {

is preferable. And you don't need {}'s around break;

> +		if (stop == 1) {
> +			if (d == 0) {
> +				for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
> +					i++) {
> +					/* d = 0 failed, but it passed when
> +					testing the left edge, so it must be
> +					marginal, set it to -1 */
> +					if (right_edge[i] ==
> +						IO_IO_OUT1_DELAY_MAX + 1 &&
> +						left_edge[i] !=
> +						IO_IO_OUT1_DELAY_MAX + 1) {
> +						right_edge[i] = -1;
> +					}
> +				}
> +			}
> +			break;
> +		} else {
> +			for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
> +				if (bit_chk & 1) {
> +					/*
> +					 * Remember a passing test as
> +					 * the right_edge.
> +					 */
> +					right_edge[i] = d;
> +				} else {
> +					if (d != 0) {
> +						/*
> +						 * If a right edge has not
> +						 * been seen yet, then a future
> +						 * passing test will mark this
> +						 * edge as the left edge.
> +						 */
> +						if (right_edge[i] ==
> +						    IO_IO_OUT1_DELAY_MAX + 1)
> +							left_edge[i] = -(d + 1);
> +					} else {
> +						/*
> +						 * d = 0 failed, but it passed
> +						 * when testing the left edge,
> +						 * so it must be marginal, set
> +						 * it to -1.
> +						 */
> +						if (right_edge[i] ==
> +						    IO_IO_OUT1_DELAY_MAX + 1 &&
> +						    left_edge[i] !=
> +						    IO_IO_OUT1_DELAY_MAX + 1)
> +							right_edge[i] = -1;
> +						/*
> +						 * If a right edge has not been
> +						 * seen yet, then a future
> +						 * passing test will mark this
> +						 * edge as the left edge.
> +						 */
> +						else if (right_edge[i] ==
> +							IO_IO_OUT1_DELAY_MAX +
> +							1)
> +							left_edge[i] = -(d + 1);
> +					}
> +				}

Something needs to be done here, too...

> +				for (read_group = write_group *
> +					RW_MGR_MEM_IF_READ_DQS_WIDTH /
> +					RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
> +					read_test_bgn = 0;
> +					read_group < (write_group + 1) *
> +					RW_MGR_MEM_IF_READ_DQS_WIDTH /
> +					RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
> +					group_failed == 0;
> +					read_group++, read_test_bgn +=
> +					RW_MGR_MEM_DQ_PER_READ_DQS) {
> +					/* Calibrate the VFIFO */
> +					if (!((STATIC_CALIB_STEPS) &
> +						CALIB_SKIP_VFIFO)) {
> +						if (!rw_mgr_mem_calibrate_vfifo
> +							(read_group,
> +							read_test_bgn)) {
> +							group_failed = 1;
> +
> +							if (!(gbl->
> +							phy_debug_mode_flags &
> +						PHY_DEBUG_SWEEP_ALL_GROUPS)) {
> +								return 0;
> +							}
> +						}
> +					}
> +				}
> +
> +				/* Calibrate the output side */
> +				if (group_failed == 0)	{
> +					for (rank_bgn = 0, sr = 0; rank_bgn
> +						< RW_MGR_MEM_NUMBER_OF_RANKS;
> +						rank_bgn +=
> +						NUM_RANKS_PER_SHADOW_REG,
> +						++sr) {
> +						sr_failed = 0;
> +						if (!((STATIC_CALIB_STEPS) &
> +						CALIB_SKIP_WRITES)) {
> +							if ((STATIC_CALIB_STEPS)
> +						& CALIB_SKIP_DELAY_SWEEPS) {
> +						/* not needed in quick mode! */
> +							} else {
> +						/*
> +						 * Determine if this set of
> +						 * ranks should be skipped
> +						 * entirely.
> +						 */
> +					if (!param->skip_shadow_regs[sr]) {
> +						if (!rw_mgr_mem_calibrate_writes
> +						(rank_bgn, write_group,
> +						write_test_bgn)) {
> +							sr_failed = 1;
> +							if (!(gbl->
> +							phy_debug_mode_flags &
> +						PHY_DEBUG_SWEEP_ALL_GROUPS)) {
> +								return 0;
> +									}
> +									}
> +								}
> +							}
> +						}
> +						if (sr_failed != 0)
> +							group_failed = 1;
> +					}
> +				}
> +
> +				if (group_failed == 0) {
> +					for (read_group = write_group *
> +					RW_MGR_MEM_IF_READ_DQS_WIDTH /
> +					RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
> +					read_test_bgn = 0;
> +						read_group < (write_group + 1)
> +						* RW_MGR_MEM_IF_READ_DQS_WIDTH
> +						/ RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
> +						group_failed == 0;
> +						read_group++, read_test_bgn +=
> +						RW_MGR_MEM_DQ_PER_READ_DQS) {
> +						if (!((STATIC_CALIB_STEPS) &
> +							CALIB_SKIP_WRITES)) {
> +					if (!rw_mgr_mem_calibrate_vfifo_end
> +						(read_group, read_test_bgn)) {
> +							group_failed = 1;
> +
> +						if (!(gbl->phy_debug_mode_flags
> +						& PHY_DEBUG_SWEEP_ALL_GROUPS)) {
> +								return 0;
> +								}
> +							}
> +						}
> +					}
> +				}

And here. Note how indentation is so deep ifs are actually indented
the other way around here.

I guess it would be better to ignore 80-columns rule than _this_. But
splitting it into smaller functions is of course preffered.

Thanks,
									Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html