[U-Boot] [PATCHv4 2/3] driver/ddr/altera: Add the sdram calibration portion
Pavel Machek
pavel at denx.de
Tue Jun 9 14:21:45 CEST 2015
In title, you can convert sdram->SDRAM
On Tue 2015-06-02 22:52:49, dinguyen at opensource.altera.com wrote:
> From: Dinh Nguyen <dinguyen at opensource.altera.com>
>
> This patch adds the DDR calibration portion of the Altera SDRAM driver.
>
> Signed-off-by: Dinh Nguyen <dinguyen at opensource.altera.com>
> +/*
> + * In order to reduce ROM size, most of the selectable calibration steps are
> + * decided at compile time based on the user's calibration mode selection,
> + * as captured by the STATIC_CALIB_STEPS selection below.
> + *
> + * However, to support simulation-time selection of fast simulation mode, where
> + * we skip everything except the bare minimum, we need a few of the steps to
> + * be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the
> + * check, which is based on the rtl-supplied value, or we dynamically compute
> + * the value to use based on the dynamically-chosen calibration mode
"." at the end of sentence.
> +
> +#define DLEVEL 0
> +#define STATIC_IN_RTL_SIM 0
> +#define STATIC_SKIP_DELAY_LOOPS 0
> +
> +#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
> + STATIC_SKIP_DELAY_LOOPS)
Would it make sense to drop simulation-time support for initial merge?
> +/* calibration steps requested by the rtl */
> +uint16_t dyn_calib_steps;
rtl?
> +/*
> + * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
> + * instead of static, we use boolean logic to select between
> + * non-skip and skip values
> + *
> + * The mask is set to include all bits when not-skipping, but is
> + * zero when skipping
> + */
"." at the end of sentence. Twice here :-).
> +
> +uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */
> +
> +#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
> + ((non_skip_value) & skip_delay_mask)
> +
> +struct gbl_type *gbl;
> +struct param_type *param;
> +uint32_t curr_shadow_reg;
> +
> +static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
> + uint32_t write_group, uint32_t use_dm,
> + uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);
> +
> +static u32 sdr_get_addr(u32 *base)
> +{
> + u32 addr = (u32)base & MGR_SELECT_MASK;
You sometimes use uint32_t, and sometimes u32, as seen here. Would it
be more readable to just use u32 everywhere?
And this function would be really helped by taking "u32 base", not
"u32 *", as youd reduce number of typecasts below...
> + switch (addr) {
> + case BASE_PHY_MGR:
> + addr = (((u32)base >> 8) & (1 << 6)) | ((u32)base & 0x3f) |
> + SDR_PHYGRP_PHYMGRGRP_ADDRESS;
> + break;
> + case BASE_RW_MGR:
> + addr = ((u32)base & 0x1fff) | SDR_PHYGRP_RWMGRGRP_ADDRESS;
> + break;
> + case BASE_DATA_MGR:
> + addr = ((u32)base & 0x7ff) | SDR_PHYGRP_DATAMGRGRP_ADDRESS;
> + break;
> + case BASE_SCC_MGR:
> + addr = ((u32)base & 0xfff) | SDR_PHYGRP_SCCGRP_ADDRESS;
> + break;
> + case BASE_REG_FILE:
> + addr = ((u32)base & 0x7ff) | SDR_PHYGRP_REGFILEGRP_ADDRESS;
> + break;
> + case BASE_MMR:
> + addr = ((u32)base & 0xfff) | SDR_CTRLGRP_ADDRESS;
> + break;
Or at least introduce temporary variable...
> +static void initialize(void)
> +{
> + u32 addr = sdr_get_addr(&phy_mgr_cfg->mux_sel);
> +
> + debug("%s:%d\n", __func__, __LINE__);
Is this debugging neccessary? It will change when you for example
change comment in here...
> + /* USER calibration has control over path to memory */
> + /*
> + * In Hard PHY this is a 2-bit control:
> + * 0: AFI Mux Select
> + * 1: DDIO Mux Select
> + */
> + writel(0x3, SOCFPGA_SDR_ADDRESS + addr);
> +
> + /* USER memory clock is not stable we begin initialization */
" */" -> " */"
> + for (i = 0; i < 16; i++) {
> + debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u",
> + __func__, __LINE__, i);
Ok, custom debugging system. Neccessary?
> +static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
> +{
> + uint32_t addr = sdr_get_addr((u32 *)SCC_MGR_DQS_EN_DELAY);
Now uint32_t vs. u32 on single line...
> + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
> + r += NUM_RANKS_PER_SHADOW_REG) {
> + scc_mgr_set_dqs_en_delay(read_group, delay);
> +
> + addr = sdr_get_addr(&sdr_scc_mgr->dqs_ena);
> + writel(read_group, SOCFPGA_SDR_ADDRESS + addr);
> + /*
> + * In shadow register mode, the T11 settings are stored in
> + * registers in the core, which are updated by the DQS_ENA
> + * signals. Not issuing the SCC_MGR_UPD command allows us to
> + * save lots of rank switching overhead, by calling
> + * select_shadow_regs_for_update with update_scan_chains
> + * set to 0.
> + */
> + addr = sdr_get_addr(&sdr_scc_mgr->update);
> + writel(0, SOCFPGA_SDR_ADDRESS + addr);
> + }
> + /*
> + * In shadow register mode, the T11 settings are stored in
> + * registers in the core, which are updated by the DQS_ENA
> + * signals. Not issuing the SCC_MGR_UPD command allows us to
> + * save lots of rank switching overhead, by calling
> + * select_shadow_regs_for_update with update_scan_chains
> + * set to 0.
> + */
> + addr = sdr_get_addr(&sdr_scc_mgr->update);
> + writel(0, SOCFPGA_SDR_ADDRESS + addr);
Umm. Two completely same comments, two same pieces of code. Make it
function?
Does it make sense to run this twice in fact?
> +static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode)
> +{
> + uint32_t addr;
> + /* mode = 0 : Do NOT bypass - Half Rate Mode */
> + /* mode = 1 : Bypass - Full Rate Mode */
Then perhaps the parameter should be named "do_bypass" or "full_rate"?
> + if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
> + set_jump_as_return();
> + addr = sdr_get_addr((u32 *)RW_MGR_RUN_SINGLE_GROUP);
> + writel(RW_MGR_MRS2_MIRR, SOCFPGA_SDR_ADDRESS + addr);
> + delay_for_n_mem_clocks(4);
> + set_jump_as_return();
> + writel(RW_MGR_MRS3_MIRR, SOCFPGA_SDR_ADDRESS + addr);
> + delay_for_n_mem_clocks(4);
What is this, some kind of bytecode interpretter? Or does it create a
bytecode for someone?
> +static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
> + uint32_t dtaps_per_ptap, uint32_t *work_bgn,
> + uint32_t *v, uint32_t *d, uint32_t *p,
> + uint32_t *i, uint32_t *max_working_cnt)
> +{
Undocumented, single-letter parameters do not really make reading the
code easy...
Should we have structure for work_bgn, v, d, p, i ... instead of
separate parameters?
> + uint32_t found_begin = 0;
> + uint32_t tmp_delay = 0;
> + uint32_t test_status;
> +
> + for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
> + IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
> + *work_bgn = tmp_delay;
> + scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
> +
> + for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
> + for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
> + IO_DELAY_PER_OPA_TAP) {
Does this look like c-code to you?
> + } else {
> + /* ******************************************************* */
> + /* * step 3-5b: Find the right edge of the window using
> + delay taps * */
> + debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \
> + ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
> + v, p, d, work_bgn);
> +
> + work_end = work_bgn;
> +
> + /* * The actual increment of dtaps is done outside of the
> + if/else loop to share code */
> +
> + /* Only here to counterbalance a subtract later on which is
> + not needed if this branch of the algorithm is taken */
> + max_working_cnt++;
Comment style in this block is even stranger than in other pieces of
this patch.
> + } else {
> + for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
> + if (bit_chk & 1) {
> + /* Remember a passing test as
> + the right_edge */
> + right_edge[i] = d;
> + } else {
> + if (d != 0) {
> + /* If a right edge has not been
> + seen yet, then a future passing
> + test will mark this edge as the
> + left edge */
> + if (right_edge[i] ==
> + IO_IO_IN_DELAY_MAX + 1) {
> + left_edge[i] = -(d + 1);
> + }
> + } else {
> + /* d = 0 failed, but it passed
> + when testing the left edge,
> + so it must be marginal,
> + set it to -1 */
> + if (right_edge[i] ==
> + IO_IO_IN_DELAY_MAX + 1 &&
> + left_edge[i] !=
> + IO_IO_IN_DELAY_MAX
> + + 1) {
> + right_edge[i] = -1;
> + }
When this happens to your code, you know it is time to split your
function into sub-functions.
> + if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
> + (write_group, read_group, test_bgn)) {
> + /*
> + * USER Read per-bit deskew can be done on a
> + * per shadow register basis.
> + */
> + for (rank_bgn = 0, sr = 0;
> + rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
> + rank_bgn += NUM_RANKS_PER_SHADOW_REG,
> + ++sr) {
> + /*
> + * Determine if this set of ranks
> + * should be skipped entirely.
> + */
> + if (!param->skip_shadow_regs[sr]) {
> + /*
> + * If doing read after write
> + * calibration, do not update
> + * FOM, now - do it then.
> + */
> + if (!rw_mgr_mem_calibrate_vfifo_center
> + (rank_bgn, write_group,
> + read_group, test_bgn, 1, 0)) {
> + grp_calibrated = 0;
> + failed_substage =
> + CAL_SUBSTAGE_VFIFO_CENTER;
> + }
> + }
Here too. Notice that indentation is actually wrong/confusing here,
the ifs are nested.
> +/* VFIFO Calibration -- Read Deskew Calibration after write deskew */
> +static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
> + uint32_t test_bgn)
> +{
> + uint32_t rank_bgn, sr;
> + uint32_t grp_calibrated;
> + uint32_t write_group;
> +
> + debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn);
> +
> + /* update info for sims */
> +
> + reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
> + reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
> +
> + write_group = read_group;
> +
> + /* update info for sims */
> + reg_file_set_group(read_group);
> +
> + grp_calibrated = 1;
> + /* Read per-bit deskew can be done on a per shadow register basis */
> + for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
> + rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
> + /* Determine if this set of ranks should be skipped entirely */
> + if (!param->skip_shadow_regs[sr]) {
if (param...)
continue;
Then you get one less level of identation.
> + /* This is the last calibration round, update FOM here */
> + if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
> + write_group,
> + read_group,
> + test_bgn, 0,
> + 1)) {
> + grp_calibrated = 0;
> + }
..and you'll be able to do something with this.
> + writel(gbl->curr_read_lat, SOCFPGA_SDR_ADDRESS + addr);
> + debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
> + __func__, __LINE__, gbl->curr_read_lat);
> +
> + if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
> + NUM_READ_TESTS,
> + PASS_ALL_BITS,
> + &bit_chk, 1)) {
> + break;
> + }
If you can't fix it any other way,
if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
NUM_READ_TESTS, PASS_ALL_BITS, &bit_chk, 1)) {
is preferable. And you don't need {}'s around break;
> + if (stop == 1) {
> + if (d == 0) {
> + for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
> + i++) {
> + /* d = 0 failed, but it passed when
> + testing the left edge, so it must be
> + marginal, set it to -1 */
> + if (right_edge[i] ==
> + IO_IO_OUT1_DELAY_MAX + 1 &&
> + left_edge[i] !=
> + IO_IO_OUT1_DELAY_MAX + 1) {
> + right_edge[i] = -1;
> + }
> + }
> + }
> + break;
> + } else {
> + for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
> + if (bit_chk & 1) {
> + /*
> + * Remember a passing test as
> + * the right_edge.
> + */
> + right_edge[i] = d;
> + } else {
> + if (d != 0) {
> + /*
> + * If a right edge has not
> + * been seen yet, then a future
> + * passing test will mark this
> + * edge as the left edge.
> + */
> + if (right_edge[i] ==
> + IO_IO_OUT1_DELAY_MAX + 1)
> + left_edge[i] = -(d + 1);
> + } else {
> + /*
> + * d = 0 failed, but it passed
> + * when testing the left edge,
> + * so it must be marginal, set
> + * it to -1.
> + */
> + if (right_edge[i] ==
> + IO_IO_OUT1_DELAY_MAX + 1 &&
> + left_edge[i] !=
> + IO_IO_OUT1_DELAY_MAX + 1)
> + right_edge[i] = -1;
> + /*
> + * If a right edge has not been
> + * seen yet, then a future
> + * passing test will mark this
> + * edge as the left edge.
> + */
> + else if (right_edge[i] ==
> + IO_IO_OUT1_DELAY_MAX +
> + 1)
> + left_edge[i] = -(d + 1);
> + }
> + }
Something needs to be done here, too...
> + for (read_group = write_group *
> + RW_MGR_MEM_IF_READ_DQS_WIDTH /
> + RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
> + read_test_bgn = 0;
> + read_group < (write_group + 1) *
> + RW_MGR_MEM_IF_READ_DQS_WIDTH /
> + RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
> + group_failed == 0;
> + read_group++, read_test_bgn +=
> + RW_MGR_MEM_DQ_PER_READ_DQS) {
> + /* Calibrate the VFIFO */
> + if (!((STATIC_CALIB_STEPS) &
> + CALIB_SKIP_VFIFO)) {
> + if (!rw_mgr_mem_calibrate_vfifo
> + (read_group,
> + read_test_bgn)) {
> + group_failed = 1;
> +
> + if (!(gbl->
> + phy_debug_mode_flags &
> + PHY_DEBUG_SWEEP_ALL_GROUPS)) {
> + return 0;
> + }
> + }
> + }
> + }
> +
> + /* Calibrate the output side */
> + if (group_failed == 0) {
> + for (rank_bgn = 0, sr = 0; rank_bgn
> + < RW_MGR_MEM_NUMBER_OF_RANKS;
> + rank_bgn +=
> + NUM_RANKS_PER_SHADOW_REG,
> + ++sr) {
> + sr_failed = 0;
> + if (!((STATIC_CALIB_STEPS) &
> + CALIB_SKIP_WRITES)) {
> + if ((STATIC_CALIB_STEPS)
> + & CALIB_SKIP_DELAY_SWEEPS) {
> + /* not needed in quick mode! */
> + } else {
> + /*
> + * Determine if this set of
> + * ranks should be skipped
> + * entirely.
> + */
> + if (!param->skip_shadow_regs[sr]) {
> + if (!rw_mgr_mem_calibrate_writes
> + (rank_bgn, write_group,
> + write_test_bgn)) {
> + sr_failed = 1;
> + if (!(gbl->
> + phy_debug_mode_flags &
> + PHY_DEBUG_SWEEP_ALL_GROUPS)) {
> + return 0;
> + }
> + }
> + }
> + }
> + }
> + if (sr_failed != 0)
> + group_failed = 1;
> + }
> + }
> +
> + if (group_failed == 0) {
> + for (read_group = write_group *
> + RW_MGR_MEM_IF_READ_DQS_WIDTH /
> + RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
> + read_test_bgn = 0;
> + read_group < (write_group + 1)
> + * RW_MGR_MEM_IF_READ_DQS_WIDTH
> + / RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
> + group_failed == 0;
> + read_group++, read_test_bgn +=
> + RW_MGR_MEM_DQ_PER_READ_DQS) {
> + if (!((STATIC_CALIB_STEPS) &
> + CALIB_SKIP_WRITES)) {
> + if (!rw_mgr_mem_calibrate_vfifo_end
> + (read_group, read_test_bgn)) {
> + group_failed = 1;
> +
> + if (!(gbl->phy_debug_mode_flags
> + & PHY_DEBUG_SWEEP_ALL_GROUPS)) {
> + return 0;
> + }
> + }
> + }
> + }
> + }
And here. Note how indentation is so deep ifs are actually indented
the other way around here.
I guess it would be better to ignore 80-columns rule than _this_. But
splitting it into smaller functions is of course preffered.
Thanks,
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
More information about the U-Boot
mailing list