X-Git-Url: https://git.kernelconcepts.de/?p=karo-tx-uboot.git;a=blobdiff_plain;f=drivers%2Fddr%2Faltera%2Fsequencer.c;h=f2d164a0358207bf1c948b86a27150a19e40a351;hp=9c698574c59bf7e29a172ca6774dfb0fbf3bece6;hb=7efcb4c62b91a8e7198a1d95ef297ad74d26e594;hpb=db062e1fc534891c9182de383befd9848aae7cb8 diff --git a/drivers/ddr/altera/sequencer.c b/drivers/ddr/altera/sequencer.c index 9c698574c5..f2d164a035 100644 --- a/drivers/ddr/altera/sequencer.c +++ b/drivers/ddr/altera/sequencer.c @@ -9,10 +9,6 @@ #include #include #include "sequencer.h" -#include "sequencer_auto.h" -#include "sequencer_auto_ac_init.h" -#include "sequencer_auto_inst_init.h" -#include "sequencer_defines.h" static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs = (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800); @@ -38,6 +34,10 @@ static struct socfpga_data_mgr *data_mgr = static struct socfpga_sdr_ctrl *sdr_ctrl = (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS; +const struct socfpga_sdram_rw_mgr_config *rwcfg; +const struct socfpga_sdram_io_config *iocfg; +const struct socfpga_sdram_misc_config *misccfg; + #define DELTA_D 1 /* @@ -60,7 +60,7 @@ static struct socfpga_sdr_ctrl *sdr_ctrl = STATIC_SKIP_DELAY_LOOPS) /* calibration steps requested by the rtl */ -uint16_t dyn_calib_steps; +u16 dyn_calib_steps; /* * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option @@ -71,21 +71,16 @@ uint16_t dyn_calib_steps; * zero when skipping */ -uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */ +u16 skip_delay_mask; /* mask off bits when skipping/not-skipping */ #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ ((non_skip_value) & skip_delay_mask) struct gbl_type *gbl; struct param_type *param; -uint32_t curr_shadow_reg; - -static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, - uint32_t write_group, uint32_t use_dm, - uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks); -static void set_failing_group_stage(uint32_t group, uint32_t stage, - uint32_t substage) +static void set_failing_group_stage(u32 group, u32 stage, + u32 substage) { /* * Only set the global stage if there was not been any other @@ -144,15 +139,12 @@ static void phy_mgr_initialize(void) if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) return; - ratio = RW_MGR_MEM_DQ_PER_READ_DQS / - RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS; + ratio = rwcfg->mem_dq_per_read_dqs / + rwcfg->mem_virtual_groups_per_read_dqs; param->read_correct_mask_vg = (1 << ratio) - 1; param->write_correct_mask_vg = (1 << ratio) - 1; - param->read_correct_mask = (1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1; - param->write_correct_mask = (1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; - ratio = RW_MGR_MEM_DATA_WIDTH / - RW_MGR_MEM_DATA_MASK_WIDTH; - param->dm_correct_mask = (1 << ratio) - 1; + param->read_correct_mask = (1 << rwcfg->mem_dq_per_read_dqs) - 1; + param->write_correct_mask = (1 << rwcfg->mem_dq_per_write_dqs) - 1; } /** @@ -172,14 +164,14 @@ static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode) odt_mask_0 = 0x0; odt_mask_1 = 0x0; } else { /* RW_MGR_ODT_MODE_READ_WRITE */ - switch (RW_MGR_MEM_NUMBER_OF_RANKS) { + switch (rwcfg->mem_number_of_ranks) { case 1: /* 1 Rank */ /* Read: ODT = 0 ; Write: ODT = 1 */ odt_mask_0 = 0x0; odt_mask_1 = 0x1; break; case 2: /* 2 Ranks */ - if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) { + if (rwcfg->mem_number_of_cs_per_dimm == 1) { /* * - Dual-Slot , Single-Rank (1 CS per DIMM) * OR @@ -294,57 +286,57 @@ static void scc_mgr_initialize(void) } } -static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase) +static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase) { scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase); } -static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay) +static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay) { scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay); } -static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) +static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase) { scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase); } -static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) +static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay) { scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay); } -static void scc_mgr_set_dqs_io_in_delay(uint32_t delay) +static void scc_mgr_set_dqs_io_in_delay(u32 delay) { - scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, + scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs, delay); } -static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay) +static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay) { scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay); } -static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay) +static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay) { scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay); } -static void scc_mgr_set_dqs_out1_delay(uint32_t delay) +static void scc_mgr_set_dqs_out1_delay(u32 delay) { - scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, + scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs, delay); } -static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay) +static void scc_mgr_set_dm_out1_delay(u32 dm, u32 delay) { scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, - RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm, + rwcfg->mem_dq_per_write_dqs + 1 + dm, delay); } /* load up dqs config settings */ -static void scc_mgr_load_dqs(uint32_t dqs) +static void scc_mgr_load_dqs(u32 dqs) { writel(dqs, &sdr_scc_mgr->dqs_ena); } @@ -356,13 +348,13 @@ static void scc_mgr_load_dqs_io(void) } /* load up dq config settings */ -static void scc_mgr_load_dq(uint32_t dq_in_group) +static void scc_mgr_load_dq(u32 dq_in_group) { writel(dq_in_group, &sdr_scc_mgr->dq_ena); } /* load up dm config settings */ -static void scc_mgr_load_dm(uint32_t dm) +static void scc_mgr_load_dm(u32 dm) { writel(dm, &sdr_scc_mgr->dm_ena); } @@ -382,7 +374,7 @@ static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val, { u32 r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; + for (r = 0; r < rwcfg->mem_number_of_ranks; r += NUM_RANKS_PER_SHADOW_REG) { scc_mgr_set(off, grp, val); @@ -407,8 +399,8 @@ static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase) read_group, phase, 0); } -static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, - uint32_t phase) +static void scc_mgr_set_dqdqs_output_phase_all_ranks(u32 write_group, + u32 phase) { /* * USER although the h/w doesn't support different phases per @@ -422,8 +414,8 @@ static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, write_group, phase, 0); } -static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, - uint32_t delay) +static void scc_mgr_set_dqs_en_delay_all_ranks(u32 read_group, + u32 delay) { /* * In shadow register mode, the T11 settings are stored in @@ -447,8 +439,8 @@ static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, */ static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay) { - const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / - RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + const int ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; const int base = write_group * ratio; int i; /* @@ -504,23 +496,23 @@ static void scc_mgr_zero_all(void) * USER Zero all DQS config settings, across all groups and all * shadow registers */ - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; + for (r = 0; r < rwcfg->mem_number_of_ranks; r += NUM_RANKS_PER_SHADOW_REG) { - for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { + for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) { /* * The phases actually don't exist on a per-rank basis, * but there's no harm updating them several times, so * let's keep the code simple. */ - scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE); + scc_mgr_set_dqs_bus_in_delay(i, iocfg->dqs_in_reserve); scc_mgr_set_dqs_en_phase(i, 0); scc_mgr_set_dqs_en_delay(i, 0); } - for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { + for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) { scc_mgr_set_dqdqs_output_phase(i, 0); /* Arria V/Cyclone V don't have out2. */ - scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE); + scc_mgr_set_oct_out1_delay(i, iocfg->dqs_out_reserve); } } @@ -559,8 +551,8 @@ static void scc_set_bypass_mode(const u32 write_group) */ static void scc_mgr_load_dqs_for_write_group(const u32 write_group) { - const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / - RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + const int ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; const int base = write_group * ratio; int i; /* @@ -583,10 +575,10 @@ static void scc_mgr_zero_group(const u32 write_group, const int out_only) { int i, r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; + for (r = 0; r < rwcfg->mem_number_of_ranks; r += NUM_RANKS_PER_SHADOW_REG) { /* Zero all DQ config settings. */ - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { scc_mgr_set_dq_out1_delay(i, 0); if (!out_only) scc_mgr_set_dq_in_delay(i, 0); @@ -607,8 +599,8 @@ static void scc_mgr_zero_group(const u32 write_group, const int out_only) scc_mgr_set_dqs_io_in_delay(0); /* Arria V/Cyclone V don't have out2. */ - scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE); - scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE); + scc_mgr_set_dqs_out1_delay(iocfg->dqs_out_reserve); + scc_mgr_set_oct_out1_delay(write_group, iocfg->dqs_out_reserve); scc_mgr_load_dqs_for_write_group(write_group); /* Multicast to all DQS IO enables (only 1 in total). */ @@ -623,11 +615,11 @@ static void scc_mgr_zero_group(const u32 write_group, const int out_only) * apply and load a particular input delay for the DQ pins in a group * group_bgn is the index of the first dq pin (in the write group) */ -static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay) +static void scc_mgr_apply_group_dq_in_delay(u32 group_bgn, u32 delay) { - uint32_t i, p; + u32 i, p; - for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { + for (i = 0, p = group_bgn; i < rwcfg->mem_dq_per_read_dqs; i++, p++) { scc_mgr_set_dq_in_delay(p, delay); scc_mgr_load_dq(p); } @@ -643,16 +635,16 @@ static void scc_mgr_apply_group_dq_out1_delay(const u32 delay) { int i; - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { scc_mgr_set_dq_out1_delay(i, delay); scc_mgr_load_dq(i); } } /* apply and load a particular output delay for the DM pins in a group */ -static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1) +static void scc_mgr_apply_group_dm_out1_delay(u32 delay1) { - uint32_t i; + u32 i; for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { scc_mgr_set_dm_out1_delay(i, delay1); @@ -662,8 +654,8 @@ static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1) /* apply and load delay on both DQS and OCT out1 */ -static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group, - uint32_t delay) +static void scc_mgr_apply_group_dqs_io_and_oct_out1(u32 write_group, + u32 delay) { scc_mgr_set_dqs_out1_delay(delay); scc_mgr_load_dqs_io(); @@ -685,7 +677,7 @@ static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group, u32 i, new_delay; /* DQ shift */ - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) scc_mgr_load_dq(i); /* DM shift */ @@ -694,13 +686,13 @@ static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group, /* DQS shift */ new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay; - if (new_delay > IO_IO_OUT2_DELAY_MAX) { + if (new_delay > iocfg->io_out2_delay_max) { debug_cond(DLEVEL == 1, "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n", __func__, __LINE__, write_group, delay, new_delay, - IO_IO_OUT2_DELAY_MAX, - new_delay - IO_IO_OUT2_DELAY_MAX); - new_delay -= IO_IO_OUT2_DELAY_MAX; + iocfg->io_out2_delay_max, + new_delay - iocfg->io_out2_delay_max); + new_delay -= iocfg->io_out2_delay_max; scc_mgr_set_dqs_out1_delay(new_delay); } @@ -708,13 +700,13 @@ static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group, /* OCT shift */ new_delay = READ_SCC_OCT_OUT2_DELAY + delay; - if (new_delay > IO_IO_OUT2_DELAY_MAX) { + if (new_delay > iocfg->io_out2_delay_max) { debug_cond(DLEVEL == 1, "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n", __func__, __LINE__, write_group, delay, - new_delay, IO_IO_OUT2_DELAY_MAX, - new_delay - IO_IO_OUT2_DELAY_MAX); - new_delay -= IO_IO_OUT2_DELAY_MAX; + new_delay, iocfg->io_out2_delay_max, + new_delay - iocfg->io_out2_delay_max); + new_delay -= iocfg->io_out2_delay_max; scc_mgr_set_oct_out1_delay(write_group, new_delay); } @@ -734,7 +726,7 @@ scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group, { int r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; + for (r = 0; r < rwcfg->mem_number_of_ranks; r += NUM_RANKS_PER_SHADOW_REG) { scc_mgr_apply_group_all_out_delay_add(write_group, delay); writel(0, &sdr_scc_mgr->update); @@ -755,48 +747,39 @@ static void set_jump_as_return(void) * we always jump. */ writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0); - writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0); + writel(rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0); } -/* - * should always use constants as argument to ensure all computations are - * performed at compile time +/** + * delay_for_n_mem_clocks() - Delay for N memory clocks + * @clocks: Length of the delay + * + * Delay for N memory clocks. */ -static void delay_for_n_mem_clocks(const uint32_t clocks) +static void delay_for_n_mem_clocks(const u32 clocks) { - uint32_t afi_clocks; - uint8_t inner = 0; - uint8_t outer = 0; - uint16_t c_loop = 0; + u32 afi_clocks; + u16 c_loop; + u8 inner; + u8 outer; debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks); - - afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO; - /* scale (rounding up) to get afi clocks */ + /* Scale (rounding up) to get afi clocks. */ + afi_clocks = DIV_ROUND_UP(clocks, misccfg->afi_rate_ratio); + if (afi_clocks) /* Temporary underflow protection */ + afi_clocks--; /* - * Note, we don't bother accounting for being off a little bit - * because of a few extra instructions in outer loops - * Note, the loops have a test at the end, and do the test before - * the decrement, and so always perform the loop + * Note, we don't bother accounting for being off a little + * bit because of a few extra instructions in outer loops. + * Note, the loops have a test at the end, and do the test + * before the decrement, and so always perform the loop * 1 time more than the counter value */ - if (afi_clocks == 0) { - ; - } else if (afi_clocks <= 0x100) { - inner = afi_clocks-1; - outer = 0; - c_loop = 0; - } else if (afi_clocks <= 0x10000) { - inner = 0xff; - outer = (afi_clocks-1) >> 8; - c_loop = 0; - } else { - inner = 0xff; - outer = 0xff; - c_loop = (afi_clocks-1) >> 16; - } + c_loop = afi_clocks >> 16; + outer = c_loop ? 0xff : (afi_clocks >> 8); + inner = outer ? 0xff : afi_clocks; /* * rom instructions are structured as follows: @@ -815,14 +798,14 @@ static void delay_for_n_mem_clocks(const uint32_t clocks) * and sequencer rom and keeps the delays more accurate and reduces * overhead */ - if (afi_clocks <= 0x100) { + if (afi_clocks < 0x100) { writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), &sdr_rw_load_mgr_regs->load_cntr1); - writel(RW_MGR_IDLE_LOOP1, + writel(rwcfg->idle_loop1, &sdr_rw_load_jump_mgr_regs->load_jump_add1); - writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS | + writel(rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET); } else { writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), @@ -831,24 +814,17 @@ static void delay_for_n_mem_clocks(const uint32_t clocks) writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer), &sdr_rw_load_mgr_regs->load_cntr1); - writel(RW_MGR_IDLE_LOOP2, + writel(rwcfg->idle_loop2, &sdr_rw_load_jump_mgr_regs->load_jump_add0); - writel(RW_MGR_IDLE_LOOP2, + writel(rwcfg->idle_loop2, &sdr_rw_load_jump_mgr_regs->load_jump_add1); - /* hack to get around compiler not being smart enough */ - if (afi_clocks <= 0x10000) { - /* only need to run once */ - writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS | - RW_MGR_RUN_SINGLE_GROUP_OFFSET); - } else { - do { - writel(RW_MGR_IDLE_LOOP2, - SDR_PHYGRP_RWMGRGRP_ADDRESS | - RW_MGR_RUN_SINGLE_GROUP_OFFSET); - } while (c_loop-- != 0); - } + do { + writel(rwcfg->idle_loop2, + SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET); + } while (c_loop-- != 0); } debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks); } @@ -864,7 +840,7 @@ static void delay_for_n_mem_clocks(const uint32_t clocks) */ static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump) { - uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; /* Load counters */ @@ -899,44 +875,39 @@ static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2, RW_MGR_RUN_SINGLE_GROUP_OFFSET; u32 r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - if (param->skip_ranks[r]) { - /* request to skip the rank */ - continue; - } - + for (r = 0; r < rwcfg->mem_number_of_ranks; r++) { /* set rank */ set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); /* precharge all banks ... */ if (precharge) - writel(RW_MGR_PRECHARGE_ALL, grpaddr); + writel(rwcfg->precharge_all, grpaddr); /* * USER Use Mirror-ed commands for odd ranks if address * mirrorring is on */ - if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { + if ((rwcfg->mem_address_mirroring >> r) & 0x1) { set_jump_as_return(); - writel(RW_MGR_MRS2_MIRR, grpaddr); + writel(rwcfg->mrs2_mirr, grpaddr); delay_for_n_mem_clocks(4); set_jump_as_return(); - writel(RW_MGR_MRS3_MIRR, grpaddr); + writel(rwcfg->mrs3_mirr, grpaddr); delay_for_n_mem_clocks(4); set_jump_as_return(); - writel(RW_MGR_MRS1_MIRR, grpaddr); + writel(rwcfg->mrs1_mirr, grpaddr); delay_for_n_mem_clocks(4); set_jump_as_return(); writel(fin1, grpaddr); } else { set_jump_as_return(); - writel(RW_MGR_MRS2, grpaddr); + writel(rwcfg->mrs2, grpaddr); delay_for_n_mem_clocks(4); set_jump_as_return(); - writel(RW_MGR_MRS3, grpaddr); + writel(rwcfg->mrs3, grpaddr); delay_for_n_mem_clocks(4); set_jump_as_return(); - writel(RW_MGR_MRS1, grpaddr); + writel(rwcfg->mrs1, grpaddr); set_jump_as_return(); writel(fin2, grpaddr); } @@ -945,7 +916,7 @@ static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2, continue; set_jump_as_return(); - writel(RW_MGR_ZQCL, grpaddr); + writel(rwcfg->zqcl, grpaddr); /* tZQinit = tDLLK = 512 ck cycles */ delay_for_n_mem_clocks(512); @@ -988,9 +959,9 @@ static void rw_mgr_mem_initialize(void) * One possible solution is n = 0 , a = 256 , b = 106 => a = FF, * b = 6A */ - rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL, - SEQ_TINIT_CNTR2_VAL, - RW_MGR_INIT_RESET_0_CKE_0); + rw_mgr_mem_init_load_regs(misccfg->tinit_cntr0_val, misccfg->tinit_cntr1_val, + misccfg->tinit_cntr2_val, + rwcfg->init_reset_0_cke_0); /* Indicate that memory is stable. */ writel(1, &phy_mgr_cfg->reset_mem_stbl); @@ -1009,194 +980,413 @@ static void rw_mgr_mem_initialize(void) * One possible solution is n = 2 , a = 131 , b = 256 => a = 83, * b = FF */ - rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL, - SEQ_TRESET_CNTR2_VAL, - RW_MGR_INIT_RESET_1_CKE_0); + rw_mgr_mem_init_load_regs(misccfg->treset_cntr0_val, misccfg->treset_cntr1_val, + misccfg->treset_cntr2_val, + rwcfg->init_reset_1_cke_0); /* Bring up clock enable. */ /* tXRP < 250 ck cycles */ delay_for_n_mem_clocks(250); - rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET, + rw_mgr_mem_load_user(rwcfg->mrs0_dll_reset_mirr, rwcfg->mrs0_dll_reset, 0); } -/* - * At the end of calibration we have to program the user settings in, and - * USER hand off the memory to the user. +/** + * rw_mgr_mem_handoff() - Hand off the memory to user + * + * At the end of calibration we have to program the user settings in + * and hand off the memory to the user. */ static void rw_mgr_mem_handoff(void) { - rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1); + rw_mgr_mem_load_user(rwcfg->mrs0_user_mirr, rwcfg->mrs0_user, 1); /* - * USER need to wait tMOD (12CK or 15ns) time before issuing - * other commands, but we will have plenty of NIOS cycles before - * actual handoff so its okay. + * Need to wait tMOD (12CK or 15ns) time before issuing other + * commands, but we will have plenty of NIOS cycles before actual + * handoff so its okay. */ } -/* - * performs a guaranteed read on the patterns we are going to use during a - * read test to ensure memory works +/** + * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command + * @group: Write Group + * @use_dm: Use DM + * + * Issue write test command. Two variants are provided, one that just tests + * a write pattern and another that tests datamask functionality. */ -static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn, - uint32_t group, uint32_t num_tries, uint32_t *bit_chk, - uint32_t all_ranks) +static void rw_mgr_mem_calibrate_write_test_issue(u32 group, + u32 test_dm) { - uint32_t r, vg; - uint32_t correct_mask_vg; - uint32_t tmp_bit_chk; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); - uint32_t addr; - uint32_t base_rw_mgr; + const u32 quick_write_mode = + (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) && + misccfg->enable_super_quick_calibration; + u32 mcc_instruction; + u32 rw_wl_nop_cycles; - *bit_chk = param->read_correct_mask; - correct_mask_vg = param->read_correct_mask_vg; + /* + * Set counter and jump addresses for the right + * number of NOP cycles. + * The number of supported NOP cycles can range from -1 to infinity + * Three different cases are handled: + * + * 1. For a number of NOP cycles greater than 0, the RW Mgr looping + * mechanism will be used to insert the right number of NOPs + * + * 2. For a number of NOP cycles equals to 0, the micro-instruction + * issuing the write command will jump straight to the + * micro-instruction that turns on DQS (for DDRx), or outputs write + * data (for RLD), skipping + * the NOP micro-instruction all together + * + * 3. A number of NOP cycles equal to -1 indicates that DQS must be + * turned on in the same micro-instruction that issues the write + * command. Then we need + * to directly jump to the micro-instruction that sends out the data + * + * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters + * (2 and 3). One jump-counter (0) is used to perform multiple + * write-read operations. + * one counter left to issue this command in "multiple-group" mode + */ + + rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; + + if (rw_wl_nop_cycles == -1) { + /* + * CNTR 2 - We want to execute the special write operation that + * turns on DQS right away and then skip directly to the + * instruction that sends out the data. We set the counter to a + * large number so that the jump is always taken. + */ + writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); + + /* CNTR 3 - Not used */ + if (test_dm) { + mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0_wl_1; + writel(rwcfg->lfsr_wr_rd_dm_bank_0_data, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } else { + mcc_instruction = rwcfg->lfsr_wr_rd_bank_0_wl_1; + writel(rwcfg->lfsr_wr_rd_bank_0_data, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + writel(rwcfg->lfsr_wr_rd_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } + } else if (rw_wl_nop_cycles == 0) { + /* + * CNTR 2 - We want to skip the NOP operation and go straight + * to the DQS enable instruction. We set the counter to a large + * number so that the jump is always taken. + */ + writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); + + /* CNTR 3 - Not used */ + if (test_dm) { + mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0; + writel(rwcfg->lfsr_wr_rd_dm_bank_0_dqs, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + } else { + mcc_instruction = rwcfg->lfsr_wr_rd_bank_0; + writel(rwcfg->lfsr_wr_rd_bank_0_dqs, + &sdr_rw_load_jump_mgr_regs->load_jump_add2); + } + } else { + /* + * CNTR 2 - In this case we want to execute the next instruction + * and NOT take the jump. So we set the counter to 0. The jump + * address doesn't count. + */ + writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); + writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); + + /* + * CNTR 3 - Set the nop counter to the number of cycles we + * need to loop for, minus 1. + */ + writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); + if (test_dm) { + mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0; + writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } else { + mcc_instruction = rwcfg->lfsr_wr_rd_bank_0; + writel(rwcfg->lfsr_wr_rd_bank_0_nop, + &sdr_rw_load_jump_mgr_regs->load_jump_add3); + } + } + + writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RESET_READ_DATAPATH_OFFSET); + + if (quick_write_mode) + writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); + else + writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); + + writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); + + /* + * CNTR 1 - This is used to ensure enough time elapses + * for read data to come back. + */ + writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); + + if (test_dm) { + writel(rwcfg->lfsr_wr_rd_dm_bank_0_wait, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + } else { + writel(rwcfg->lfsr_wr_rd_bank_0_wait, + &sdr_rw_load_jump_mgr_regs->load_jump_add1); + } + + writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET) + + (group << 2)); +} + +/** + * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass + * @rank_bgn: Rank number + * @write_group: Write Group + * @use_dm: Use DM + * @all_correct: All bits must be correct in the mask + * @bit_chk: Resulting bit mask after the test + * @all_ranks: Test all ranks + * + * Test writes, can check for a single bit pass or multiple bit pass. + */ +static int +rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group, + const u32 use_dm, const u32 all_correct, + u32 *bit_chk, const u32 all_ranks) +{ + const u32 rank_end = all_ranks ? + rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + const u32 shift_ratio = rwcfg->mem_dq_per_write_dqs / + rwcfg->mem_virtual_groups_per_write_dqs; + const u32 correct_mask_vg = param->write_correct_mask_vg; + + u32 tmp_bit_chk, base_rw_mgr; + int vg, r; + + *bit_chk = param->write_correct_mask; for (r = rank_bgn; r < rank_end; r++) { - if (param->skip_ranks[r]) - /* request to skip the rank */ - continue; + /* Set rank */ + set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); - /* set rank */ + tmp_bit_chk = 0; + for (vg = rwcfg->mem_virtual_groups_per_write_dqs - 1; + vg >= 0; vg--) { + /* Reset the FIFOs to get pointers to known state. */ + writel(0, &phy_mgr_cmd->fifo_reset); + + rw_mgr_mem_calibrate_write_test_issue( + write_group * + rwcfg->mem_virtual_groups_per_write_dqs + vg, + use_dm); + + base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); + tmp_bit_chk <<= shift_ratio; + tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr)); + } + + *bit_chk &= tmp_bit_chk; + } + + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + if (all_correct) { + debug_cond(DLEVEL == 2, + "write_test(%u,%u,ALL) : %u == %u => %i\n", + write_group, use_dm, *bit_chk, + param->write_correct_mask, + *bit_chk == param->write_correct_mask); + return *bit_chk == param->write_correct_mask; + } else { + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + debug_cond(DLEVEL == 2, + "write_test(%u,%u,ONE) : %u != %i => %i\n", + write_group, use_dm, *bit_chk, 0, *bit_chk != 0); + return *bit_chk != 0x00; + } +} + +/** + * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns + * @rank_bgn: Rank number + * @group: Read/Write Group + * @all_ranks: Test all ranks + * + * Performs a guaranteed read on the patterns we are going to use during a + * read test to ensure memory works. + */ +static int +rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group, + const u32 all_ranks) +{ + const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET; + const u32 addr_offset = + (group * rwcfg->mem_virtual_groups_per_read_dqs) << 2; + const u32 rank_end = all_ranks ? + rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + const u32 shift_ratio = rwcfg->mem_dq_per_read_dqs / + rwcfg->mem_virtual_groups_per_read_dqs; + const u32 correct_mask_vg = param->read_correct_mask_vg; + + u32 tmp_bit_chk, base_rw_mgr, bit_chk; + int vg, r; + int ret = 0; + + bit_chk = param->read_correct_mask; + + for (r = rank_bgn; r < rank_end; r++) { + /* Set rank */ set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); /* Load up a constant bursts of read commands */ writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); - writel(RW_MGR_GUARANTEED_READ, + writel(rwcfg->guaranteed_read, &sdr_rw_load_jump_mgr_regs->load_jump_add0); writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); - writel(RW_MGR_GUARANTEED_READ_CONT, + writel(rwcfg->guaranteed_read_cont, &sdr_rw_load_jump_mgr_regs->load_jump_add1); tmp_bit_chk = 0; - for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { - /* reset the fifos to get pointers to known state */ - + for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; + vg >= 0; vg--) { + /* Reset the FIFOs to get pointers to known state. */ writel(0, &phy_mgr_cmd->fifo_reset); writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RESET_READ_DATAPATH_OFFSET); - - tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS - / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); - - addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; - writel(RW_MGR_GUARANTEED_READ, addr + - ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + - vg) << 2)); + writel(rwcfg->guaranteed_read, + addr + addr_offset + (vg << 2)); base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); - tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr)); - - if (vg == 0) - break; + tmp_bit_chk <<= shift_ratio; + tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr; } - *bit_chk &= tmp_bit_chk; + + bit_chk &= tmp_bit_chk; } - addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; - writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); + writel(rwcfg->clear_dqs_enable, addr + (group << 2)); set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\ - %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask, - (long unsigned int)(*bit_chk == param->read_correct_mask)); - return *bit_chk == param->read_correct_mask; -} -static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks - (uint32_t group, uint32_t num_tries, uint32_t *bit_chk) -{ - return rw_mgr_mem_calibrate_read_test_patterns(0, group, - num_tries, bit_chk, 1); + if (bit_chk != param->read_correct_mask) + ret = -EIO; + + debug_cond(DLEVEL == 1, + "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n", + __func__, __LINE__, group, bit_chk, + param->read_correct_mask, ret); + + return ret; } -/* load up the patterns we are going to use during a read test */ -static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn, - uint32_t all_ranks) +/** + * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test + * @rank_bgn: Rank number + * @all_ranks: Test all ranks + * + * Load up the patterns we are going to use during a read test. + */ +static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn, + const int all_ranks) { - uint32_t r; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + const u32 rank_end = all_ranks ? + rwcfg->mem_number_of_ranks : + (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + u32 r; debug("%s:%d\n", __func__, __LINE__); - for (r = rank_bgn; r < rank_end; r++) { - if (param->skip_ranks[r]) - /* request to skip the rank */ - continue; + for (r = rank_bgn; r < rank_end; r++) { /* set rank */ set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); /* Load up a constant bursts */ writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); - writel(RW_MGR_GUARANTEED_WRITE_WAIT0, + writel(rwcfg->guaranteed_write_wait0, &sdr_rw_load_jump_mgr_regs->load_jump_add0); writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); - writel(RW_MGR_GUARANTEED_WRITE_WAIT1, + writel(rwcfg->guaranteed_write_wait1, &sdr_rw_load_jump_mgr_regs->load_jump_add1); writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2); - writel(RW_MGR_GUARANTEED_WRITE_WAIT2, + writel(rwcfg->guaranteed_write_wait2, &sdr_rw_load_jump_mgr_regs->load_jump_add2); writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3); - writel(RW_MGR_GUARANTEED_WRITE_WAIT3, + writel(rwcfg->guaranteed_write_wait3, &sdr_rw_load_jump_mgr_regs->load_jump_add3); - writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS | + writel(rwcfg->guaranteed_write, SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET); } set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); } -/* - * try a read and see if it returns correct data back. has dummy reads - * inserted into the mix used to align dqs enable. has more thorough checks - * than the regular read test. - */ -static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group, - uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, - uint32_t all_groups, uint32_t all_ranks) -{ - uint32_t r, vg; - uint32_t correct_mask_vg; - uint32_t tmp_bit_chk; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : +/** + * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank + * @rank_bgn: Rank number + * @group: Read/Write group + * @num_tries: Number of retries of the test + * @all_correct: All bits must be correct in the mask + * @bit_chk: Resulting bit mask after the test + * @all_groups: Test all R/W groups + * @all_ranks: Test all ranks + * + * Try a read and see if it returns correct data back. Test has dummy reads + * inserted into the mix used to align DQS enable. Test has more thorough + * checks than the regular read test. + */ +static int +rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group, + const u32 num_tries, const u32 all_correct, + u32 *bit_chk, + const u32 all_groups, const u32 all_ranks) +{ + const u32 rank_end = all_ranks ? rwcfg->mem_number_of_ranks : (rank_bgn + NUM_RANKS_PER_SHADOW_REG); - uint32_t addr; - uint32_t base_rw_mgr; + const u32 quick_read_mode = + ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) && + misccfg->enable_super_quick_calibration); + u32 correct_mask_vg = param->read_correct_mask_vg; + u32 tmp_bit_chk; + u32 base_rw_mgr; + u32 addr; - *bit_chk = param->read_correct_mask; - correct_mask_vg = param->read_correct_mask_vg; + int r, vg, ret; - uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) & - CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION); + *bit_chk = param->read_correct_mask; for (r = rank_bgn; r < rank_end; r++) { - if (param->skip_ranks[r]) - /* request to skip the rank */ - continue; - /* set rank */ set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1); - writel(RW_MGR_READ_B2B_WAIT1, + writel(rwcfg->read_b2b_wait1, &sdr_rw_load_jump_mgr_regs->load_jump_add1); writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2); - writel(RW_MGR_READ_B2B_WAIT2, + writel(rwcfg->read_b2b_wait2, &sdr_rw_load_jump_mgr_regs->load_jump_add2); if (quick_read_mode) @@ -1207,339 +1397,408 @@ static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group else writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0); - writel(RW_MGR_READ_B2B, + writel(rwcfg->read_b2b, &sdr_rw_load_jump_mgr_regs->load_jump_add0); if (all_groups) - writel(RW_MGR_MEM_IF_READ_DQS_WIDTH * - RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1, + writel(rwcfg->mem_if_read_dqs_width * + rwcfg->mem_virtual_groups_per_read_dqs - 1, &sdr_rw_load_mgr_regs->load_cntr3); else writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3); - writel(RW_MGR_READ_B2B, + writel(rwcfg->read_b2b, &sdr_rw_load_jump_mgr_regs->load_jump_add3); tmp_bit_chk = 0; - for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { - /* reset the fifos to get pointers to known state */ + for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; vg >= 0; + vg--) { + /* Reset the FIFOs to get pointers to known state. */ writel(0, &phy_mgr_cmd->fifo_reset); writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RESET_READ_DATAPATH_OFFSET); - tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS - / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); - - if (all_groups) - addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET; - else - addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; + if (all_groups) { + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_ALL_GROUPS_OFFSET; + } else { + addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | + RW_MGR_RUN_SINGLE_GROUP_OFFSET; + } - writel(RW_MGR_READ_B2B, addr + - ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + + writel(rwcfg->read_b2b, addr + + ((group * rwcfg->mem_virtual_groups_per_read_dqs + vg) << 2)); base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); - tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); - - if (vg == 0) - break; + tmp_bit_chk <<= rwcfg->mem_dq_per_read_dqs / + rwcfg->mem_virtual_groups_per_read_dqs; + tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr); } + *bit_chk &= tmp_bit_chk; } addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; - writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); + writel(rwcfg->clear_dqs_enable, addr + (group << 2)); + + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); if (all_correct) { - set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\ - (%u == %u) => %lu", __func__, __LINE__, group, - all_groups, *bit_chk, param->read_correct_mask, - (long unsigned int)(*bit_chk == - param->read_correct_mask)); - return *bit_chk == param->read_correct_mask; + ret = (*bit_chk == param->read_correct_mask); + debug_cond(DLEVEL == 2, + "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n", + __func__, __LINE__, group, all_groups, *bit_chk, + param->read_correct_mask, ret); } else { - set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\ - (%u != %lu) => %lu\n", __func__, __LINE__, - group, all_groups, *bit_chk, (long unsigned int)0, - (long unsigned int)(*bit_chk != 0x00)); - return *bit_chk != 0x00; + ret = (*bit_chk != 0x00); + debug_cond(DLEVEL == 2, + "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n", + __func__, __LINE__, group, all_groups, *bit_chk, + 0, ret); } + + return ret; } -static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group, - uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, - uint32_t all_groups) +/** + * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks + * @grp: Read/Write group + * @num_tries: Number of retries of the test + * @all_correct: All bits must be correct in the mask + * @all_groups: Test all R/W groups + * + * Perform a READ test across all memory ranks. + */ +static int +rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries, + const u32 all_correct, + const u32 all_groups) { - return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct, - bit_chk, all_groups, 1); + u32 bit_chk; + return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct, + &bit_chk, all_groups, 1); } -static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) +/** + * rw_mgr_incr_vfifo() - Increase VFIFO value + * @grp: Read/Write group + * + * Increase VFIFO value. + */ +static void rw_mgr_incr_vfifo(const u32 grp) { writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy); - (*v)++; } -static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v) +/** + * rw_mgr_decr_vfifo() - Decrease VFIFO value + * @grp: Read/Write group + * + * Decrease VFIFO value. + */ +static void rw_mgr_decr_vfifo(const u32 grp) { - uint32_t i; + u32 i; - for (i = 0; i < VFIFO_SIZE-1; i++) - rw_mgr_incr_vfifo(grp, v); + for (i = 0; i < misccfg->read_valid_fifo_size - 1; i++) + rw_mgr_incr_vfifo(grp); } -static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk) +/** + * find_vfifo_failing_read() - Push VFIFO to get a failing read + * @grp: Read/Write group + * + * Push VFIFO until a failing read happens. + */ +static int find_vfifo_failing_read(const u32 grp) { - uint32_t v; - uint32_t fail_cnt = 0; - uint32_t test_status; + u32 v, ret, fail_cnt = 0; - for (v = 0; v < VFIFO_SIZE; ) { - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n", + for (v = 0; v < misccfg->read_valid_fifo_size; v++) { + debug_cond(DLEVEL == 2, "%s:%d: vfifo %u\n", __func__, __LINE__, v); - test_status = rw_mgr_mem_calibrate_read_test_all_ranks - (grp, 1, PASS_ONE_BIT, bit_chk, 0); - if (!test_status) { + ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, + PASS_ONE_BIT, 0); + if (!ret) { fail_cnt++; if (fail_cnt == 2) - break; + return v; } - /* fiddle with FIFO */ - rw_mgr_incr_vfifo(grp, &v); + /* Fiddle with FIFO. */ + rw_mgr_incr_vfifo(grp); } - if (v >= VFIFO_SIZE) { - /* no failing read found!! Something must have gone wrong */ - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n", - __func__, __LINE__); - return 0; - } else { - return v; - } + /* No failing read found! Something must have gone wrong. */ + debug_cond(DLEVEL == 2, "%s:%d: vfifo failed\n", __func__, __LINE__); + return 0; } -static int find_working_phase(uint32_t *grp, uint32_t *bit_chk, - uint32_t dtaps_per_ptap, uint32_t *work_bgn, - uint32_t *v, uint32_t *d, uint32_t *p, - uint32_t *i, uint32_t *max_working_cnt) +/** + * sdr_find_phase_delay() - Find DQS enable phase or delay + * @working: If 1, look for working phase/delay, if 0, look for non-working + * @delay: If 1, look for delay, if 0, look for phase + * @grp: Read/Write group + * @work: Working window position + * @work_inc: Working window increment + * @pd: DQS Phase/Delay Iterator + * + * Find working or non-working DQS enable phase setting. + */ +static int sdr_find_phase_delay(int working, int delay, const u32 grp, + u32 *work, const u32 work_inc, u32 *pd) { - uint32_t found_begin = 0; - uint32_t tmp_delay = 0; - uint32_t test_status; - - for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay += - IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { - *work_bgn = tmp_delay; - scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); - - for (*i = 0; *i < VFIFO_SIZE; (*i)++) { - for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn += - IO_DELAY_PER_OPA_TAP) { - scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); + const u32 max = delay ? iocfg->dqs_en_delay_max : iocfg->dqs_en_phase_max; + u32 ret; - test_status = - rw_mgr_mem_calibrate_read_test_all_ranks - (*grp, 1, PASS_ONE_BIT, bit_chk, 0); - - if (test_status) { - *max_working_cnt = 1; - found_begin = 1; - break; - } - } + for (; *pd <= max; (*pd)++) { + if (delay) + scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd); + else + scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd); - if (found_begin) - break; + ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, + PASS_ONE_BIT, 0); + if (!working) + ret = !ret; - if (*p > IO_DQS_EN_PHASE_MAX) - /* fiddle with FIFO */ - rw_mgr_incr_vfifo(*grp, v); - } + if (ret) + return 0; - if (found_begin) - break; + if (work) + *work += work_inc; } - if (*i >= VFIFO_SIZE) { - /* cannot find working solution */ - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\ - ptap/dtap\n", __func__, __LINE__); - return 0; - } else { - return 1; - } + return -EINVAL; } - -static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk, - uint32_t *work_bgn, uint32_t *v, uint32_t *d, - uint32_t *p, uint32_t *max_working_cnt) +/** + * sdr_find_phase() - Find DQS enable phase + * @working: If 1, look for working phase, if 0, look for non-working phase + * @grp: Read/Write group + * @work: Working window position + * @i: Iterator + * @p: DQS Phase Iterator + * + * Find working or non-working DQS enable phase setting. + */ +static int sdr_find_phase(int working, const u32 grp, u32 *work, + u32 *i, u32 *p) { - uint32_t found_begin = 0; - uint32_t tmp_delay; + const u32 end = misccfg->read_valid_fifo_size + (working ? 0 : 1); + int ret; - /* Special case code for backing up a phase */ + for (; *i < end; (*i)++) { + if (working) + *p = 0; + + ret = sdr_find_phase_delay(working, 0, grp, work, + iocfg->delay_per_opa_tap, p); + if (!ret) + return 0; + + if (*p > iocfg->dqs_en_phase_max) { + /* Fiddle with FIFO. */ + rw_mgr_incr_vfifo(grp); + if (!working) + *p = 0; + } + } + + return -EINVAL; +} + +/** + * sdr_working_phase() - Find working DQS enable phase + * @grp: Read/Write group + * @work_bgn: Working window start position + * @d: dtaps output value + * @p: DQS Phase Iterator + * @i: Iterator + * + * Find working DQS enable phase setting. + */ +static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d, + u32 *p, u32 *i) +{ + const u32 dtaps_per_ptap = iocfg->delay_per_opa_tap / + iocfg->delay_per_dqs_en_dchain_tap; + int ret; + + *work_bgn = 0; + + for (*d = 0; *d <= dtaps_per_ptap; (*d)++) { + *i = 0; + scc_mgr_set_dqs_en_delay_all_ranks(grp, *d); + ret = sdr_find_phase(1, grp, work_bgn, i, p); + if (!ret) + return 0; + *work_bgn += iocfg->delay_per_dqs_en_dchain_tap; + } + + /* Cannot find working solution */ + debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n", + __func__, __LINE__); + return -EINVAL; +} + +/** + * sdr_backup_phase() - Find DQS enable backup phase + * @grp: Read/Write group + * @work_bgn: Working window start position + * @p: DQS Phase Iterator + * + * Find DQS enable backup phase setting. + */ +static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p) +{ + u32 tmp_delay, d; + int ret; + + /* Special case code for backing up a phase */ if (*p == 0) { - *p = IO_DQS_EN_PHASE_MAX; - rw_mgr_decr_vfifo(*grp, v); + *p = iocfg->dqs_en_phase_max; + rw_mgr_decr_vfifo(grp); } else { (*p)--; } - tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP; - scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); + tmp_delay = *work_bgn - iocfg->delay_per_opa_tap; + scc_mgr_set_dqs_en_phase_all_ranks(grp, *p); - for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; - (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { - scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); + for (d = 0; d <= iocfg->dqs_en_delay_max && tmp_delay < *work_bgn; d++) { + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, - PASS_ONE_BIT, - bit_chk, 0)) { - found_begin = 1; + ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, + PASS_ONE_BIT, 0); + if (ret) { *work_bgn = tmp_delay; break; } - } - /* We have found a working dtap before the ptap found above */ - if (found_begin == 1) - (*max_working_cnt)++; + tmp_delay += iocfg->delay_per_dqs_en_dchain_tap; + } - /* - * Restore VFIFO to old state before we decremented it - * (if needed). - */ + /* Restore VFIFO to old state before we decremented it (if needed). */ (*p)++; - if (*p > IO_DQS_EN_PHASE_MAX) { + if (*p > iocfg->dqs_en_phase_max) { *p = 0; - rw_mgr_incr_vfifo(*grp, v); + rw_mgr_incr_vfifo(grp); } - scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0); + scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); } -static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk, - uint32_t *work_bgn, uint32_t *v, uint32_t *d, - uint32_t *p, uint32_t *i, uint32_t *max_working_cnt, - uint32_t *work_end) +/** + * sdr_nonworking_phase() - Find non-working DQS enable phase + * @grp: Read/Write group + * @work_end: Working window end position + * @p: DQS Phase Iterator + * @i: Iterator + * + * Find non-working DQS enable phase setting. + */ +static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i) { - uint32_t found_end = 0; + int ret; (*p)++; - *work_end += IO_DELAY_PER_OPA_TAP; - if (*p > IO_DQS_EN_PHASE_MAX) { - /* fiddle with FIFO */ + *work_end += iocfg->delay_per_opa_tap; + if (*p > iocfg->dqs_en_phase_max) { + /* Fiddle with FIFO. */ *p = 0; - rw_mgr_incr_vfifo(*grp, v); + rw_mgr_incr_vfifo(grp); } - for (; *i < VFIFO_SIZE + 1; (*i)++) { - for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end - += IO_DELAY_PER_OPA_TAP) { - scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); - - if (!rw_mgr_mem_calibrate_read_test_all_ranks - (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) { - found_end = 1; - break; - } else { - (*max_working_cnt)++; - } - } - - if (found_end) - break; - - if (*p > IO_DQS_EN_PHASE_MAX) { - /* fiddle with FIFO */ - rw_mgr_incr_vfifo(*grp, v); - *p = 0; - } + ret = sdr_find_phase(0, grp, work_end, i, p); + if (ret) { + /* Cannot see edge of failing read. */ + debug_cond(DLEVEL == 2, "%s:%d: end: failed\n", + __func__, __LINE__); } - if (*i >= VFIFO_SIZE + 1) { - /* cannot see edge of failing read */ - debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\ - failed\n", __func__, __LINE__); - return 0; - } else { - return 1; - } + return ret; } -static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk, - uint32_t *work_bgn, uint32_t *v, uint32_t *d, - uint32_t *p, uint32_t *work_mid, - uint32_t *work_end) +/** + * sdr_find_window_center() - Find center of the working DQS window. + * @grp: Read/Write group + * @work_bgn: First working settings + * @work_end: Last working settings + * + * Find center of the working DQS enable window. + */ +static int sdr_find_window_center(const u32 grp, const u32 work_bgn, + const u32 work_end) { - int i; + u32 work_mid; int tmp_delay = 0; + int i, p, d; - *work_mid = (*work_bgn + *work_end) / 2; + work_mid = (work_bgn + work_end) / 2; debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n", - *work_bgn, *work_end, *work_mid); + work_bgn, work_end, work_mid); /* Get the middle delay to be less than a VFIFO delay */ - for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; - (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) - ; + tmp_delay = (iocfg->dqs_en_phase_max + 1) * iocfg->delay_per_opa_tap; + debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay); - while (*work_mid > tmp_delay) - *work_mid -= tmp_delay; - debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid); - - tmp_delay = 0; - for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid; - (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) - ; - tmp_delay -= IO_DELAY_PER_OPA_TAP; - debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay); - for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++, - tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - ; - debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay); - - scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1); - scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); + work_mid %= tmp_delay; + debug_cond(DLEVEL == 2, "new work_mid %d\n", work_mid); + + tmp_delay = rounddown(work_mid, iocfg->delay_per_opa_tap); + if (tmp_delay > iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap) + tmp_delay = iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap; + p = tmp_delay / iocfg->delay_per_opa_tap; + + debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", p, tmp_delay); + + d = DIV_ROUND_UP(work_mid - tmp_delay, iocfg->delay_per_dqs_en_dchain_tap); + if (d > iocfg->dqs_en_delay_max) + d = iocfg->dqs_en_delay_max; + tmp_delay += d * iocfg->delay_per_dqs_en_dchain_tap; + + debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", d, tmp_delay); + + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); /* * push vfifo until we can successfully calibrate. We can do this * because the largest possible margin in 1 VFIFO cycle. */ - for (i = 0; i < VFIFO_SIZE; i++) { - debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n", - *v); - if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, + for (i = 0; i < misccfg->read_valid_fifo_size; i++) { + debug_cond(DLEVEL == 2, "find_dqs_en_phase: center\n"); + if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, - bit_chk, 0)) { - break; + 0)) { + debug_cond(DLEVEL == 2, + "%s:%d center: found: ptap=%u dtap=%u\n", + __func__, __LINE__, p, d); + return 0; } - /* fiddle with FIFO */ - rw_mgr_incr_vfifo(*grp, v); + /* Fiddle with FIFO. */ + rw_mgr_incr_vfifo(grp); } - if (i >= VFIFO_SIZE) { - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \ - failed\n", __func__, __LINE__); - return 0; - } else { - return 1; - } + debug_cond(DLEVEL == 2, "%s:%d center: failed.\n", + __func__, __LINE__); + return -EINVAL; } -/* find a good dqs enable to use */ -static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) +/** + * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use + * @grp: Read/Write Group + * + * Find a good DQS enable to use. + */ +static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp) { - uint32_t v, d, p, i; - uint32_t max_working_cnt; - uint32_t bit_chk; - uint32_t dtaps_per_ptap; - uint32_t work_bgn, work_mid, work_end; - uint32_t found_passing_read, found_failing_read, initial_failing_dtap; + u32 d, p, i; + u32 dtaps_per_ptap; + u32 work_bgn, work_end; + u32 found_passing_read, found_failing_read, initial_failing_dtap; + int ret; debug("%s:%d %u\n", __func__, __LINE__, grp); @@ -1548,635 +1807,689 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); scc_mgr_set_dqs_en_phase_all_ranks(grp, 0); - /* ************************************************************** */ - /* * Step 0 : Determine number of delay taps for each phase tap * */ - dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP; + /* Step 0: Determine number of delay taps for each phase tap. */ + dtaps_per_ptap = iocfg->delay_per_opa_tap / iocfg->delay_per_dqs_en_dchain_tap; - /* ********************************************************* */ - /* * Step 1 : First push vfifo until we get a failing read * */ - v = find_vfifo_read(grp, &bit_chk); + /* Step 1: First push vfifo until we get a failing read. */ + find_vfifo_failing_read(grp); - max_working_cnt = 0; - - /* ******************************************************** */ - /* * step 2: find first working phase, increment in ptaps * */ + /* Step 2: Find first working phase, increment in ptaps. */ work_bgn = 0; - if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d, - &p, &i, &max_working_cnt) == 0) - return 0; + ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i); + if (ret) + return ret; work_end = work_bgn; /* - * If d is 0 then the working window covers a phase tap and - * we can follow the old procedure otherwise, we've found the beginning, + * If d is 0 then the working window covers a phase tap and we can + * follow the old procedure. Otherwise, we've found the beginning * and we need to increment the dtaps until we find the end. */ if (d == 0) { - /* ********************************************************* */ - /* * step 3a: if we have room, back off by one and - increment in dtaps * */ - - sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, - &max_working_cnt); - - /* ********************************************************* */ - /* * step 4a: go forward from working phase to non working - phase, increment in ptaps * */ - if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, - &i, &max_working_cnt, &work_end) == 0) - return 0; + /* + * Step 3a: If we have room, back off by one and + * increment in dtaps. + */ + sdr_backup_phase(grp, &work_bgn, &p); + + /* + * Step 4a: go forward from working phase to non working + * phase, increment in ptaps. + */ + ret = sdr_nonworking_phase(grp, &work_end, &p, &i); + if (ret) + return ret; - /* ********************************************************* */ - /* * step 5a: back off one from last, increment in dtaps * */ + /* Step 5a: Back off one from last, increment in dtaps. */ /* Special case code for backing up a phase */ if (p == 0) { - p = IO_DQS_EN_PHASE_MAX; - rw_mgr_decr_vfifo(grp, &v); + p = iocfg->dqs_en_phase_max; + rw_mgr_decr_vfifo(grp); } else { p = p - 1; } - work_end -= IO_DELAY_PER_OPA_TAP; + work_end -= iocfg->delay_per_opa_tap; scc_mgr_set_dqs_en_phase_all_ranks(grp, p); - /* * The actual increment of dtaps is done outside of - the if/else loop to share code */ d = 0; - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \ - vfifo=%u ptap=%u\n", __func__, __LINE__, - v, p); - } else { - /* ******************************************************* */ - /* * step 3-5b: Find the right edge of the window using - delay taps * */ - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \ - ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__, - v, p, d, work_bgn); - - work_end = work_bgn; - - /* * The actual increment of dtaps is done outside of the - if/else loop to share code */ - - /* Only here to counterbalance a subtract later on which is - not needed if this branch of the algorithm is taken */ - max_working_cnt++; + debug_cond(DLEVEL == 2, "%s:%d p: ptap=%u\n", + __func__, __LINE__, p); } - /* The dtap increment to find the failing edge is done here */ - for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += - IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ - end-2: dtap=%u\n", __func__, __LINE__, d); - scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - - if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, - PASS_ONE_BIT, - &bit_chk, 0)) { - break; - } - } + /* The dtap increment to find the failing edge is done here. */ + sdr_find_phase_delay(0, 1, grp, &work_end, + iocfg->delay_per_dqs_en_dchain_tap, &d); /* Go back to working dtap */ if (d != 0) - work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP; + work_end -= iocfg->delay_per_dqs_en_dchain_tap; - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \ - ptap=%u dtap=%u end=%u\n", __func__, __LINE__, - v, p, d-1, work_end); + debug_cond(DLEVEL == 2, + "%s:%d p/d: ptap=%u dtap=%u end=%u\n", + __func__, __LINE__, p, d - 1, work_end); if (work_end < work_bgn) { /* nil range */ - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \ - failed\n", __func__, __LINE__); - return 0; + debug_cond(DLEVEL == 2, "%s:%d end-2: failed\n", + __func__, __LINE__); + return -EINVAL; } - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n", + debug_cond(DLEVEL == 2, "%s:%d found range [%u,%u]\n", __func__, __LINE__, work_bgn, work_end); - /* *************************************************************** */ /* - * * We need to calculate the number of dtaps that equal a ptap - * * To do that we'll back up a ptap and re-find the edge of the - * * window using dtaps + * We need to calculate the number of dtaps that equal a ptap. + * To do that we'll back up a ptap and re-find the edge of the + * window using dtaps */ - - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \ - for tracking\n", __func__, __LINE__); + debug_cond(DLEVEL == 2, "%s:%d calculate dtaps_per_ptap for tracking\n", + __func__, __LINE__); /* Special case code for backing up a phase */ if (p == 0) { - p = IO_DQS_EN_PHASE_MAX; - rw_mgr_decr_vfifo(grp, &v); - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ - cycle/phase: v=%u p=%u\n", __func__, __LINE__, - v, p); + p = iocfg->dqs_en_phase_max; + rw_mgr_decr_vfifo(grp); + debug_cond(DLEVEL == 2, "%s:%d backedup cycle/phase: p=%u\n", + __func__, __LINE__, p); } else { p = p - 1; - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ - phase only: v=%u p=%u", __func__, __LINE__, - v, p); + debug_cond(DLEVEL == 2, "%s:%d backedup phase only: p=%u", + __func__, __LINE__, p); } scc_mgr_set_dqs_en_phase_all_ranks(grp, p); /* * Increase dtap until we first see a passing read (in case the - * window is smaller than a ptap), - * and then a failing read to mark the edge of the window again + * window is smaller than a ptap), and then a failing read to + * mark the edge of the window again. */ - /* Find a passing read */ - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n", + /* Find a passing read. */ + debug_cond(DLEVEL == 2, "%s:%d find passing read\n", __func__, __LINE__); - found_passing_read = 0; - found_failing_read = 0; - initial_failing_dtap = d; - for (; d <= IO_DQS_EN_DELAY_MAX; d++) { - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \ - read d=%u\n", __func__, __LINE__, d); - scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, - PASS_ONE_BIT, - &bit_chk, 0)) { - found_passing_read = 1; - break; - } - } + initial_failing_dtap = d; + found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d); if (found_passing_read) { - /* Find a failing read */ - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \ - read\n", __func__, __LINE__); - for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) { - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ - testing read d=%u\n", __func__, __LINE__, d); - scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - - if (!rw_mgr_mem_calibrate_read_test_all_ranks - (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { - found_failing_read = 1; - break; - } - } + /* Find a failing read. */ + debug_cond(DLEVEL == 2, "%s:%d find failing read\n", + __func__, __LINE__); + d++; + found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0, + &d); } else { - debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \ - calculate dtaps", __func__, __LINE__); - debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n"); + debug_cond(DLEVEL == 1, + "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n", + __func__, __LINE__); } /* * The dynamically calculated dtaps_per_ptap is only valid if we * found a passing/failing read. If we didn't, it means d hit the max - * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its + * (iocfg->dqs_en_delay_max). Otherwise, dtaps_per_ptap retains its * statically calculated value. */ if (found_passing_read && found_failing_read) dtaps_per_ptap = d - initial_failing_dtap; writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \ - - %u = %u", __func__, __LINE__, d, - initial_failing_dtap, dtaps_per_ptap); - - /* ******************************************** */ - /* * step 6: Find the centre of the window * */ - if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p, - &work_mid, &work_end) == 0) - return 0; - - debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \ - vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__, - v, p-1, d); - return 1; -} - -/* - * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different - * dq_in_delay values - */ -static uint32_t -rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay -(uint32_t write_group, uint32_t read_group, uint32_t test_bgn) -{ - uint32_t found; - uint32_t i; - uint32_t p; - uint32_t d; - uint32_t r; - - const uint32_t delay_step = IO_IO_IN_DELAY_MAX / - (RW_MGR_MEM_DQ_PER_READ_DQS-1); - /* we start at zero, so have one less dq to devide among */ - - debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group, - test_bgn); - - /* try different dq_in_delays since the dq path is shorter than dqs */ + debug_cond(DLEVEL == 2, "%s:%d dtaps_per_ptap=%u - %u = %u", + __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap); - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { - for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) { - debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\ - vfifo_find_dqs_", __func__, __LINE__); - debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ", - write_group, read_group); - debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i , p, d); - scc_mgr_set_dq_in_delay(p, d); - scc_mgr_load_dq(p); - } - writel(0, &sdr_scc_mgr->update); - } - - found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); - - debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\ - en_phase_sweep_dq", __func__, __LINE__); - debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Reseting delay \ - chain to zero\n", write_group, read_group, found); + /* Step 6: Find the centre of the window. */ + ret = sdr_find_window_center(grp, work_bgn, work_end); - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { - for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; - i++, p++) { - scc_mgr_set_dq_in_delay(p, 0); - scc_mgr_load_dq(p); - } - writel(0, &sdr_scc_mgr->update); - } - - return found; + return ret; } -/* per-bit deskew DQ and center */ -static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, - uint32_t write_group, uint32_t read_group, uint32_t test_bgn, - uint32_t use_read_test, uint32_t update_fom) -{ - uint32_t i, p, d, min_index; +/** + * search_stop_check() - Check if the detected edge is valid + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @d: DQS delay + * @rank_bgn: Rank number + * @write_group: Write Group + * @read_group: Read Group + * @bit_chk: Resulting bit mask after the test + * @sticky_bit_chk: Resulting sticky bit mask after the test + * @use_read_test: Perform read test + * + * Test if the found edge is valid. + */ +static u32 search_stop_check(const int write, const int d, const int rank_bgn, + const u32 write_group, const u32 read_group, + u32 *bit_chk, u32 *sticky_bit_chk, + const u32 use_read_test) +{ + const u32 ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; + const u32 correct_mask = write ? param->write_correct_mask : + param->read_correct_mask; + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + u32 ret; /* - * Store these as signed since there are comparisons with - * signed numbers. + * Stop searching when the read test doesn't pass AND when + * we've seen a passing read on every bit. */ - uint32_t bit_chk; - uint32_t sticky_bit_chk; - int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; - int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; - int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; - int32_t mid; - int32_t orig_mid_min, mid_min; - int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, - final_dqs_en; - int32_t dq_margin, dqs_margin; - uint32_t stop; - uint32_t temp_dq_in_delay1, temp_dq_in_delay2; - uint32_t addr; - - debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn); - - addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; - start_dqs = readl(addr + (read_group << 2)); - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) - start_dqs_en = readl(addr + ((read_group << 2) - - IO_DQS_EN_DELAY_OFFSET)); - - /* set the left and right edge of each bit to an illegal value */ - /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ - sticky_bit_chk = 0; - for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - left_edge[i] = IO_IO_IN_DELAY_MAX + 1; - right_edge[i] = IO_IO_IN_DELAY_MAX + 1; - } + if (write) { /* WRITE-ONLY */ + ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, + 0, PASS_ONE_BIT, + bit_chk, 0); + } else if (use_read_test) { /* READ-ONLY */ + ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group, + NUM_READ_PB_TESTS, + PASS_ONE_BIT, bit_chk, + 0, 0); + } else { /* READ-ONLY */ + rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, + PASS_ONE_BIT, bit_chk, 0); + *bit_chk = *bit_chk >> (per_dqs * + (read_group - (write_group * ratio))); + ret = (*bit_chk == 0); + } + *sticky_bit_chk = *sticky_bit_chk | *bit_chk; + ret = ret && (*sticky_bit_chk == correct_mask); + debug_cond(DLEVEL == 2, + "%s:%d center(left): dtap=%u => %u == %u && %u", + __func__, __LINE__, d, + *sticky_bit_chk, correct_mask, ret); + return ret; +} - /* Search for the left edge of the window for each bit */ - for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { - scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); +/** + * search_left_edge() - Find left edge of DQ/DQS working phase + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @rank_bgn: Rank number + * @write_group: Write Group + * @read_group: Read Group + * @test_bgn: Rank number to begin the test + * @sticky_bit_chk: Resulting sticky bit mask after the test + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @use_read_test: Perform read test + * + * Find left edge of DQ/DQS working phase. + */ +static void search_left_edge(const int write, const int rank_bgn, + const u32 write_group, const u32 read_group, const u32 test_bgn, + u32 *sticky_bit_chk, + int *left_edge, int *right_edge, const u32 use_read_test) +{ + const u32 delay_max = write ? iocfg->io_out1_delay_max : iocfg->io_in_delay_max; + const u32 dqs_max = write ? iocfg->io_out1_delay_max : iocfg->dqs_in_delay_max; + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + u32 stop, bit_chk; + int i, d; + + for (d = 0; d <= dqs_max; d++) { + if (write) + scc_mgr_apply_group_dq_out1_delay(d); + else + scc_mgr_apply_group_dq_in_delay(test_bgn, d); writel(0, &sdr_scc_mgr->update); - /* - * Stop searching when the read test doesn't pass AND when - * we've seen a passing read on every bit. - */ - if (use_read_test) { - stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, - read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, - &bit_chk, 0, 0); - } else { - rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, - 0, PASS_ONE_BIT, - &bit_chk, 0); - bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * - (read_group - (write_group * - RW_MGR_MEM_IF_READ_DQS_WIDTH / - RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); - stop = (bit_chk == 0); - } - sticky_bit_chk = sticky_bit_chk | bit_chk; - stop = stop && (sticky_bit_chk == param->read_correct_mask); - debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \ - && %u", __func__, __LINE__, d, - sticky_bit_chk, - param->read_correct_mask, stop); - - if (stop == 1) { + stop = search_stop_check(write, d, rank_bgn, write_group, + read_group, &bit_chk, sticky_bit_chk, + use_read_test); + if (stop == 1) break; - } else { - for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - if (bit_chk & 1) { - /* Remember a passing test as the - left_edge */ - left_edge[i] = d; - } else { - /* If a left edge has not been seen yet, - then a future passing test will mark - this edge as the right edge */ - if (left_edge[i] == - IO_IO_IN_DELAY_MAX + 1) { - right_edge[i] = -(d + 1); - } - } - bit_chk = bit_chk >> 1; + + /* stop != 1 */ + for (i = 0; i < per_dqs; i++) { + if (bit_chk & 1) { + /* + * Remember a passing test as + * the left_edge. + */ + left_edge[i] = d; + } else { + /* + * If a left edge has not been seen + * yet, then a future passing test + * will mark this edge as the right + * edge. + */ + if (left_edge[i] == delay_max + 1) + right_edge[i] = -(d + 1); } + bit_chk >>= 1; } } /* Reset DQ delay chains to 0 */ - scc_mgr_apply_group_dq_in_delay(test_bgn, 0); - sticky_bit_chk = 0; - for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { - debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ - %d right_edge[%u]: %d\n", __func__, __LINE__, - i, left_edge[i], i, right_edge[i]); + if (write) + scc_mgr_apply_group_dq_out1_delay(0); + else + scc_mgr_apply_group_dq_in_delay(test_bgn, 0); + + *sticky_bit_chk = 0; + for (i = per_dqs - 1; i >= 0; i--) { + debug_cond(DLEVEL == 2, + "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n", + __func__, __LINE__, i, left_edge[i], + i, right_edge[i]); /* * Check for cases where we haven't found the left edge, * which makes our assignment of the the right edge invalid. * Reset it to the illegal value. */ - if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( - right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { - right_edge[i] = IO_IO_IN_DELAY_MAX + 1; - debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \ - right_edge[%u]: %d\n", __func__, __LINE__, - i, right_edge[i]); + if ((left_edge[i] == delay_max + 1) && + (right_edge[i] != delay_max + 1)) { + right_edge[i] = delay_max + 1; + debug_cond(DLEVEL == 2, + "%s:%d vfifo_center: reset right_edge[%u]: %d\n", + __func__, __LINE__, i, right_edge[i]); } /* - * Reset sticky bit (except for bits where we have seen - * both the left and right edge). + * Reset sticky bit + * READ: except for bits where we have seen both + * the left and right edge. + * WRITE: except for bits where we have seen the + * left edge. */ - sticky_bit_chk = sticky_bit_chk << 1; - if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && - (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { - sticky_bit_chk = sticky_bit_chk | 1; + *sticky_bit_chk <<= 1; + if (write) { + if (left_edge[i] != delay_max + 1) + *sticky_bit_chk |= 1; + } else { + if ((left_edge[i] != delay_max + 1) && + (right_edge[i] != delay_max + 1)) + *sticky_bit_chk |= 1; } - - if (i == 0) - break; } - /* Search for the right edge of the window for each bit */ - for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { - scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - uint32_t delay = d + start_dqs_en; - if (delay > IO_DQS_EN_DELAY_MAX) - delay = IO_DQS_EN_DELAY_MAX; - scc_mgr_set_dqs_en_delay(read_group, delay); - } - scc_mgr_load_dqs(read_group); - writel(0, &sdr_scc_mgr->update); +} - /* - * Stop searching when the read test doesn't pass AND when - * we've seen a passing read on every bit. - */ - if (use_read_test) { - stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, - read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, - &bit_chk, 0, 0); - } else { - rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, - 0, PASS_ONE_BIT, - &bit_chk, 0); - bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * - (read_group - (write_group * - RW_MGR_MEM_IF_READ_DQS_WIDTH / - RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); - stop = (bit_chk == 0); +/** + * search_right_edge() - Find right edge of DQ/DQS working phase + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @rank_bgn: Rank number + * @write_group: Write Group + * @read_group: Read Group + * @start_dqs: DQS start phase + * @start_dqs_en: DQS enable start phase + * @sticky_bit_chk: Resulting sticky bit mask after the test + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @use_read_test: Perform read test + * + * Find right edge of DQ/DQS working phase. + */ +static int search_right_edge(const int write, const int rank_bgn, + const u32 write_group, const u32 read_group, + const int start_dqs, const int start_dqs_en, + u32 *sticky_bit_chk, + int *left_edge, int *right_edge, const u32 use_read_test) +{ + const u32 delay_max = write ? iocfg->io_out1_delay_max : iocfg->io_in_delay_max; + const u32 dqs_max = write ? iocfg->io_out1_delay_max : iocfg->dqs_in_delay_max; + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + u32 stop, bit_chk; + int i, d; + + for (d = 0; d <= dqs_max - start_dqs; d++) { + if (write) { /* WRITE-ONLY */ + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, + d + start_dqs); + } else { /* READ-ONLY */ + scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); + if (iocfg->shift_dqs_en_when_shift_dqs) { + u32 delay = d + start_dqs_en; + if (delay > iocfg->dqs_en_delay_max) + delay = iocfg->dqs_en_delay_max; + scc_mgr_set_dqs_en_delay(read_group, delay); + } + scc_mgr_load_dqs(read_group); } - sticky_bit_chk = sticky_bit_chk | bit_chk; - stop = stop && (sticky_bit_chk == param->read_correct_mask); - debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \ - %u && %u", __func__, __LINE__, d, - sticky_bit_chk, param->read_correct_mask, stop); + writel(0, &sdr_scc_mgr->update); + stop = search_stop_check(write, d, rank_bgn, write_group, + read_group, &bit_chk, sticky_bit_chk, + use_read_test); if (stop == 1) { + if (write && (d == 0)) { /* WRITE-ONLY */ + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { + /* + * d = 0 failed, but it passed when + * testing the left edge, so it must be + * marginal, set it to -1 + */ + if (right_edge[i] == delay_max + 1 && + left_edge[i] != delay_max + 1) + right_edge[i] = -1; + } + } break; - } else { - for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - if (bit_chk & 1) { - /* Remember a passing test as - the right_edge */ - right_edge[i] = d; + } + + /* stop != 1 */ + for (i = 0; i < per_dqs; i++) { + if (bit_chk & 1) { + /* + * Remember a passing test as + * the right_edge. + */ + right_edge[i] = d; + } else { + if (d != 0) { + /* + * If a right edge has not + * been seen yet, then a future + * passing test will mark this + * edge as the left edge. + */ + if (right_edge[i] == delay_max + 1) + left_edge[i] = -(d + 1); } else { - if (d != 0) { - /* If a right edge has not been - seen yet, then a future passing - test will mark this edge as the - left edge */ - if (right_edge[i] == - IO_IO_IN_DELAY_MAX + 1) { - left_edge[i] = -(d + 1); - } - } else { - /* d = 0 failed, but it passed - when testing the left edge, - so it must be marginal, - set it to -1 */ - if (right_edge[i] == - IO_IO_IN_DELAY_MAX + 1 && - left_edge[i] != - IO_IO_IN_DELAY_MAX - + 1) { - right_edge[i] = -1; - } - /* If a right edge has not been - seen yet, then a future passing - test will mark this edge as the - left edge */ - else if (right_edge[i] == - IO_IO_IN_DELAY_MAX + - 1) { - left_edge[i] = -(d + 1); - } - } + /* + * d = 0 failed, but it passed + * when testing the left edge, + * so it must be marginal, set + * it to -1 + */ + if (right_edge[i] == delay_max + 1 && + left_edge[i] != delay_max + 1) + right_edge[i] = -1; + /* + * If a right edge has not been + * seen yet, then a future + * passing test will mark this + * edge as the left edge. + */ + else if (right_edge[i] == delay_max + 1) + left_edge[i] = -(d + 1); } - - debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\ - d=%u]: ", __func__, __LINE__, d); - debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ", - (int)(bit_chk & 1), i, left_edge[i]); - debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, - right_edge[i]); - bit_chk = bit_chk >> 1; } + + debug_cond(DLEVEL == 2, "%s:%d center[r,d=%u]: ", + __func__, __LINE__, d); + debug_cond(DLEVEL == 2, + "bit_chk_test=%i left_edge[%u]: %d ", + bit_chk & 1, i, left_edge[i]); + debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, + right_edge[i]); + bit_chk >>= 1; } } /* Check that all bits have a window */ - for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ - %d right_edge[%u]: %d", __func__, __LINE__, - i, left_edge[i], i, right_edge[i]); - if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] - == IO_IO_IN_DELAY_MAX + 1)) { - /* - * Restore delay chain settings before letting the loop - * in rw_mgr_mem_calibrate_vfifo to retry different - * dqs/ck relationships. - */ - scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - scc_mgr_set_dqs_en_delay(read_group, - start_dqs_en); - } - scc_mgr_load_dqs(read_group); - writel(0, &sdr_scc_mgr->update); - - debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \ - find edge [%u]: %d %d", __func__, __LINE__, - i, left_edge[i], right_edge[i]); - if (use_read_test) { - set_failing_group_stage(read_group * - RW_MGR_MEM_DQ_PER_READ_DQS + i, - CAL_STAGE_VFIFO, - CAL_SUBSTAGE_VFIFO_CENTER); - } else { - set_failing_group_stage(read_group * - RW_MGR_MEM_DQ_PER_READ_DQS + i, - CAL_STAGE_VFIFO_AFTER_WRITES, - CAL_SUBSTAGE_VFIFO_CENTER); - } - return 0; - } + for (i = 0; i < per_dqs; i++) { + debug_cond(DLEVEL == 2, + "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d", + __func__, __LINE__, i, left_edge[i], + i, right_edge[i]); + if ((left_edge[i] == dqs_max + 1) || + (right_edge[i] == dqs_max + 1)) + return i + 1; /* FIXME: If we fail, retval > 0 */ } + return 0; +} + +/** + * get_window_mid_index() - Find the best middle setting of DQ/DQS phase + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @mid_min: Best DQ/DQS phase middle setting + * + * Find index and value of the middle of the DQ/DQS working phase. + */ +static int get_window_mid_index(const int write, int *left_edge, + int *right_edge, int *mid_min) +{ + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + int i, mid, min_index; + /* Find middle of window for each DQ bit */ - mid_min = left_edge[0] - right_edge[0]; + *mid_min = left_edge[0] - right_edge[0]; min_index = 0; - for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { + for (i = 1; i < per_dqs; i++) { mid = left_edge[i] - right_edge[i]; - if (mid < mid_min) { - mid_min = mid; + if (mid < *mid_min) { + *mid_min = mid; min_index = i; } } /* * -mid_min/2 represents the amount that we need to move DQS. - * If mid_min is odd and positive we'll need to add one to - * make sure the rounding in further calculations is correct - * (always bias to the right), so just add 1 for all positive values. + * If mid_min is odd and positive we'll need to add one to make + * sure the rounding in further calculations is correct (always + * bias to the right), so just add 1 for all positive values. */ - if (mid_min > 0) - mid_min++; + if (*mid_min > 0) + (*mid_min)++; + *mid_min = *mid_min / 2; - mid_min = mid_min / 2; - - debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n", - __func__, __LINE__, mid_min, min_index); - - /* Determine the amount we can change DQS (which is -mid_min) */ - orig_mid_min = mid_min; - new_dqs = start_dqs - mid_min; - if (new_dqs > IO_DQS_IN_DELAY_MAX) - new_dqs = IO_DQS_IN_DELAY_MAX; - else if (new_dqs < 0) - new_dqs = 0; - - mid_min = start_dqs - new_dqs; - debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", - mid_min, new_dqs); + debug_cond(DLEVEL == 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n", + __func__, __LINE__, *mid_min, min_index); + return min_index; +} - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) - mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; - else if (start_dqs_en - mid_min < 0) - mid_min += start_dqs_en - mid_min; - } - new_dqs = start_dqs - mid_min; +/** + * center_dq_windows() - Center the DQ/DQS windows + * @write: Perform read (Stage 2) or write (Stage 3) calibration + * @left_edge: Left edge of the DQ/DQS phase + * @right_edge: Right edge of the DQ/DQS phase + * @mid_min: Adjusted DQ/DQS phase middle setting + * @orig_mid_min: Original DQ/DQS phase middle setting + * @min_index: DQ/DQS phase middle setting index + * @test_bgn: Rank number to begin the test + * @dq_margin: Amount of shift for the DQ + * @dqs_margin: Amount of shift for the DQS + * + * Align the DQ/DQS windows in each group. + */ +static void center_dq_windows(const int write, int *left_edge, int *right_edge, + const int mid_min, const int orig_mid_min, + const int min_index, const int test_bgn, + int *dq_margin, int *dqs_margin) +{ + const u32 delay_max = write ? iocfg->io_out1_delay_max : iocfg->io_in_delay_max; + const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : + rwcfg->mem_dq_per_read_dqs; + const u32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET : + SCC_MGR_IO_IN_DELAY_OFFSET; + const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off; - debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \ - new_dqs=%d mid_min=%d\n", start_dqs, - IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1, - new_dqs, mid_min); + u32 temp_dq_io_delay1, temp_dq_io_delay2; + int shift_dq, i, p; /* Initialize data for export structures */ - dqs_margin = IO_IO_IN_DELAY_MAX + 1; - dq_margin = IO_IO_IN_DELAY_MAX + 1; + *dqs_margin = delay_max + 1; + *dq_margin = delay_max + 1; /* add delay to bring centre of all DQ windows to the same "level" */ - for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { + for (i = 0, p = test_bgn; i < per_dqs; i++, p++) { /* Use values before divide by 2 to reduce round off error */ shift_dq = (left_edge[i] - right_edge[i] - (left_edge[min_index] - right_edge[min_index]))/2 + (orig_mid_min - mid_min); - debug_cond(DLEVEL == 2, "vfifo_center: before: \ - shift_dq[%u]=%d\n", i, shift_dq); + debug_cond(DLEVEL == 2, + "vfifo_center: before: shift_dq[%u]=%d\n", + i, shift_dq); + + temp_dq_io_delay1 = readl(addr + (p << 2)); + temp_dq_io_delay2 = readl(addr + (i << 2)); - addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; - temp_dq_in_delay1 = readl(addr + (p << 2)); - temp_dq_in_delay2 = readl(addr + (i << 2)); + if (shift_dq + temp_dq_io_delay1 > delay_max) + shift_dq = delay_max - temp_dq_io_delay2; + else if (shift_dq + temp_dq_io_delay1 < 0) + shift_dq = -temp_dq_io_delay1; + + debug_cond(DLEVEL == 2, + "vfifo_center: after: shift_dq[%u]=%d\n", + i, shift_dq); + + if (write) + scc_mgr_set_dq_out1_delay(i, temp_dq_io_delay1 + shift_dq); + else + scc_mgr_set_dq_in_delay(p, temp_dq_io_delay1 + shift_dq); - if (shift_dq + (int32_t)temp_dq_in_delay1 > - (int32_t)IO_IO_IN_DELAY_MAX) { - shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2; - } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) { - shift_dq = -(int32_t)temp_dq_in_delay1; - } - debug_cond(DLEVEL == 2, "vfifo_center: after: \ - shift_dq[%u]=%d\n", i, shift_dq); - final_dq[i] = temp_dq_in_delay1 + shift_dq; - scc_mgr_set_dq_in_delay(p, final_dq[i]); scc_mgr_load_dq(p); - debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i, + debug_cond(DLEVEL == 2, + "vfifo_center: margin[%u]=[%d,%d]\n", i, left_edge[i] - shift_dq + (-mid_min), right_edge[i] + shift_dq - (-mid_min)); + /* To determine values for export structures */ - if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) - dq_margin = left_edge[i] - shift_dq + (-mid_min); + if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin) + *dq_margin = left_edge[i] - shift_dq + (-mid_min); - if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) - dqs_margin = right_edge[i] + shift_dq - (-mid_min); + if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin) + *dqs_margin = right_edge[i] + shift_dq - (-mid_min); } - final_dqs = new_dqs; - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) - final_dqs_en = start_dqs_en - mid_min; +} + +/** + * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering + * @rank_bgn: Rank number + * @rw_group: Read/Write Group + * @test_bgn: Rank at which the test begins + * @use_read_test: Perform a read test + * @update_fom: Update FOM + * + * Per-bit deskew DQ and centering. + */ +static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn, + const u32 rw_group, const u32 test_bgn, + const int use_read_test, const int update_fom) +{ + const u32 addr = + SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET + + (rw_group << 2); + /* + * Store these as signed since there are comparisons with + * signed numbers. + */ + u32 sticky_bit_chk; + int32_t left_edge[rwcfg->mem_dq_per_read_dqs]; + int32_t right_edge[rwcfg->mem_dq_per_read_dqs]; + int32_t orig_mid_min, mid_min; + int32_t new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en; + int32_t dq_margin, dqs_margin; + int i, min_index; + int ret; + + debug("%s:%d: %u %u", __func__, __LINE__, rw_group, test_bgn); + + start_dqs = readl(addr); + if (iocfg->shift_dqs_en_when_shift_dqs) + start_dqs_en = readl(addr - iocfg->dqs_en_delay_offset); + + /* set the left and right edge of each bit to an illegal value */ + /* use (iocfg->io_in_delay_max + 1) as an illegal value */ + sticky_bit_chk = 0; + for (i = 0; i < rwcfg->mem_dq_per_read_dqs; i++) { + left_edge[i] = iocfg->io_in_delay_max + 1; + right_edge[i] = iocfg->io_in_delay_max + 1; + } + + /* Search for the left edge of the window for each bit */ + search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn, + &sticky_bit_chk, + left_edge, right_edge, use_read_test); + + + /* Search for the right edge of the window for each bit */ + ret = search_right_edge(0, rank_bgn, rw_group, rw_group, + start_dqs, start_dqs_en, + &sticky_bit_chk, + left_edge, right_edge, use_read_test); + if (ret) { + /* + * Restore delay chain settings before letting the loop + * in rw_mgr_mem_calibrate_vfifo to retry different + * dqs/ck relationships. + */ + scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs); + if (iocfg->shift_dqs_en_when_shift_dqs) + scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en); + + scc_mgr_load_dqs(rw_group); + writel(0, &sdr_scc_mgr->update); + + debug_cond(DLEVEL == 1, + "%s:%d vfifo_center: failed to find edge [%u]: %d %d", + __func__, __LINE__, i, left_edge[i], right_edge[i]); + if (use_read_test) { + set_failing_group_stage(rw_group * + rwcfg->mem_dq_per_read_dqs + i, + CAL_STAGE_VFIFO, + CAL_SUBSTAGE_VFIFO_CENTER); + } else { + set_failing_group_stage(rw_group * + rwcfg->mem_dq_per_read_dqs + i, + CAL_STAGE_VFIFO_AFTER_WRITES, + CAL_SUBSTAGE_VFIFO_CENTER); + } + return -EIO; + } + + min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min); + + /* Determine the amount we can change DQS (which is -mid_min) */ + orig_mid_min = mid_min; + new_dqs = start_dqs - mid_min; + if (new_dqs > iocfg->dqs_in_delay_max) + new_dqs = iocfg->dqs_in_delay_max; + else if (new_dqs < 0) + new_dqs = 0; + + mid_min = start_dqs - new_dqs; + debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", + mid_min, new_dqs); + + if (iocfg->shift_dqs_en_when_shift_dqs) { + if (start_dqs_en - mid_min > iocfg->dqs_en_delay_max) + mid_min += start_dqs_en - mid_min - iocfg->dqs_en_delay_max; + else if (start_dqs_en - mid_min < 0) + mid_min += start_dqs_en - mid_min; + } + new_dqs = start_dqs - mid_min; + + debug_cond(DLEVEL == 1, + "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n", + start_dqs, + iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1, + new_dqs, mid_min); + + /* Add delay to bring centre of all DQ windows to the same "level". */ + center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min, + min_index, test_bgn, &dq_margin, &dqs_margin); /* Move DQS-en */ - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); - scc_mgr_load_dqs(read_group); + if (iocfg->shift_dqs_en_when_shift_dqs) { + final_dqs_en = start_dqs_en - mid_min; + scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en); + scc_mgr_load_dqs(rw_group); } /* Move DQS */ - scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); - scc_mgr_load_dqs(read_group); - debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \ - dqs_margin=%d", __func__, __LINE__, - dq_margin, dqs_margin); + scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs); + scc_mgr_load_dqs(rw_group); + debug_cond(DLEVEL == 2, + "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d", + __func__, __LINE__, dq_margin, dqs_margin); /* * Do not remove this line as it makes sure all of our decisions @@ -2184,7 +2497,10 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, */ writel(0, &sdr_scc_mgr->update); - return (dq_margin >= 0) && (dqs_margin >= 0); + if ((dq_margin < 0) || (dqs_margin < 0)) + return -EINVAL; + + return 0; } /** @@ -2199,7 +2515,6 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group, const u32 phase) { - u32 bit_chk; int ret; /* Set a particular DQ/DQS phase. */ @@ -2222,16 +2537,12 @@ static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group, * Altera EMI_RM 2015.05.04 :: Figure 1-26 * Back-to-Back reads of the patterns used for calibration. */ - ret = rw_mgr_mem_calibrate_read_test_patterns_all_ranks(rw_group, 1, - &bit_chk); - if (!ret) { /* FIXME: 0 means failure in this old code :-( */ + ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1); + if (ret) debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed: g=%u p=%u\n", __func__, __LINE__, rw_group, phase); - return -EIO; - } - - return 0; + return ret; } /** @@ -2245,18 +2556,53 @@ static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group, static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group, const u32 test_bgn) { - int ret; - /* * Altera EMI_RM 2015.05.04 :: Figure 1-27 * DQS and DQS Eanble Signal Relationships. */ - ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay( - rw_group, rw_group, test_bgn); - if (!ret) /* FIXME: 0 means failure in this old code :-( */ - return -EIO; - return 0; + /* We start at zero, so have one less dq to devide among */ + const u32 delay_step = iocfg->io_in_delay_max / + (rwcfg->mem_dq_per_read_dqs - 1); + int ret; + u32 i, p, d, r; + + debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn); + + /* Try different dq_in_delays since the DQ path is shorter than DQS. */ + for (r = 0; r < rwcfg->mem_number_of_ranks; + r += NUM_RANKS_PER_SHADOW_REG) { + for (i = 0, p = test_bgn, d = 0; + i < rwcfg->mem_dq_per_read_dqs; + i++, p++, d += delay_step) { + debug_cond(DLEVEL == 1, + "%s:%d: g=%u r=%u i=%u p=%u d=%u\n", + __func__, __LINE__, rw_group, r, i, p, d); + + scc_mgr_set_dq_in_delay(p, d); + scc_mgr_load_dq(p); + } + + writel(0, &sdr_scc_mgr->update); + } + + /* + * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different + * dq_in_delay values + */ + ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group); + + debug_cond(DLEVEL == 1, + "%s:%d: g=%u found=%u; Reseting delay chain to zero\n", + __func__, __LINE__, rw_group, !ret); + + for (r = 0; r < rwcfg->mem_number_of_ranks; + r += NUM_RANKS_PER_SHADOW_REG) { + scc_mgr_apply_group_dq_in_delay(test_bgn, 0); + writel(0, &sdr_scc_mgr->update); + } + + return ret; } /** @@ -2284,17 +2630,13 @@ rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn, */ grp_calibrated = 1; for (rank_bgn = 0, sr = 0; - rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; + rank_bgn < rwcfg->mem_number_of_ranks; rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) { - /* Check if this set of ranks should be skipped entirely. */ - if (param->skip_shadow_regs[sr]) - continue; - ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group, - rw_group, test_bgn, + test_bgn, use_read_test, update_fom); - if (ret) + if (!ret) continue; grp_calibrated = 0; @@ -2323,9 +2665,9 @@ rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn, */ static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn) { - uint32_t p, d; - uint32_t dtaps_per_ptap; - uint32_t failed_substage; + u32 p, d; + u32 dtaps_per_ptap; + u32 failed_substage; int ret; @@ -2339,8 +2681,8 @@ static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn) failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; /* USER Determine number of delay taps for each phase tap. */ - dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, - IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1; + dtaps_per_ptap = DIV_ROUND_UP(iocfg->delay_per_opa_tap, + iocfg->delay_per_dqs_en_dchain_tap) - 1; for (d = 0; d <= dtaps_per_ptap; d += 2) { /* @@ -2354,7 +2696,7 @@ static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn) rw_group, d); } - for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX; p++) { + for (p = 0; p <= iocfg->dqdqs_out_phase_max; p++) { /* 1) Guaranteed Write */ ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p); if (ret) @@ -2402,610 +2744,259 @@ cal_done_ok: return 1; } -/* VFIFO Calibration -- Read Deskew Calibration after write deskew */ -static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, - uint32_t test_bgn) +/** + * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering. + * @rw_group: Read/Write Group + * @test_bgn: Rank at which the test begins + * + * Stage 3: DQ/DQS Centering. + * + * This function implements UniPHY calibration Stage 3, as explained in + * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages". + */ +static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group, + const u32 test_bgn) { - uint32_t rank_bgn, sr; - uint32_t grp_calibrated; - uint32_t write_group; - - debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn); + int ret; - /* update info for sims */ + debug("%s:%d %u %u", __func__, __LINE__, rw_group, test_bgn); + /* Update info for sims. */ + reg_file_set_group(rw_group); reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); - write_group = read_group; - - /* update info for sims */ - reg_file_set_group(read_group); - - grp_calibrated = 1; - /* Read per-bit deskew can be done on a per shadow register basis */ - for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; - rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { - /* Determine if this set of ranks should be skipped entirely */ - if (!param->skip_shadow_regs[sr]) { - /* This is the last calibration round, update FOM here */ - if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, - write_group, - read_group, - test_bgn, 0, - 1)) { - grp_calibrated = 0; - } - } - } - - - if (grp_calibrated == 0) { - set_failing_group_stage(write_group, + ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1); + if (ret) + set_failing_group_stage(rw_group, CAL_STAGE_VFIFO_AFTER_WRITES, CAL_SUBSTAGE_VFIFO_CENTER); - return 0; - } - - return 1; -} - -/* Calibrate LFIFO to find smallest read latency */ -static uint32_t rw_mgr_mem_calibrate_lfifo(void) -{ - uint32_t found_one; - uint32_t bit_chk; - - debug("%s:%d\n", __func__, __LINE__); - - /* update info for sims */ - reg_file_set_stage(CAL_STAGE_LFIFO); - reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); - - /* Load up the patterns used by read calibration for all ranks */ - rw_mgr_mem_calibrate_read_load_patterns(0, 1); - found_one = 0; - - do { - writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); - debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", - __func__, __LINE__, gbl->curr_read_lat); - - if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, - NUM_READ_TESTS, - PASS_ALL_BITS, - &bit_chk, 1)) { - break; - } - - found_one = 1; - /* reduce read latency and see if things are working */ - /* correctly */ - gbl->curr_read_lat--; - } while (gbl->curr_read_lat > 0); - - /* reset the fifos to get pointers to known state */ - - writel(0, &phy_mgr_cmd->fifo_reset); - - if (found_one) { - /* add a fudge factor to the read latency that was determined */ - gbl->curr_read_lat += 2; - writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); - debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \ - read_lat=%u\n", __func__, __LINE__, - gbl->curr_read_lat); - return 1; - } else { - set_failing_group_stage(0xff, CAL_STAGE_LFIFO, - CAL_SUBSTAGE_READ_LATENCY); - - debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \ - read_lat=%u\n", __func__, __LINE__, - gbl->curr_read_lat); - return 0; - } + return ret; } -/* - * issue write test command. - * two variants are provided. one that just tests a write pattern and - * another that tests datamask functionality. +/** + * rw_mgr_mem_calibrate_lfifo() - Minimize latency + * + * Stage 4: Minimize latency. + * + * This function implements UniPHY calibration Stage 4, as explained in + * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages". + * Calibrate LFIFO to find smallest read latency. */ -static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, - uint32_t test_dm) +static u32 rw_mgr_mem_calibrate_lfifo(void) { - uint32_t mcc_instruction; - uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) && - ENABLE_SUPER_QUICK_CALIBRATION); - uint32_t rw_wl_nop_cycles; - uint32_t addr; + int found_one = 0; - /* - * Set counter and jump addresses for the right - * number of NOP cycles. - * The number of supported NOP cycles can range from -1 to infinity - * Three different cases are handled: - * - * 1. For a number of NOP cycles greater than 0, the RW Mgr looping - * mechanism will be used to insert the right number of NOPs - * - * 2. For a number of NOP cycles equals to 0, the micro-instruction - * issuing the write command will jump straight to the - * micro-instruction that turns on DQS (for DDRx), or outputs write - * data (for RLD), skipping - * the NOP micro-instruction all together - * - * 3. A number of NOP cycles equal to -1 indicates that DQS must be - * turned on in the same micro-instruction that issues the write - * command. Then we need - * to directly jump to the micro-instruction that sends out the data - * - * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters - * (2 and 3). One jump-counter (0) is used to perform multiple - * write-read operations. - * one counter left to issue this command in "multiple-group" mode - */ - - rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; - - if (rw_wl_nop_cycles == -1) { - /* - * CNTR 2 - We want to execute the special write operation that - * turns on DQS right away and then skip directly to the - * instruction that sends out the data. We set the counter to a - * large number so that the jump is always taken. - */ - writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); - - /* CNTR 3 - Not used */ - if (test_dm) { - mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; - writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA, - &sdr_rw_load_jump_mgr_regs->load_jump_add2); - writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, - &sdr_rw_load_jump_mgr_regs->load_jump_add3); - } else { - mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1; - writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA, - &sdr_rw_load_jump_mgr_regs->load_jump_add2); - writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, - &sdr_rw_load_jump_mgr_regs->load_jump_add3); - } - } else if (rw_wl_nop_cycles == 0) { - /* - * CNTR 2 - We want to skip the NOP operation and go straight - * to the DQS enable instruction. We set the counter to a large - * number so that the jump is always taken. - */ - writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); - - /* CNTR 3 - Not used */ - if (test_dm) { - mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; - writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS, - &sdr_rw_load_jump_mgr_regs->load_jump_add2); - } else { - mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; - writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS, - &sdr_rw_load_jump_mgr_regs->load_jump_add2); - } - } else { - /* - * CNTR 2 - In this case we want to execute the next instruction - * and NOT take the jump. So we set the counter to 0. The jump - * address doesn't count. - */ - writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); - writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); - - /* - * CNTR 3 - Set the nop counter to the number of cycles we - * need to loop for, minus 1. - */ - writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); - if (test_dm) { - mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; - writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, - &sdr_rw_load_jump_mgr_regs->load_jump_add3); - } else { - mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; - writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, - &sdr_rw_load_jump_mgr_regs->load_jump_add3); - } - } + debug("%s:%d\n", __func__, __LINE__); - writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | - RW_MGR_RESET_READ_DATAPATH_OFFSET); + /* Update info for sims. */ + reg_file_set_stage(CAL_STAGE_LFIFO); + reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); - if (quick_write_mode) - writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); - else - writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); + /* Load up the patterns used by read calibration for all ranks */ + rw_mgr_mem_calibrate_read_load_patterns(0, 1); - writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); + do { + writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); + debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", + __func__, __LINE__, gbl->curr_read_lat); - /* - * CNTR 1 - This is used to ensure enough time elapses - * for read data to come back. - */ - writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); + if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS, + PASS_ALL_BITS, 1)) + break; - if (test_dm) { - writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT, - &sdr_rw_load_jump_mgr_regs->load_jump_add1); + found_one = 1; + /* + * Reduce read latency and see if things are + * working correctly. + */ + gbl->curr_read_lat--; + } while (gbl->curr_read_lat > 0); + + /* Reset the fifos to get pointers to known state. */ + writel(0, &phy_mgr_cmd->fifo_reset); + + if (found_one) { + /* Add a fudge factor to the read latency that was determined */ + gbl->curr_read_lat += 2; + writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); + debug_cond(DLEVEL == 2, + "%s:%d lfifo: success: using read_lat=%u\n", + __func__, __LINE__, gbl->curr_read_lat); } else { - writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT, - &sdr_rw_load_jump_mgr_regs->load_jump_add1); + set_failing_group_stage(0xff, CAL_STAGE_LFIFO, + CAL_SUBSTAGE_READ_LATENCY); + + debug_cond(DLEVEL == 2, + "%s:%d lfifo: failed at initial read_lat=%u\n", + __func__, __LINE__, gbl->curr_read_lat); } - addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; - writel(mcc_instruction, addr + (group << 2)); + return found_one; } -/* Test writes, can check for a single bit pass or multiple bit pass */ -static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, - uint32_t write_group, uint32_t use_dm, uint32_t all_correct, - uint32_t *bit_chk, uint32_t all_ranks) +/** + * search_window() - Search for the/part of the window with DM/DQS shift + * @search_dm: If 1, search for the DM shift, if 0, search for DQS shift + * @rank_bgn: Rank number + * @write_group: Write Group + * @bgn_curr: Current window begin + * @end_curr: Current window end + * @bgn_best: Current best window begin + * @end_best: Current best window end + * @win_best: Size of the best window + * @new_dqs: New DQS value (only applicable if search_dm = 0). + * + * Search for the/part of the window with DM/DQS shift. + */ +static void search_window(const int search_dm, + const u32 rank_bgn, const u32 write_group, + int *bgn_curr, int *end_curr, int *bgn_best, + int *end_best, int *win_best, int new_dqs) { - uint32_t r; - uint32_t correct_mask_vg; - uint32_t tmp_bit_chk; - uint32_t vg; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); - uint32_t addr_rw_mgr; - uint32_t base_rw_mgr; + u32 bit_chk; + const int max = iocfg->io_out1_delay_max - new_dqs; + int d, di; + + /* Search for the/part of the window with DM/DQS shift. */ + for (di = max; di >= 0; di -= DELTA_D) { + if (search_dm) { + d = di; + scc_mgr_apply_group_dm_out1_delay(d); + } else { + /* For DQS, we go from 0...max */ + d = max - di; + /* + * Note: This only shifts DQS, so are we limiting ourselve to + * width of DQ unnecessarily. + */ + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, + d + new_dqs); + } - *bit_chk = param->write_correct_mask; - correct_mask_vg = param->write_correct_mask_vg; + writel(0, &sdr_scc_mgr->update); - for (r = rank_bgn; r < rank_end; r++) { - if (param->skip_ranks[r]) { - /* request to skip the rank */ - continue; - } + if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, + PASS_ALL_BITS, &bit_chk, + 0)) { + /* Set current end of the window. */ + *end_curr = search_dm ? -d : d; - /* set rank */ - set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); + /* + * If a starting edge of our window has not been seen + * this is our current start of the DM window. + */ + if (*bgn_curr == iocfg->io_out1_delay_max + 1) + *bgn_curr = search_dm ? -d : d; - tmp_bit_chk = 0; - addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS; - for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) { - /* reset the fifos to get pointers to known state */ - writel(0, &phy_mgr_cmd->fifo_reset); + /* + * If current window is bigger than best seen. + * Set best seen to be current window. + */ + if ((*end_curr - *bgn_curr + 1) > *win_best) { + *win_best = *end_curr - *bgn_curr + 1; + *bgn_best = *bgn_curr; + *end_best = *end_curr; + } + } else { + /* We just saw a failing test. Reset temp edge. */ + *bgn_curr = iocfg->io_out1_delay_max + 1; + *end_curr = iocfg->io_out1_delay_max + 1; - tmp_bit_chk = tmp_bit_chk << - (RW_MGR_MEM_DQ_PER_WRITE_DQS / - RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); - rw_mgr_mem_calibrate_write_test_issue(write_group * - RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg, - use_dm); + /* Early exit is only applicable to DQS. */ + if (search_dm) + continue; - base_rw_mgr = readl(addr_rw_mgr); - tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); - if (vg == 0) + /* + * Early exit optimization: if the remaining delay + * chain space is less than already seen largest + * window we can exit. + */ + if (*win_best - 1 > iocfg->io_out1_delay_max - new_dqs - d) break; } - *bit_chk &= tmp_bit_chk; - } - - if (all_correct) { - set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \ - %u => %lu", write_group, use_dm, - *bit_chk, param->write_correct_mask, - (long unsigned int)(*bit_chk == - param->write_correct_mask)); - return *bit_chk == param->write_correct_mask; - } else { - set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ", - write_group, use_dm, *bit_chk); - debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0, - (long unsigned int)(*bit_chk != 0)); - return *bit_chk != 0x00; } } /* - * center all windows. do per-bit-deskew to possibly increase size of + * rw_mgr_mem_calibrate_writes_center() - Center all windows + * @rank_bgn: Rank number + * @write_group: Write group + * @test_bgn: Rank at which the test begins + * + * Center all windows. Do per-bit-deskew to possibly increase size of * certain windows. */ -static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, - uint32_t write_group, uint32_t test_bgn) +static int +rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group, + const u32 test_bgn) { - uint32_t i, p, min_index; - int32_t d; - /* - * Store these as signed since there are comparisons with - * signed numbers. - */ - uint32_t bit_chk; - uint32_t sticky_bit_chk; - int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; - int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; - int32_t mid; - int32_t mid_min, orig_mid_min; - int32_t new_dqs, start_dqs, shift_dq; - int32_t dq_margin, dqs_margin, dm_margin; - uint32_t stop; - uint32_t temp_dq_out1_delay; - uint32_t addr; + int i; + u32 sticky_bit_chk; + u32 min_index; + int left_edge[rwcfg->mem_dq_per_write_dqs]; + int right_edge[rwcfg->mem_dq_per_write_dqs]; + int mid; + int mid_min, orig_mid_min; + int new_dqs, start_dqs; + int dq_margin, dqs_margin, dm_margin; + int bgn_curr = iocfg->io_out1_delay_max + 1; + int end_curr = iocfg->io_out1_delay_max + 1; + int bgn_best = iocfg->io_out1_delay_max + 1; + int end_best = iocfg->io_out1_delay_max + 1; + int win_best = 0; + + int ret; debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); dm_margin = 0; - addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; - start_dqs = readl(addr + - (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); + start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS | + SCC_MGR_IO_OUT1_DELAY_OFFSET) + + (rwcfg->mem_dq_per_write_dqs << 2)); - /* per-bit deskew */ + /* Per-bit deskew. */ /* - * set the left and right edge of each bit to an illegal value - * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value. + * Set the left and right edge of each bit to an illegal value. + * Use (iocfg->io_out1_delay_max + 1) as an illegal value. */ sticky_bit_chk = 0; - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; - right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; - } - - /* Search for the left edge of the window for each bit */ - for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { - scc_mgr_apply_group_dq_out1_delay(write_group, d); - - writel(0, &sdr_scc_mgr->update); - - /* - * Stop searching when the read test doesn't pass AND when - * we've seen a passing read on every bit. - */ - stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, - 0, PASS_ONE_BIT, &bit_chk, 0); - sticky_bit_chk = sticky_bit_chk | bit_chk; - stop = stop && (sticky_bit_chk == param->write_correct_mask); - debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \ - == %u && %u [bit_chk= %u ]\n", - d, sticky_bit_chk, param->write_correct_mask, - stop, bit_chk); - - if (stop == 1) { - break; - } else { - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - if (bit_chk & 1) { - /* - * Remember a passing test as the - * left_edge. - */ - left_edge[i] = d; - } else { - /* - * If a left edge has not been seen - * yet, then a future passing test will - * mark this edge as the right edge. - */ - if (left_edge[i] == - IO_IO_OUT1_DELAY_MAX + 1) { - right_edge[i] = -(d + 1); - } - } - debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d); - debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", - (int)(bit_chk & 1), i, left_edge[i]); - debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, - right_edge[i]); - bit_chk = bit_chk >> 1; - } - } - } - - /* Reset DQ delay chains to 0 */ - scc_mgr_apply_group_dq_out1_delay(0); - sticky_bit_chk = 0; - for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { - debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ - %d right_edge[%u]: %d\n", __func__, __LINE__, - i, left_edge[i], i, right_edge[i]); - - /* - * Check for cases where we haven't found the left edge, - * which makes our assignment of the the right edge invalid. - * Reset it to the illegal value. - */ - if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && - (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { - right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; - debug_cond(DLEVEL == 2, "%s:%d write_center: reset \ - right_edge[%u]: %d\n", __func__, __LINE__, - i, right_edge[i]); - } - - /* - * Reset sticky bit (except for bits where we have - * seen the left edge). - */ - sticky_bit_chk = sticky_bit_chk << 1; - if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) - sticky_bit_chk = sticky_bit_chk | 1; - - if (i == 0) - break; - } - - /* Search for the right edge of the window for each bit */ - for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { - scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, - d + start_dqs); - - writel(0, &sdr_scc_mgr->update); - - /* - * Stop searching when the read test doesn't pass AND when - * we've seen a passing read on every bit. - */ - stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, - 0, PASS_ONE_BIT, &bit_chk, 0); - - sticky_bit_chk = sticky_bit_chk | bit_chk; - stop = stop && (sticky_bit_chk == param->write_correct_mask); - - debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \ - %u && %u\n", d, sticky_bit_chk, - param->write_correct_mask, stop); - - if (stop == 1) { - if (d == 0) { - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; - i++) { - /* d = 0 failed, but it passed when - testing the left edge, so it must be - marginal, set it to -1 */ - if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + 1 && - left_edge[i] != - IO_IO_OUT1_DELAY_MAX + 1) { - right_edge[i] = -1; - } - } - } - break; - } else { - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - if (bit_chk & 1) { - /* - * Remember a passing test as - * the right_edge. - */ - right_edge[i] = d; - } else { - if (d != 0) { - /* - * If a right edge has not - * been seen yet, then a future - * passing test will mark this - * edge as the left edge. - */ - if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + 1) - left_edge[i] = -(d + 1); - } else { - /* - * d = 0 failed, but it passed - * when testing the left edge, - * so it must be marginal, set - * it to -1. - */ - if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + 1 && - left_edge[i] != - IO_IO_OUT1_DELAY_MAX + 1) - right_edge[i] = -1; - /* - * If a right edge has not been - * seen yet, then a future - * passing test will mark this - * edge as the left edge. - */ - else if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + - 1) - left_edge[i] = -(d + 1); - } - } - debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d); - debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", - (int)(bit_chk & 1), i, left_edge[i]); - debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, - right_edge[i]); - bit_chk = bit_chk >> 1; - } - } - } - - /* Check that all bits have a window */ - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ - %d right_edge[%u]: %d", __func__, __LINE__, - i, left_edge[i], i, right_edge[i]); - if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || - (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { - set_failing_group_stage(test_bgn + i, - CAL_STAGE_WRITES, - CAL_SUBSTAGE_WRITES_CENTER); - return 0; - } - } - - /* Find middle of window for each DQ bit */ - mid_min = left_edge[0] - right_edge[0]; - min_index = 0; - for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - mid = left_edge[i] - right_edge[i]; - if (mid < mid_min) { - mid_min = mid; - min_index = i; - } + for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { + left_edge[i] = iocfg->io_out1_delay_max + 1; + right_edge[i] = iocfg->io_out1_delay_max + 1; + } + + /* Search for the left edge of the window for each bit. */ + search_left_edge(1, rank_bgn, write_group, 0, test_bgn, + &sticky_bit_chk, + left_edge, right_edge, 0); + + /* Search for the right edge of the window for each bit. */ + ret = search_right_edge(1, rank_bgn, write_group, 0, + start_dqs, 0, + &sticky_bit_chk, + left_edge, right_edge, 0); + if (ret) { + set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES, + CAL_SUBSTAGE_WRITES_CENTER); + return -EINVAL; } - /* - * -mid_min/2 represents the amount that we need to move DQS. - * If mid_min is odd and positive we'll need to add one to - * make sure the rounding in further calculations is correct - * (always bias to the right), so just add 1 for all positive values. - */ - if (mid_min > 0) - mid_min++; - mid_min = mid_min / 2; - debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__, - __LINE__, mid_min); + min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min); - /* Determine the amount we can change DQS (which is -mid_min) */ + /* Determine the amount we can change DQS (which is -mid_min). */ orig_mid_min = mid_min; new_dqs = start_dqs; mid_min = 0; - debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \ - mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min); - /* Initialize data for export structures */ - dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; - dq_margin = IO_IO_OUT1_DELAY_MAX + 1; - - /* add delay to bring centre of all DQ windows to the same "level" */ - for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { - /* Use values before divide by 2 to reduce round off error */ - shift_dq = (left_edge[i] - right_edge[i] - - (left_edge[min_index] - right_edge[min_index]))/2 + - (orig_mid_min - mid_min); - - debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \ - [%u]=%d\n", __func__, __LINE__, i, shift_dq); - - addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; - temp_dq_out1_delay = readl(addr + (i << 2)); - if (shift_dq + (int32_t)temp_dq_out1_delay > - (int32_t)IO_IO_OUT1_DELAY_MAX) { - shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay; - } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) { - shift_dq = -(int32_t)temp_dq_out1_delay; - } - debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n", - i, shift_dq); - scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq); - scc_mgr_load_dq(i); - - debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i, - left_edge[i] - shift_dq + (-mid_min), - right_edge[i] + shift_dq - (-mid_min)); - /* To determine values for export structures */ - if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) - dq_margin = left_edge[i] - shift_dq + (-mid_min); + debug_cond(DLEVEL == 1, + "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n", + __func__, __LINE__, start_dqs, new_dqs, mid_min); - if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) - dqs_margin = right_edge[i] + shift_dq - (-mid_min); - } + /* Add delay to bring centre of all DQ windows to the same "level". */ + center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min, + min_index, 0, &dq_margin, &dqs_margin); /* Move DQS */ scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); @@ -3015,129 +3006,53 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__); /* - * set the left and right edge of each bit to an illegal value, - * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value, + * Set the left and right edge of each bit to an illegal value. + * Use (iocfg->io_out1_delay_max + 1) as an illegal value. */ - left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; - right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; - int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; - int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; - int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; - int32_t win_best = 0; - - /* Search for the/part of the window with DM shift */ - for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { - scc_mgr_apply_group_dm_out1_delay(d); - writel(0, &sdr_scc_mgr->update); - - if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, - PASS_ALL_BITS, &bit_chk, - 0)) { - /* USE Set current end of the window */ - end_curr = -d; - /* - * If a starting edge of our window has not been seen - * this is our current start of the DM window. - */ - if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) - bgn_curr = -d; - - /* - * If current window is bigger than best seen. - * Set best seen to be current window. - */ - if ((end_curr-bgn_curr+1) > win_best) { - win_best = end_curr-bgn_curr+1; - bgn_best = bgn_curr; - end_best = end_curr; - } - } else { - /* We just saw a failing test. Reset temp edge */ - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; - } - } + left_edge[0] = iocfg->io_out1_delay_max + 1; + right_edge[0] = iocfg->io_out1_delay_max + 1; + /* Search for the/part of the window with DM shift. */ + search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr, + &bgn_best, &end_best, &win_best, 0); - /* Reset DM delay chains to 0 */ + /* Reset DM delay chains to 0. */ scc_mgr_apply_group_dm_out1_delay(0); /* * Check to see if the current window nudges up aganist 0 delay. * If so we need to continue the search by shifting DQS otherwise DQS - * search begins as a new search. */ + * search begins as a new search. + */ if (end_curr != 0) { - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; + bgn_curr = iocfg->io_out1_delay_max + 1; + end_curr = iocfg->io_out1_delay_max + 1; } - /* Search for the/part of the window with DQS shifts */ - for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { - /* - * Note: This only shifts DQS, so are we limiting ourselve to - * width of DQ unnecessarily. - */ - scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, - d + new_dqs); - - writel(0, &sdr_scc_mgr->update); - if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, - PASS_ALL_BITS, &bit_chk, - 0)) { - /* USE Set current end of the window */ - end_curr = d; - /* - * If a beginning edge of our window has not been seen - * this is our current begin of the DM window. - */ - if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) - bgn_curr = d; - - /* - * If current window is bigger than best seen. Set best - * seen to be current window. - */ - if ((end_curr-bgn_curr+1) > win_best) { - win_best = end_curr-bgn_curr+1; - bgn_best = bgn_curr; - end_best = end_curr; - } - } else { - /* We just saw a failing test. Reset temp edge */ - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; - - /* Early exit optimization: if ther remaining delay - chain space is less than already seen largest window - we can exit */ - if ((win_best-1) > - (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) { - break; - } - } - } + /* Search for the/part of the window with DQS shifts. */ + search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr, + &bgn_best, &end_best, &win_best, new_dqs); - /* assign left and right edge for cal and reporting; */ - left_edge[0] = -1*bgn_best; + /* Assign left and right edge for cal and reporting. */ + left_edge[0] = -1 * bgn_best; right_edge[0] = end_best; - debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__, - __LINE__, left_edge[0], right_edge[0]); + debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", + __func__, __LINE__, left_edge[0], right_edge[0]); - /* Move DQS (back to orig) */ + /* Move DQS (back to orig). */ scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); /* Move DM */ - /* Find middle of window for the DM bit */ + /* Find middle of window for the DM bit. */ mid = (left_edge[0] - right_edge[0]) / 2; - /* only move right, since we are not moving DQS/DQ */ + /* Only move right, since we are not moving DQS/DQ. */ if (mid < 0) mid = 0; - /* dm_marign should fail if we never find a window */ + /* dm_marign should fail if we never find a window. */ if (win_best == 0) dm_margin = -1; else @@ -3146,43 +3061,58 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, scc_mgr_apply_group_dm_out1_delay(mid); writel(0, &sdr_scc_mgr->update); - debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \ - dm_margin=%d\n", __func__, __LINE__, left_edge[0], - right_edge[0], mid, dm_margin); - /* Export values */ + debug_cond(DLEVEL == 2, + "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n", + __func__, __LINE__, left_edge[0], right_edge[0], + mid, dm_margin); + /* Export values. */ gbl->fom_out += dq_margin + dqs_margin; - debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \ - dqs_margin=%d dm_margin=%d\n", __func__, __LINE__, - dq_margin, dqs_margin, dm_margin); + debug_cond(DLEVEL == 2, + "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n", + __func__, __LINE__, dq_margin, dqs_margin, dm_margin); /* * Do not remove this line as it makes sure all of our * decisions have been applied. */ writel(0, &sdr_scc_mgr->update); - return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); + + if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0)) + return -EINVAL; + + return 0; } -/* calibrate the write operations */ -static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, - uint32_t test_bgn) +/** + * rw_mgr_mem_calibrate_writes() - Write Calibration Part One + * @rank_bgn: Rank number + * @group: Read/Write Group + * @test_bgn: Rank at which the test begins + * + * Stage 2: Write Calibration Part One. + * + * This function implements UniPHY calibration Stage 2, as explained in + * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages". + */ +static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group, + const u32 test_bgn) { - /* update info for sims */ - debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn); + int ret; + /* Update info for sims */ + debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn); + + reg_file_set_group(group); reg_file_set_stage(CAL_STAGE_WRITES); reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); - reg_file_set_group(g); - - if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { - set_failing_group_stage(g, CAL_STAGE_WRITES, + ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn); + if (ret) + set_failing_group_stage(group, CAL_STAGE_WRITES, CAL_SUBSTAGE_WRITES_CENTER); - return 0; - } - return 1; + return ret; } /** @@ -3194,28 +3124,24 @@ static void mem_precharge_and_activate(void) { int r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* Test if the rank should be skipped. */ - if (param->skip_ranks[r]) - continue; - + for (r = 0; r < rwcfg->mem_number_of_ranks; r++) { /* Set rank. */ set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); /* Precharge all banks. */ - writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | + writel(rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET); writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0); - writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1, + writel(rwcfg->activate_0_and_1_wait1, &sdr_rw_load_jump_mgr_regs->load_jump_add0); writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1); - writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2, + writel(rwcfg->activate_0_and_1_wait2, &sdr_rw_load_jump_mgr_regs->load_jump_add1); /* Activate rows. */ - writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS | + writel(rwcfg->activate_0_and_1, SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET); } } @@ -3232,7 +3158,7 @@ static void mem_init_latency(void) * so max latency in AFI clocks, used here, is correspondingly * smaller. */ - const u32 max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1; + const u32 max_latency = (1 << misccfg->max_latency_count_width) - 1; u32 rlat, wlat; debug("%s:%d\n", __func__, __LINE__); @@ -3267,24 +3193,23 @@ static void mem_init_latency(void) */ static void mem_skip_calibrate(void) { - uint32_t vfifo_offset; - uint32_t i, j, r; + u32 vfifo_offset; + u32 i, j, r; debug("%s:%d\n", __func__, __LINE__); /* Need to update every shadow register set used by the interface */ - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; + for (r = 0; r < rwcfg->mem_number_of_ranks; r += NUM_RANKS_PER_SHADOW_REG) { /* * Set output phase alignment settings appropriate for * skip calibration. */ - for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { + for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) { scc_mgr_set_dqs_en_phase(i, 0); -#if IO_DLL_CHAIN_LENGTH == 6 - scc_mgr_set_dqdqs_output_phase(i, 6); -#else - scc_mgr_set_dqdqs_output_phase(i, 7); -#endif + if (iocfg->dll_chain_length == 6) + scc_mgr_set_dqdqs_output_phase(i, 6); + else + scc_mgr_set_dqdqs_output_phase(i, 7); /* * Case:33398 * @@ -3303,20 +3228,20 @@ static void mem_skip_calibrate(void) * * Hence, to make DQS aligned to CK, we need to delay * DQS by: - * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH)) + * (720 - 90 - 180 - 2 * (360 / iocfg->dll_chain_length)) * - * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) + * Dividing the above by (360 / iocfg->dll_chain_length) * gives us the number of ptaps, which simplies to: * - * (1.25 * IO_DLL_CHAIN_LENGTH - 2) + * (1.25 * iocfg->dll_chain_length - 2) */ scc_mgr_set_dqdqs_output_phase(i, - 1.25 * IO_DLL_CHAIN_LENGTH - 2); + 1.25 * iocfg->dll_chain_length - 2); } writel(0xff, &sdr_scc_mgr->dqs_ena); writel(0xff, &sdr_scc_mgr->dqs_io_ena); - for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { + for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) { writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_GROUP_COUNTER_OFFSET); } @@ -3326,7 +3251,7 @@ static void mem_skip_calibrate(void) } /* Compensate for simulation model behaviour */ - for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { + for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) { scc_mgr_set_dqs_bus_in_delay(i, 10); scc_mgr_load_dqs(i); } @@ -3336,7 +3261,7 @@ static void mem_skip_calibrate(void) * ArriaV has hard FIFOs that can only be initialized by incrementing * in sequencer. */ - vfifo_offset = CALIB_VFIFO_OFFSET; + vfifo_offset = misccfg->calib_vfifo_offset; for (j = 0; j < vfifo_offset; j++) writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy); writel(0, &phy_mgr_cmd->fifo_reset); @@ -3345,7 +3270,7 @@ static void mem_skip_calibrate(void) * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal * setting from generation-time constant. */ - gbl->curr_read_lat = CALIB_LFIFO_OFFSET; + gbl->curr_read_lat = misccfg->calib_lfifo_offset; writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); } @@ -3354,18 +3279,18 @@ static void mem_skip_calibrate(void) * * Perform memory calibration. */ -static uint32_t mem_calibrate(void) +static u32 mem_calibrate(void) { - uint32_t i; - uint32_t rank_bgn, sr; - uint32_t write_group, write_test_bgn; - uint32_t read_group, read_test_bgn; - uint32_t run_groups, current_run; - uint32_t failing_groups = 0; - uint32_t group_failed = 0; + u32 i; + u32 rank_bgn, sr; + u32 write_group, write_test_bgn; + u32 read_group, read_test_bgn; + u32 run_groups, current_run; + u32 failing_groups = 0; + u32 group_failed = 0; - const u32 rwdqs_ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / - RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + const u32 rwdqs_ratio = rwcfg->mem_if_read_dqs_width / + rwcfg->mem_if_write_dqs_width; debug("%s:%d\n", __func__, __LINE__); @@ -3382,7 +3307,7 @@ static uint32_t mem_calibrate(void) /* Initialize bit slips. */ mem_precharge_and_activate(); - for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { + for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) { writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_GROUP_COUNTER_OFFSET); /* Only needed once to set all groups, pins, DQ, DQS, DM. */ @@ -3416,11 +3341,11 @@ static uint32_t mem_calibrate(void) */ scc_mgr_zero_all(); - run_groups = ~param->skip_groups; + run_groups = ~0; for (write_group = 0, write_test_bgn = 0; write_group - < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, - write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { + < rwcfg->mem_if_write_dqs_width; write_group++, + write_test_bgn += rwcfg->mem_dq_per_write_dqs) { /* Initialize the group failure */ group_failed = 0; @@ -3441,7 +3366,7 @@ static uint32_t mem_calibrate(void) read_test_bgn = 0; read_group < (write_group + 1) * rwdqs_ratio; read_group++, - read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) { + read_test_bgn += rwcfg->mem_dq_per_read_dqs) { if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO) continue; @@ -3459,7 +3384,7 @@ static uint32_t mem_calibrate(void) /* Calibrate the output side */ for (rank_bgn = 0, sr = 0; - rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; + rank_bgn < rwcfg->mem_number_of_ranks; rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) { if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) continue; @@ -3468,15 +3393,8 @@ static uint32_t mem_calibrate(void) if (STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) continue; - /* - * Determine if this set of ranks - * should be skipped entirely. - */ - if (param->skip_shadow_regs[sr]) - continue; - /* Calibrate WRITEs */ - if (rw_mgr_mem_calibrate_writes(rank_bgn, + if (!rw_mgr_mem_calibrate_writes(rank_bgn, write_group, write_test_bgn)) continue; @@ -3493,11 +3411,11 @@ static uint32_t mem_calibrate(void) read_test_bgn = 0; read_group < (write_group + 1) * rwdqs_ratio; read_group++, - read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) { + read_test_bgn += rwcfg->mem_dq_per_read_dqs) { if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) continue; - if (rw_mgr_mem_calibrate_vfifo_end(read_group, + if (!rw_mgr_mem_calibrate_vfifo_end(read_group, read_test_bgn)) continue; @@ -3525,13 +3443,6 @@ grp_failed: /* A group failed, increment the counter. */ if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO) continue; - /* - * If we're skipping groups as part of debug, - * don't calibrate LFIFO. - */ - if (param->skip_groups != 0) - continue; - /* Calibrate the LFIFO */ if (!rw_mgr_mem_calibrate_lfifo()) return 0; @@ -3595,7 +3506,7 @@ static int run_mem_calibrate(void) */ static void debug_mem_calibrate(int pass) { - uint32_t debug_info; + u32 debug_info; if (pass) { printf("%s: CALIBRATION PASSED\n", __FILE__); @@ -3644,15 +3555,19 @@ static void debug_mem_calibrate(int pass) */ static void hc_initialize_rom_data(void) { + unsigned int nelem = 0; + const u32 *rom_init; u32 i, addr; + socfpga_get_seq_inst_init(&rom_init, &nelem); addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; - for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++) - writel(inst_rom_init[i], addr + (i << 2)); + for (i = 0; i < nelem; i++) + writel(rom_init[i], addr + (i << 2)); + socfpga_get_seq_ac_init(&rom_init, &nelem); addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; - for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++) - writel(ac_rom_init[i], addr + (i << 2)); + for (i = 0; i < nelem; i++) + writel(rom_init[i], addr + (i << 2)); } /** @@ -3663,7 +3578,7 @@ static void hc_initialize_rom_data(void) static void initialize_reg_file(void) { /* Initialize the register file with the correct data */ - writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature); + writel(misccfg->reg_file_init_seq_signature, &sdr_reg_file->signature); writel(0, &sdr_reg_file->debug_data_addr); writel(0, &sdr_reg_file->cur_stage); writel(0, &sdr_reg_file->fom); @@ -3679,13 +3594,13 @@ static void initialize_reg_file(void) */ static void initialize_hps_phy(void) { - uint32_t reg; + u32 reg; /* * Tracking also gets configured here because it's in the * same register. */ - uint32_t trk_sample_count = 7500; - uint32_t trk_long_idle_sample_count = (10 << 16) | 100; + u32 trk_sample_count = 7500; + u32 trk_long_idle_sample_count = (10 << 16) | 100; /* * Format is number of outer loops in the 16 MSB, sample * count in 16 LSB. @@ -3734,7 +3649,7 @@ static void initialize_tracking(void) * Compute usable version of value in case we skip full * computation later. */ - writel(DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP) - 1, + writel(DIV_ROUND_UP(iocfg->delay_per_opa_tap, iocfg->delay_per_dchain_tap) - 1, &sdr_reg_file->dtaps_per_ptap); /* trk_sample_count */ @@ -3753,15 +3668,15 @@ static void initialize_tracking(void) &sdr_reg_file->delays); /* mux delay */ - writel((RW_MGR_IDLE << 24) | (RW_MGR_ACTIVATE_1 << 16) | - (RW_MGR_SGLE_READ << 8) | (RW_MGR_PRECHARGE_ALL << 0), + writel((rwcfg->idle << 24) | (rwcfg->activate_1 << 16) | + (rwcfg->sgle_read << 8) | (rwcfg->precharge_all << 0), &sdr_reg_file->trk_rw_mgr_addr); - writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, + writel(rwcfg->mem_if_read_dqs_width, &sdr_reg_file->trk_read_dqs_width); /* trefi [7:0] */ - writel((RW_MGR_REFRESH_ALL << 24) | (1000 << 0), + writel((rwcfg->refresh_all << 24) | (1000 << 0), &sdr_reg_file->trk_rfsh); } @@ -3769,7 +3684,7 @@ int sdram_calibration_full(void) { struct param_type my_param; struct gbl_type my_gbl; - uint32_t pass; + u32 pass; memset(&my_param, 0, sizeof(my_param)); memset(&my_gbl, 0, sizeof(my_gbl)); @@ -3777,6 +3692,10 @@ int sdram_calibration_full(void) param = &my_param; gbl = &my_gbl; + rwcfg = socfpga_get_sdram_rwmgr_config(); + iocfg = socfpga_get_sdram_io_config(); + misccfg = socfpga_get_sdram_misc_config(); + /* Set the calibration enabled by default */ gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; /* @@ -3801,25 +3720,25 @@ int sdram_calibration_full(void) debug("%s:%d\n", __func__, __LINE__); debug_cond(DLEVEL == 1, "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", - RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, - RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS, - RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, - RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); + rwcfg->mem_number_of_ranks, rwcfg->mem_number_of_cs_per_dimm, + rwcfg->mem_dq_per_read_dqs, rwcfg->mem_dq_per_write_dqs, + rwcfg->mem_virtual_groups_per_read_dqs, + rwcfg->mem_virtual_groups_per_write_dqs); debug_cond(DLEVEL == 1, "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", - RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH, - RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH, - IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP); + rwcfg->mem_if_read_dqs_width, rwcfg->mem_if_write_dqs_width, + rwcfg->mem_data_width, rwcfg->mem_data_mask_width, + iocfg->delay_per_opa_tap, iocfg->delay_per_dchain_tap); debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u", - IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH); + iocfg->delay_per_dqs_en_dchain_tap, iocfg->dll_chain_length); debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", - IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX, - IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX); + iocfg->dqs_en_phase_max, iocfg->dqdqs_out_phase_max, + iocfg->dqs_en_delay_max, iocfg->dqs_in_delay_max); debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", - IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX, - IO_IO_OUT2_DELAY_MAX); + iocfg->io_in_delay_max, iocfg->io_out1_delay_max, + iocfg->io_out2_delay_max); debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", - IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE); + iocfg->dqs_in_reserve, iocfg->dqs_out_reserve); hc_initialize_rom_data();