/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
	(struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
	(struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
	(struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
	(struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
	(struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
	(struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
	(struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
	(struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D		1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode,
 * where we skip everything except the bare minimum, we need a few of the
 * steps to be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS
 * for the check, which is based on the rtl-supplied value, or we dynamically
 * compute the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
	STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

uint16_t skip_delay_mask;	/* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
	((non_skip_value) & skip_delay_mask)
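
/*
 * Worked example: with skip_delay_mask = 0xffff the macro passes the
 * non-skip value through unchanged; with skip_delay_mask = 0 it always
 * yields 0, turning the delay loops into no-ops.
 */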

struct gbl_type *gbl;
struct param_type *param;
uint32_t curr_shadow_reg;

static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
	uint32_t write_group, uint32_t use_dm,
	uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);

static void set_failing_group_stage(uint32_t group, uint32_t stage,
	uint32_t substage)
{
	/*
	 * Only set the global stage if there has not been any other
	 * failing group
	 */
	if (gbl->error_stage == CAL_STAGE_NIL) {
		gbl->error_substage = substage;
		gbl->error_stage = stage;
		gbl->error_group = group;
	}
}

static void reg_file_set_group(u16 set_group)
{
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
	set_sub_stage &= 0xff;
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

static void phy_mgr_initialize(void)
{
	debug("%s:%d\n", __func__, __LINE__);
	/* USER calibration has control over path to memory */
	/*
	 * In Hard PHY this is a 2-bit control:
	 * 0: AFI Mux Select
	 * 1: DDIO Mux Select
	 */
	writel(0x3, &phy_mgr_cfg->mux_sel);

	/* USER memory clock is not stable as we begin initialization */
	writel(0, &phy_mgr_cfg->reset_mem_stbl);

	/* USER calibration status all set to zero */
	writel(0, &phy_mgr_cfg->cal_status);

	writel(0, &phy_mgr_cfg->cal_debug_info);

	if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) {
		param->read_correct_mask_vg  = ((uint32_t)1 <<
			(RW_MGR_MEM_DQ_PER_READ_DQS /
			RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
		param->write_correct_mask_vg = ((uint32_t)1 <<
			(RW_MGR_MEM_DQ_PER_READ_DQS /
			RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
		param->read_correct_mask     = ((uint32_t)1 <<
			RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
		param->write_correct_mask    = ((uint32_t)1 <<
			RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
		param->dm_correct_mask       = ((uint32_t)1 <<
			(RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH))
			- 1;
	}
}

static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode)
{
	uint32_t odt_mask_0 = 0;
	uint32_t odt_mask_1 = 0;
	uint32_t cs_and_odt_mask;

	if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) {
		if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) {
			/*
			 * 1 Rank
			 * Read: ODT = 0
			 * Write: ODT = 1
			 */
			odt_mask_0 = 0x0;
			odt_mask_1 = 0x1;
		} else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) {
			/* 2 Ranks */
			if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
				/*
				 * - Dual-Slot, Single-Rank
				 *   (1 chip-select per DIMM)
				 * OR
				 * - RDIMM, 4 total CS (2 CS per DIMM)
				 *   means 2 DIMMs
				 * Since MEM_NUMBER_OF_RANKS is 2, they are
				 * both single rank with 2 CS each
				 * (special for RDIMM)
				 * Read: Turn on ODT on the opposite rank
				 * Write: Turn on ODT on all ranks
				 */
				odt_mask_0 = 0x3 & ~(1 << rank);
				odt_mask_1 = 0x3;
			} else {
				/*
				 * USER - Single-Slot, Dual-Rank DIMMs
				 * (2 chip-selects per DIMM)
				 * USER Read: Turn off ODT on all ranks
				 * USER Write: Turn on ODT on active rank
				 */
				odt_mask_0 = 0x0;
				odt_mask_1 = 0x3 & (1 << rank);
			}
		} else {
			/*
			 * 4 Ranks
			 * Read:
			 * ----------+-----------------------+
			 *           |                       |
			 *           |         ODT           |
			 * Read From +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  0  |
			 *     1     |  1  |  0  |  0  |  0  |
			 *     2     |  0  |  0  |  0  |  1  |
			 *     3     |  0  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *
			 * Write:
			 * ----------+-----------------------+
			 *           |                       |
			 *           |         ODT           |
			 * Write To  +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  1  |
			 *     1     |  1  |  0  |  1  |  0  |
			 *     2     |  0  |  1  |  0  |  1  |
			 *     3     |  1  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 */
			switch (rank) {
			case 0:
				odt_mask_0 = 0x4;
				odt_mask_1 = 0x5;
				break;
			case 1:
				odt_mask_0 = 0x8;
				odt_mask_1 = 0xA;
				break;
			case 2:
				odt_mask_0 = 0x1;
				odt_mask_1 = 0x5;
				break;
			case 3:
				odt_mask_0 = 0x2;
				odt_mask_1 = 0xA;
				break;
			}
		}
	} else {
		odt_mask_0 = 0x0;
		odt_mask_1 = 0x0;
	}

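	/*
	 * Illustrative note (layout inferred from the composition below):
	 * bits [7:0] carry the active-low chip selects, bits [15:8] the
	 * read ODT mask, and bits [23:16] the write ODT mask.  E.g. rank 1
	 * of 4 with odt_mask_0 = 0x8 and odt_mask_1 = 0xA composes to
	 * cs_and_odt_mask = 0x0A08FD.
	 */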
	cs_and_odt_mask =
		(0xFF & ~(1 << rank)) |
		((0xFF & odt_mask_0) << 8) |
		((0xFF & odt_mask_1) << 16);
	writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:	Base offset in SCC Manager space
 * @grp:	Read/Write group
 * @val:	Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
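	/*
	 * Descriptive note: SCC manager registers are 32 bits wide, so
	 * consecutive groups sit 4 bytes apart; hence the (grp << 2)
	 * byte offset below.
	 */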
	writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
	/*
	 * Clear register file for HPS. 16 (2^4) is the size of the
	 * full register file in the scc mgr:
	 *	RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
	 *			       MEM_IF_READ_DQS_WIDTH - 1);
	 */
	int i;

	for (i = 0; i < 16; i++) {
		debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
			   __func__, __LINE__, i);
		scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
	}
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
{
	scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
	scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
		    delay);
}

static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
		    delay);
}

static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
		    RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
		    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(uint32_t dqs)
{
	writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
	writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
	writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(uint32_t dm)
{
	writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:	Base offset in SCC Manager space
 * @grp:	Read/Write group
 * @val:	Value to be set
 * @update:	If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
				  const int update)
{
	u32 r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_set(off, grp, val);

		if (update || (r == 0)) {
			writel(grp, &sdr_scc_mgr->dqs_ena);
			writel(0, &sdr_scc_mgr->update);
		}
	}
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
	/*
	 * USER although the h/w doesn't support different phases per
	 * shadow register, for simplicity our scc manager modeling
	 * keeps different phase settings per shadow reg, and it's
	 * important for us to keep them in sync to match h/w.
	 * for efficiency, the scan chain update should occur only
	 * once to sr0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
			      read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
						     uint32_t phase)
{
	/*
	 * USER although the h/w doesn't support different phases per
	 * shadow register, for simplicity our scc manager modeling
	 * keeps different phase settings per shadow reg, and it's
	 * important for us to keep them in sync to match h/w.
	 * for efficiency, the scan chain update should occur only
	 * once to sr0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
			      write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
					       uint32_t delay)
{
	/*
	 * In shadow register mode, the T11 settings are stored in
	 * registers in the core, which are updated by the DQS_ENA
	 * signals. Not issuing the SCC_MGR_UPD command allows us to
	 * save lots of rank switching overhead, by calling
	 * select_shadow_regs_for_update with update_scan_chains
	 * set to 0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
			      read_group, delay, 1);
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * This function sets the OCT output delay in SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
{
	const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
			  RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
	const int base = write_group * ratio;
	int i;
	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
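	/*
	 * E.g. (illustrative): with a read:write DQS width ratio of 2,
	 * write group 3 maps to read groups 6 and 7, so the delay below
	 * is written once for each of them.
	 */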
	for (i = 0; i < ratio; i++)
		scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}

/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
	/*
	 * Load the fixed setting in the SCC manager
	 * bits: 0:0 = 1'b1	- DQS bypass
	 * bits: 1:1 = 1'b1	- DQ bypass
	 * bits: 4:2 = 3'b001	- rfifo_mode
	 * bits: 6:5 = 2'b01	- rfifo clock_select
	 * bits: 7:7 = 1'b0	- separate gating from ungating setting
	 * bits: 8:8 = 1'b0	- separate OE from Output delay setting
	 */
	const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
			  (1 << 2) | (1 << 1) | (1 << 0);
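	/* The bit fields above compose to a constant value of 0x27. */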
	const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
			 SCC_MGR_HHP_GLOBALS_OFFSET |
			 SCC_MGR_HHP_EXTRAS_OFFSET;

	debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
		   __func__, __LINE__);
	writel(value, addr);
	debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
		   __func__, __LINE__);
}

/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(void)
{
	int i, r;

	/*
	 * USER Zero all DQS config settings, across all groups and all
	 * shadow registers
	 */
	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
			/*
			 * The phases actually don't exist on a per-rank basis,
			 * but there's no harm updating them several times, so
			 * let's keep the code simple.
			 */
			scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
			scc_mgr_set_dqs_en_phase(i, 0);
			scc_mgr_set_dqs_en_delay(i, 0);
		}

		for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
			scc_mgr_set_dqdqs_output_phase(i, 0);
			/* Arria V/Cyclone V don't have out2. */
			scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
		}
	}

	/* Multicast to all DQS group enables. */
	writel(0xff, &sdr_scc_mgr->dqs_ena);
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:	Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
	/* Multicast to all DQ enables. */
	writel(0xff, &sdr_scc_mgr->dq_ena);
	writel(0xff, &sdr_scc_mgr->dm_ena);

	/* Update current DQS IO enable. */
	writel(0, &sdr_scc_mgr->dqs_io_ena);

	/* Update the DQS logic. */
	writel(write_group, &sdr_scc_mgr->dqs_ena);

	/* Hit update. */
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:	Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
	const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
			  RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
	const int base = write_group * ratio;
	int i;
	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
	for (i = 0; i < ratio; i++)
		writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(const u32 write_group, const int out_only)
{
	int i, r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/* Zero all DQ config settings. */
		for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
			scc_mgr_set_dq_out1_delay(i, 0);
			if (!out_only)
				scc_mgr_set_dq_in_delay(i, 0);
		}

		/* Multicast to all DQ enables. */
		writel(0xff, &sdr_scc_mgr->dq_ena);

		/* Zero all DM config settings. */
		for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
			scc_mgr_set_dm_out1_delay(i, 0);

		/* Multicast to all DM enables. */
		writel(0xff, &sdr_scc_mgr->dm_ena);

		/* Zero all DQS IO settings. */
		if (!out_only)
			scc_mgr_set_dqs_io_in_delay(0);

		/* Arria V/Cyclone V don't have out2. */
		scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
		scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
		scc_mgr_load_dqs_for_write_group(write_group);

		/* Multicast to all DQS IO enables (only 1 in total). */
		writel(0, &sdr_scc_mgr->dqs_io_ena);

		/* Hit update to zero everything. */
		writel(0, &sdr_scc_mgr->update);
	}
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
{
	uint32_t i, p;

	for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
		scc_mgr_set_dq_in_delay(p, delay);
		scc_mgr_load_dq(p);
	}
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
 * @delay:		Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
{
	int i;

	for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
		scc_mgr_set_dq_out1_delay(i, delay);
		scc_mgr_load_dq(i);
	}
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
{
	uint32_t i;

	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
		scc_mgr_set_dm_out1_delay(i, delay1);
		scc_mgr_load_dm(i);
	}
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
						    uint32_t delay)
{
	scc_mgr_set_dqs_out1_delay(delay);
	scc_mgr_load_dqs_io();

	scc_mgr_set_oct_out1_delay(write_group, delay);
	scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
 */
static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
						  const u32 delay)
{
	u32 i, new_delay;

	/* DQ shift */
	for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
		scc_mgr_load_dq(i);

	/* DM shift */
	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
		scc_mgr_load_dm(i);

	/* DQS shift */
	new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
	if (new_delay > IO_IO_OUT2_DELAY_MAX) {
		debug_cond(DLEVEL == 1,
			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
			   __func__, __LINE__, write_group, delay, new_delay,
			   IO_IO_OUT2_DELAY_MAX,
			   new_delay - IO_IO_OUT2_DELAY_MAX);
		new_delay -= IO_IO_OUT2_DELAY_MAX;
		scc_mgr_set_dqs_out1_delay(new_delay);
	}
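	/*
	 * Spill-over example (illustrative values): if IO_IO_OUT2_DELAY_MAX
	 * were 7 and the requested OUT2 delay came to 9, the 2 taps that do
	 * not fit in OUT2 are programmed into OUT1 above instead.
	 */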

	scc_mgr_load_dqs_io();

	/* OCT shift */
	new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
	if (new_delay > IO_IO_OUT2_DELAY_MAX) {
		debug_cond(DLEVEL == 1,
			   "%s:%d (%u, %u) OCT: %u > %d; adding %u to OUT1\n",
			   __func__, __LINE__, write_group, delay,
			   new_delay, IO_IO_OUT2_DELAY_MAX,
			   new_delay - IO_IO_OUT2_DELAY_MAX);
		new_delay -= IO_IO_OUT2_DELAY_MAX;
		scc_mgr_set_oct_out1_delay(write_group, new_delay);
	}

	scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
 */
static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
						const u32 delay)
{
	int r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_apply_group_all_out_delay_add(write_group, delay);
		writel(0, &sdr_scc_mgr->update);
	}
}

/**
 * set_jump_as_return() - Return instruction optimization
 *
 * Optimization used to recover some slots in the DDR3 inst_rom; it could
 * be applied to other protocols if we wanted to.
 */
static void set_jump_as_return(void)
{
	/*
	 * To save space, we replace return with a jump to a special shared
	 * RETURN instruction, and set the counter to a large value so that
	 * we always take the jump.
	 */
	writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
	writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/*
 * should always use constants as argument to ensure all computations are
 * performed at compile time
 */
static void delay_for_n_mem_clocks(const uint32_t clocks)
{
	uint32_t afi_clocks;
	uint8_t inner = 0;
	uint8_t outer = 0;
	uint16_t c_loop = 0;

	debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

	/* Scale (rounding up) to get afi clocks. */
	afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO;

	/*
	 * Note, we don't bother accounting for being off a little bit
	 * because of a few extra instructions in outer loops.
	 * Note, the loops have a test at the end, and do the test before
	 * the decrement, and so always perform the loop
	 * 1 time more than the counter value
	 */
	if (afi_clocks == 0) {
		;
	} else if (afi_clocks <= 0x100) {
		inner = afi_clocks - 1;
		outer = 0;
		c_loop = 0;
	} else if (afi_clocks <= 0x10000) {
		inner = 0xff;
		outer = (afi_clocks - 1) >> 8;
		c_loop = 0;
	} else {
		inner = 0xff;
		outer = 0xff;
		c_loop = (afi_clocks - 1) >> 16;
	}
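	/*
	 * Worked example (illustrative): clocks = 600 with AFI_RATE_RATIO = 2
	 * gives afi_clocks = 300.  Since 0x100 < 300 <= 0x10000, we get
	 * inner = 0xff and outer = (300 - 1) >> 8 = 1, i.e. 2 outer passes
	 * of a 256-iteration inner loop, which covers the 300 AFI clocks.
	 */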

	/*
	 * rom instructions are structured as follows:
	 *
	 *    IDLE_LOOP2: jnz cntr0, TARGET_A
	 *    IDLE_LOOP1: jnz cntr1, TARGET_B
	 *                return
	 *
	 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
	 * TARGET_B is set to IDLE_LOOP2 as well
	 *
	 * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
	 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
	 *
	 * a little confusing, but it helps save precious space in the inst_rom
	 * and sequencer rom and keeps the delays more accurate and reduces
	 * overhead
	 */
	if (afi_clocks <= 0x100) {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
			&sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_IDLE_LOOP1,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
					  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	} else {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
			&sdr_rw_load_mgr_regs->load_cntr0);

		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
			&sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_IDLE_LOOP2,
			&sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(RW_MGR_IDLE_LOOP2,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);

		/* hack to get around compiler not being smart enough */
		if (afi_clocks <= 0x10000) {
			/* only need to run once */
			writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
		} else {
			do {
				writel(RW_MGR_IDLE_LOOP2,
					SDR_PHYGRP_RWMGRGRP_ADDRESS |
					RW_MGR_RUN_SINGLE_GROUP_OFFSET);
			} while (c_loop-- != 0);
		}
	}
	debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}

/**
 * rw_mgr_mem_init_load_regs() - Load instruction registers
 * @cntr0:	Counter 0 value
 * @cntr1:	Counter 1 value
 * @cntr2:	Counter 2 value
 * @jump:	Jump instruction value
 *
 * Load instruction registers.
 */
static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
{
	uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
			   RW_MGR_RUN_SINGLE_GROUP_OFFSET;

	/* Load counters */
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
	       &sdr_rw_load_mgr_regs->load_cntr0);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
	       &sdr_rw_load_mgr_regs->load_cntr1);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
	       &sdr_rw_load_mgr_regs->load_cntr2);

	/* Load jump address */
	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

	/* Execute count instruction */
	writel(jump, grpaddr);
}

/**
 * rw_mgr_mem_load_user() - Load user calibration values
 * @fin1:	Final instruction 1
 * @fin2:	Final instruction 2
 * @precharge:	If 1, precharge the banks at the end
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
				 const int precharge)
{
	u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
		      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	u32 r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
		if (param->skip_ranks[r]) {
			/* request to skip the rank */
			continue;
		}

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

		/* precharge all banks ... */
		if (precharge)
			writel(RW_MGR_PRECHARGE_ALL, grpaddr);

		/*
		 * USER Use mirrored commands for odd ranks if address
		 * mirroring is on
		 */
		if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
			set_jump_as_return();
			writel(RW_MGR_MRS2_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS3_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS1_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(fin1, grpaddr);
		} else {
			set_jump_as_return();
			writel(RW_MGR_MRS2, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS3, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS1, grpaddr);
			set_jump_as_return();
			writel(fin2, grpaddr);
		}

		if (precharge)
			continue;

		set_jump_as_return();
		writel(RW_MGR_ZQCL, grpaddr);

		/* tZQinit = tDLLK = 512 ck cycles */
		delay_for_n_mem_clocks(512);
	}
}

static void rw_mgr_mem_initialize(void)
{
	debug("%s:%d\n", __func__, __LINE__);

	/* The reset / cke part of initialization is broadcasted to all ranks */
	writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

	/*
	 * Here's how you load a register for a loop
	 * Counters are located @ 0x800
	 * Jump addresses are located @ 0xC00
	 * For both, registers 0 to 3 are selected using bits 3 and 2, like
	 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
	 * I know this ain't pretty, but the Avalon bus throws away the 2
	 * least significant bits
	 */

	/* start with memory RESET activated */

	/* tINIT = 200us */

	/*
	 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
	 * If a and b are the number of iterations in 2 nested loops
	 * it takes the following number of cycles to complete the operation:
	 * number_of_cycles = ((2 + n) * a + 2) * b
	 * where n is the number of instructions in the inner loop
	 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
	 * b = 6A
	 */
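	/*
	 * Arithmetic check: ((2 + 0) * 256 + 2) * 106 = 54,484 cycles,
	 * i.e. ~204 us at 3.75 ns per cycle, which satisfies tINIT >= 200 us.
	 */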
	rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL,
				  SEQ_TINIT_CNTR2_VAL,
				  RW_MGR_INIT_RESET_0_CKE_0);

	/* indicate that memory is stable */
	writel(1, &phy_mgr_cfg->reset_mem_stbl);

	/*
	 * transition the RESET to high
	 * Wait for 500us
	 */

	/*
	 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
	 * If a and b are the number of iterations in 2 nested loops
	 * it takes the following number of cycles to complete the operation
	 * number_of_cycles = ((2 + n) * a + 2) * b
	 * where n is the number of instructions in the inner loop
	 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
	 * b = FF
	 */
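	/*
	 * Arithmetic check: ((2 + 2) * 131 + 2) * 256 = 134,656 cycles,
	 * i.e. ~505 us at 3.75 ns per cycle, which satisfies the 500 us wait.
	 */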
	rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL,
				  SEQ_TRESET_CNTR2_VAL,
				  RW_MGR_INIT_RESET_1_CKE_0);

	/* bring up clock enable */

	/* tXRP < 250 ck cycles */
	delay_for_n_mem_clocks(250);

	rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET,
			     0);
}

/*
 * At the end of calibration we have to program the user settings in and
 * hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
	rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1);
	/*
	 * USER need to wait tMOD (12CK or 15ns) time before issuing
	 * other commands, but we will have plenty of NIOS cycles before
	 * actual handoff so it's okay.
	 */
}

/*
 * Performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works.
 */
static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn,
	uint32_t group, uint32_t num_tries, uint32_t *bit_chk,
	uint32_t all_ranks)
{
	uint32_t r, vg;
	uint32_t correct_mask_vg;
	uint32_t tmp_bit_chk;
	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	uint32_t addr;
	uint32_t base_rw_mgr;

	*bit_chk = param->read_correct_mask;
	correct_mask_vg = param->read_correct_mask_vg;

	for (r = rank_bgn; r < rank_end; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		/* Load up a constant burst of read commands */
		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
		writel(RW_MGR_GUARANTEED_READ,
			&sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
		writel(RW_MGR_GUARANTEED_READ_CONT,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);

		tmp_bit_chk = 0;
		for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
			/* reset the fifos to get pointers to known state */

			writel(0, &phy_mgr_cmd->fifo_reset);
			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				  RW_MGR_RESET_READ_DATAPATH_OFFSET);

			tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
				/ RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);
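			/*
			 * Each virtual group contributes DQ_PER_READ_DQS /
			 * VIRTUAL_GROUPS_PER_READ_DQS result bits; e.g.
			 * (illustrative) with 8 DQ per DQS and 2 virtual
			 * groups, the shift above makes room for the 4 result
			 * bits of this group before they are OR-ed in below,
			 * packing per-VG results side by side in tmp_bit_chk.
			 */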

			addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
			writel(RW_MGR_GUARANTEED_READ, addr +
			       ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
				vg) << 2));

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr));

			if (vg == 0)
				break;
		}
		*bit_chk &= tmp_bit_chk;
	}

	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
	debug_cond(DLEVEL == 1,
		   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %lu\n",
		   __func__, __LINE__, group, *bit_chk,
		   param->read_correct_mask,
		   (long unsigned int)(*bit_chk == param->read_correct_mask));
	return *bit_chk == param->read_correct_mask;
}

static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks
	(uint32_t group, uint32_t num_tries, uint32_t *bit_chk)
{
	return rw_mgr_mem_calibrate_read_test_patterns(0, group,
		num_tries, bit_chk, 1);
}

/* load up the patterns we are going to use during a read test */
static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn,
	uint32_t all_ranks)
{
	uint32_t r;
	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);

	debug("%s:%d\n", __func__, __LINE__);
	for (r = rank_bgn; r < rank_end; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		/* Load up a constant burst */
		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
			&sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
			&sdr_rw_load_jump_mgr_regs->load_jump_add2);

		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
			&sdr_rw_load_jump_mgr_regs->load_jump_add3);

		writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	}

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/*
 * Try a read and see if it returns correct data back. Has dummy reads
 * inserted into the mix (used to align DQS enable) and more thorough
 * checks than the regular read test.
 */
static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group,
	uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
	uint32_t all_groups, uint32_t all_ranks)
{
	uint32_t r, vg;
	uint32_t correct_mask_vg;
	uint32_t tmp_bit_chk;
	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	uint32_t addr;
	uint32_t base_rw_mgr;

	*bit_chk = param->read_correct_mask;
	correct_mask_vg = param->read_correct_mask_vg;

	uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) &
		CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION);

	for (r = rank_bgn; r < rank_end; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_READ_B2B_WAIT1,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
		writel(RW_MGR_READ_B2B_WAIT2,
			&sdr_rw_load_jump_mgr_regs->load_jump_add2);

		if (quick_read_mode)
			writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
			/* need at least two (1+1) reads to capture failures */
		else if (all_groups)
			writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
		else
			writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);

		writel(RW_MGR_READ_B2B,
			&sdr_rw_load_jump_mgr_regs->load_jump_add0);
		if (all_groups)
			writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
			       RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
			       &sdr_rw_load_mgr_regs->load_cntr3);
		else
			writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);

		writel(RW_MGR_READ_B2B,
			&sdr_rw_load_jump_mgr_regs->load_jump_add3);

		tmp_bit_chk = 0;
		for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
			/* reset the fifos to get pointers to known state */
			writel(0, &phy_mgr_cmd->fifo_reset);
			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				  RW_MGR_RESET_READ_DATAPATH_OFFSET);

			tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
				/ RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

			if (all_groups)
				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET;
			else
				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;

			writel(RW_MGR_READ_B2B, addr +
			       ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
			       vg) << 2));

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));

			if (vg == 0)
				break;
		}
		*bit_chk &= tmp_bit_chk;
	}

	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

	if (all_correct) {
		set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
		debug_cond(DLEVEL == 2,
			   "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %lu\n",
			   __func__, __LINE__, group, all_groups, *bit_chk,
			   param->read_correct_mask,
			   (long unsigned int)(*bit_chk ==
			   param->read_correct_mask));
		return *bit_chk == param->read_correct_mask;
	} else {
		set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
		debug_cond(DLEVEL == 2,
			   "%s:%d read_test(%u,ONE,%u) => (%u != %lu) => %lu\n",
			   __func__, __LINE__, group, all_groups, *bit_chk,
			   (long unsigned int)0,
			   (long unsigned int)(*bit_chk != 0x00));
		return *bit_chk != 0x00;
	}
}

static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group,
	uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
	uint32_t all_groups)
{
	return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct,
					      bit_chk, all_groups, 1);
}

static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v)
{
	writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
	(*v)++;
}

static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v)
{
	uint32_t i;

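	/*
	 * The hardware VFIFO pointer only advances, so a decrement is
	 * implemented as VFIFO_SIZE - 1 increments, which wrap around to
	 * the equivalent of one step back.
	 */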
	for (i = 0; i < VFIFO_SIZE - 1; i++)
		rw_mgr_incr_vfifo(grp, v);
}

static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk)
{
	uint32_t v;
	uint32_t fail_cnt = 0;
	uint32_t test_status;

	for (v = 0; v < VFIFO_SIZE; ) {
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n",
			   __func__, __LINE__, v);
		test_status = rw_mgr_mem_calibrate_read_test_all_ranks
			(grp, 1, PASS_ONE_BIT, bit_chk, 0);
		if (!test_status) {
			fail_cnt++;

			if (fail_cnt == 2)
				break;
		}

		/* fiddle with FIFO */
		rw_mgr_incr_vfifo(grp, &v);
	}

	if (v >= VFIFO_SIZE) {
		/* no failing read found!! Something must have gone wrong */
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n",
			   __func__, __LINE__);
		return 0;
	} else {
		return v;
	}
}

static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
			      uint32_t dtaps_per_ptap, uint32_t *work_bgn,
			      uint32_t *v, uint32_t *d, uint32_t *p,
			      uint32_t *i, uint32_t *max_working_cnt)
{
	uint32_t found_begin = 0;
	uint32_t tmp_delay = 0;
	uint32_t test_status;

	for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
		IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
		*work_bgn = tmp_delay;
		scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);

		for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
			for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
				IO_DELAY_PER_OPA_TAP) {
				scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);

				test_status =
				rw_mgr_mem_calibrate_read_test_all_ranks
				(*grp, 1, PASS_ONE_BIT, bit_chk, 0);

				if (test_status) {
					*max_working_cnt = 1;
					found_begin = 1;
					break;
				}
			}

			if (found_begin)
				break;

			if (*p > IO_DQS_EN_PHASE_MAX)
				/* fiddle with FIFO */
				rw_mgr_incr_vfifo(*grp, v);
		}

		if (found_begin)
			break;
	}

	if (*i >= VFIFO_SIZE) {
		/* cannot find working solution */
		debug_cond(DLEVEL == 2,
			   "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
			   __func__, __LINE__);
		return 0;
	} else {
		return 1;
	}
}
1362
1363 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk,
1364                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1365                              uint32_t *p, uint32_t *max_working_cnt)
1366 {
1367         uint32_t found_begin = 0;
1368         uint32_t tmp_delay;
1369
1370         /* Special case code for backing up a phase */
1371         if (*p == 0) {
1372                 *p = IO_DQS_EN_PHASE_MAX;
1373                 rw_mgr_decr_vfifo(*grp, v);
1374         } else {
1375                 (*p)--;
1376         }
1377         tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1378         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1379
1380         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn;
1381                 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1382                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1383
1384                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1385                                                              PASS_ONE_BIT,
1386                                                              bit_chk, 0)) {
1387                         found_begin = 1;
1388                         *work_bgn = tmp_delay;
1389                         break;
1390                 }
1391         }
1392
1393         /* We have found a working dtap before the ptap found above */
1394         if (found_begin == 1)
1395                 (*max_working_cnt)++;
1396
1397         /*
1398          * Restore the VFIFO to the state it had before we decremented
1399          * it (if we did).
1400          */
1401         (*p)++;
1402         if (*p > IO_DQS_EN_PHASE_MAX) {
1403                 *p = 0;
1404                 rw_mgr_incr_vfifo(*grp, v);
1405         }
1406
1407         scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0);
1408 }
1409
1410 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk,
1411                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1412                              uint32_t *p, uint32_t *i, uint32_t *max_working_cnt,
1413                              uint32_t *work_end)
1414 {
1415         uint32_t found_end = 0;
1416
1417         (*p)++;
1418         *work_end += IO_DELAY_PER_OPA_TAP;
1419         if (*p > IO_DQS_EN_PHASE_MAX) {
1420                 /* fiddle with FIFO */
1421                 *p = 0;
1422                 rw_mgr_incr_vfifo(*grp, v);
1423         }
1424
1425         for (; *i < VFIFO_SIZE + 1; (*i)++) {
1426                 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end
1427                         += IO_DELAY_PER_OPA_TAP) {
1428                         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1429
1430                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1431                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) {
1432                                 found_end = 1;
1433                                 break;
1434                         } else {
1435                                 (*max_working_cnt)++;
1436                         }
1437                 }
1438
1439                 if (found_end)
1440                         break;
1441
1442                 if (*p > IO_DQS_EN_PHASE_MAX) {
1443                         /* fiddle with FIFO */
1444                         rw_mgr_incr_vfifo(*grp, v);
1445                         *p = 0;
1446                 }
1447         }
1448
1449         if (*i >= VFIFO_SIZE + 1) {
1450                 /* cannot see edge of failing read */
1451                 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: "
1452                            "end: failed\n", __func__, __LINE__);
1453                 return 0;
1454         } else {
1455                 return 1;
1456         }
1457 }
1458
1459 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk,
1460                                   uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1461                                   uint32_t *p, uint32_t *work_mid,
1462                                   uint32_t *work_end)
1463 {
1464         int i;
1465         int tmp_delay = 0;
1466
1467         *work_mid = (*work_bgn + *work_end) / 2;
1468
1469         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1470                    *work_bgn, *work_end, *work_mid);
1471         /* Get the middle delay to be less than a VFIFO delay */
1472         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX;
1473                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1474                 ;
1475         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1476         while (*work_mid > tmp_delay)
1477                 *work_mid -= tmp_delay;
1478         debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid);
1479
1480         tmp_delay = 0;
1481         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid;
1482                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1483                 ;
1484         tmp_delay -= IO_DELAY_PER_OPA_TAP;
1485         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay);
1486         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++,
1487                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP)
1488                 ;
1489         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay);
1490
1491         scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1);
1492         scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1493
1494         /*
1495          * Push the VFIFO until we can successfully calibrate. We can do
1496          * this because the largest possible margin is 1 VFIFO cycle.
1497          */
1498         for (i = 0; i < VFIFO_SIZE; i++) {
1499                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
1500                            *v);
1501                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1502                                                              PASS_ONE_BIT,
1503                                                              bit_chk, 0)) {
1504                         break;
1505                 }
1506
1507                 /* fiddle with FIFO */
1508                 rw_mgr_incr_vfifo(*grp, v);
1509         }
1510
1511         if (i >= VFIFO_SIZE) {
1512                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: "
1513                            "center: failed\n", __func__, __LINE__);
1514                 return 0;
1515         } else {
1516                 return 1;
1517         }
1518 }
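
/*
 * Worked example of the centring arithmetic in sdr_find_window_centre(),
 * using illustrative values only (say IO_DQS_EN_PHASE_MAX = 7,
 * IO_DELAY_PER_OPA_TAP = 400, IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25): one
 * VFIFO period is 8 * 400 = 3200, so work_mid = 3500 first reduces to 300.
 * The ptap loop then stops at p = 1 with tmp_delay = 400, which is backed
 * off by one OPA tap to 0, and the dtap loop adds 12 dtaps (12 * 25 = 300).
 * The centre is programmed as phase (p - 1) = 0 and delay d = 12, after
 * which the VFIFO is bumped until a read at that setting passes.
 */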
1519
1520 /* find a good dqs enable to use */
1521 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
1522 {
1523         uint32_t v, d, p, i;
1524         uint32_t max_working_cnt;
1525         uint32_t bit_chk;
1526         uint32_t dtaps_per_ptap;
1527         uint32_t work_bgn, work_mid, work_end;
1528         uint32_t found_passing_read, found_failing_read, initial_failing_dtap;
1529
1530         debug("%s:%d %u\n", __func__, __LINE__, grp);
1531
1532         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1533
1534         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1535         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1536
1537         /* ************************************************************** */
1538         /* * Step 0 : Determine number of delay taps for each phase tap * */
1539         dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
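        /*
         * Illustration (assumed numbers, not this device's real timing):
         * with IO_DELAY_PER_OPA_TAP = 400 and
         * IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25, the integer division above
         * yields dtaps_per_ptap = 16, i.e. roughly 16 delay-chain taps
         * span one phase tap.
         */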
1540
1541         /* ********************************************************* */
1542         /* * Step 1 : First push vfifo until we get a failing read * */
1543         v = find_vfifo_read(grp, &bit_chk);
1544
1545         max_working_cnt = 0;
1546
1547         /* ******************************************************** */
1548         /* * step 2: find first working phase, increment in ptaps * */
1549         work_bgn = 0;
1550         if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
1551                                 &p, &i, &max_working_cnt) == 0)
1552                 return 0;
1553
1554         work_end = work_bgn;
1555
1556         /*
1557          * If d is 0 then the working window covers a phase tap and
1558          * we can follow the old procedure; otherwise, we've found the
1559          * beginning and we need to increment the dtaps until we find the end.
1560          */
1561         if (d == 0) {
1562                 /* ********************************************************* */
1563                 /* * step 3a: if we have room, back off by one and
1564                 increment in dtaps * */
1565
1566                 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1567                                  &max_working_cnt);
1568
1569                 /* ********************************************************* */
1570                 /* * step 4a: go forward from working phase to non working
1571                 phase, increment in ptaps * */
1572                 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1573                                          &i, &max_working_cnt, &work_end) == 0)
1574                         return 0;
1575
1576                 /* ********************************************************* */
1577                 /* * step 5a:  back off one from last, increment in dtaps  * */
1578
1579                 /* Special case code for backing up a phase */
1580                 if (p == 0) {
1581                         p = IO_DQS_EN_PHASE_MAX;
1582                         rw_mgr_decr_vfifo(grp, &v);
1583                 } else {
1584                         p = p - 1;
1585                 }
1586
1587                 work_end -= IO_DELAY_PER_OPA_TAP;
1588                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1589
1590                 /* The actual increment of dtaps is done outside of the
1591                    if/else block to share code. */
1592                 d = 0;
1593
1594                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: "
1595                            "vfifo=%u ptap=%u\n", __func__, __LINE__,
1596                            v, p);
1597         } else {
1598                 /* ******************************************************* */
1599                 /* * step 3-5b:  Find the right edge of the window using
1600                 delay taps   * */
1601                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo=%u "
1602                            "ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
1603                            v, p, d, work_bgn);
1604
1605                 work_end = work_bgn;
1606
1607                 /* The actual increment of dtaps is done outside of the
1608                    if/else block to share code. */
1609
1610                 /* Only here to counterbalance a subtract later on which is
1611                 not needed if this branch of the algorithm is taken */
1612                 max_working_cnt++;
1613         }
1614
1615         /* The dtap increment to find the failing edge is done here */
1616         for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end +=
1617                 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1618                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: "
1619                            "end-2: dtap=%u\n", __func__, __LINE__, d);
1620                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1621 
1622                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1623                                                               PASS_ONE_BIT,
1624                                                               &bit_chk, 0)) {
1625                         break;
1626                 }
1627         }
1628
1629         /* Go back to working dtap */
1630         if (d != 0)
1631                 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1632
1633         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u "
1634                    "ptap=%u dtap=%u end=%u\n", __func__, __LINE__,
1635                    v, p, d - 1, work_end);
1636
1637         if (work_end < work_bgn) {
1638                 /* nil range */
1639                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: "
1640                            "end-2: failed\n", __func__, __LINE__);
1641                 return 0;
1642         }
1643
1644         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n",
1645                    __func__, __LINE__, work_bgn, work_end);
1646
1647         /* *************************************************************** */
1648         /*
1649          * We need to calculate the number of dtaps that equal a ptap.
1650          * To do that, we'll back up a ptap and re-find the edge of the
1651          * window using dtaps.
1652          */
1653
1654         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate "
1655                    "dtaps_per_ptap for tracking\n", __func__, __LINE__);
1656
1657         /* Special case code for backing up a phase */
1658         if (p == 0) {
1659                 p = IO_DQS_EN_PHASE_MAX;
1660                 rw_mgr_decr_vfifo(grp, &v);
1661                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backed up "
1662                            "cycle/phase: v=%u p=%u\n", __func__, __LINE__,
1663                            v, p);
1664         } else {
1665                 p = p - 1;
1666                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backed up "
1667                            "phase only: v=%u p=%u\n", __func__, __LINE__,
1668                            v, p);
1669         }
1670
1671         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1672
1673         /*
1674          * Increase dtap until we first see a passing read (in case the
1675          * window is smaller than a ptap),
1676          * and then a failing read to mark the edge of the window again
1677          */
1678
1679         /* Find a passing read */
1680         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n",
1681                    __func__, __LINE__);
1682         found_passing_read = 0;
1683         found_failing_read = 0;
1684         initial_failing_dtap = d;
1685         for (; d <= IO_DQS_EN_DELAY_MAX; d++) {
1686                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing "
1687                            "read d=%u\n", __func__, __LINE__, d);
1688                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1689
1690                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1691                                                              PASS_ONE_BIT,
1692                                                              &bit_chk, 0)) {
1693                         found_passing_read = 1;
1694                         break;
1695                 }
1696         }
1697
1698         if (found_passing_read) {
1699                 /* Find a failing read */
1700                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find "
1701                            "failing read\n", __func__, __LINE__);
1702                 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) {
1703                         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: "
1704                                    "testing read d=%u\n", __func__, __LINE__, d);
1705                         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1706
1707                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1708                                 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) {
1709                                 found_failing_read = 1;
1710                                 break;
1711                         }
1712                 }
1713         } else {
1714                 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to "
1715                            "calculate dtaps per ptap. Falling back on the "
1716                            "static value.\n", __func__, __LINE__);
1717         }
1718
1719         /*
1720          * The dynamically calculated dtaps_per_ptap is only valid if we
1721          * found both a passing and a failing read. If we didn't, d simply
1722          * hit the max (IO_DQS_EN_DELAY_MAX) and dtaps_per_ptap keeps its
1723          * statically calculated value.
1724          */
1725         if (found_passing_read && found_failing_read)
1726                 dtaps_per_ptap = d - initial_failing_dtap;
1727
1728         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1729         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap = "
1730                    "%u - %u = %u\n", __func__, __LINE__, d,
1731                    initial_failing_dtap, dtaps_per_ptap);
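        /*
         * Example of the subtraction above, with hypothetical values: if
         * the failing edge before backing up one ptap was at dtap 3
         * (initial_failing_dtap) and the re-found failing edge is at
         * d = 19, then dtaps_per_ptap = 19 - 3 = 16 delay taps per phase
         * tap, and that measured value is handed to the tracking logic
         * through the register file write above.
         */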
1732
1733         /* ******************************************** */
1734         /* * step 6:  Find the centre of the window   * */
1735         if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1736                                    &work_mid, &work_end) == 0)
1737                 return 0;
1738
1739         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: "
1740                    "vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__,
1741                    v, p - 1, d);
1742         return 1;
1743 }
1744
1745 /*
1746  * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
1747  * dq_in_delay values
1748  */
1749 static uint32_t
1750 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
1751 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
1752 {
1753         uint32_t found;
1754         uint32_t i;
1755         uint32_t p;
1756         uint32_t d;
1757         uint32_t r;
1758
1759         const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
1760                 (RW_MGR_MEM_DQ_PER_READ_DQS - 1);
1761                 /* we start at zero, so have one less dq to divide among */
1762
1763         debug("%s:%d (%u,%u,%u)\n", __func__, __LINE__, write_group, read_group,
1764               test_bgn);
1765
1766         /* try different dq_in_delays since the dq path is shorter than dqs */
1767
1768         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1769              r += NUM_RANKS_PER_SHADOW_REG) {
1770                 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) {
1771                         debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_"
1772                                    "vfifo_find_dqs_", __func__, __LINE__);
1773                         debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
1774                                write_group, read_group);
1775                         debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i, p, d);
1776                         scc_mgr_set_dq_in_delay(p, d);
1777                         scc_mgr_load_dq(p);
1778                 }
1779                 writel(0, &sdr_scc_mgr->update);
1780         }
1781
1782         found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);
1783
1784         debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_"
1785                    "en_phase_sweep_dq", __func__, __LINE__);
1786         debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Resetting delay "
1787                    "chain to zero\n", write_group, read_group, found);
1788
1789         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1790              r += NUM_RANKS_PER_SHADOW_REG) {
1791                 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1792                         i++, p++) {
1793                         scc_mgr_set_dq_in_delay(p, 0);
1794                         scc_mgr_load_dq(p);
1795                 }
1796                 writel(0, &sdr_scc_mgr->update);
1797         }
1798
1799         return found;
1800 }
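
/*
 * delay_step sketch for the sweep above (illustrative values): with
 * IO_IO_IN_DELAY_MAX = 31 and RW_MGR_MEM_DQ_PER_READ_DQS = 8, delay_step
 * = 31 / 7 = 4, so the eight DQ pins of the group are seeded with input
 * delays 0, 4, 8, ... 28 before the DQS enable phase search runs. The
 * spread staggers the DQ arrival times so some bits keep passing while
 * the DQS path is swept; the delays are then reset to zero afterwards.
 */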
1801
1802 /* per-bit deskew DQ and center */
1803 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
1804         uint32_t write_group, uint32_t read_group, uint32_t test_bgn,
1805         uint32_t use_read_test, uint32_t update_fom)
1806 {
1807         uint32_t i, p, d, min_index;
1808         /*
1809          * Store the edge and margin values below as signed, since they
1810          * are compared with signed numbers.
1811          */
1812         uint32_t bit_chk;
1813         uint32_t sticky_bit_chk;
1814         int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1815         int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1816         int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS];
1817         int32_t mid;
1818         int32_t orig_mid_min, mid_min;
1819         int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs,
1820                 final_dqs_en;
1821         int32_t dq_margin, dqs_margin;
1822         uint32_t stop;
1823         uint32_t temp_dq_in_delay1;
1824         uint32_t addr;
1825
1826         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
1827
1828         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET;
1829         start_dqs = readl(addr + (read_group << 2));
1830         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
1831                 start_dqs_en = readl(addr + ((read_group << 2)
1832                                      - IO_DQS_EN_DELAY_OFFSET));
1833
1834         /* set the left and right edge of each bit to an illegal value */
1835         /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
1836         sticky_bit_chk = 0;
1837         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1838                 left_edge[i]  = IO_IO_IN_DELAY_MAX + 1;
1839                 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1840         }
1841
1842         /* Search for the left edge of the window for each bit */
1843         for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) {
1844                 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d);
1845
1846                 writel(0, &sdr_scc_mgr->update);
1847
1848                 /*
1849                  * Stop searching when the read test doesn't pass AND when
1850                  * we've seen a passing read on every bit.
1851                  */
1852                 if (use_read_test) {
1853                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1854                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1855                                 &bit_chk, 0, 0);
1856                 } else {
1857                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1858                                                         0, PASS_ONE_BIT,
1859                                                         &bit_chk, 0);
1860                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1861                                 (read_group - (write_group *
1862                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1863                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1864                         stop = (bit_chk == 0);
1865                 }
1866                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1867                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1868                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u "
1869                            "=> %u == %u && %u\n", __func__, __LINE__, d,
1870                            sticky_bit_chk,
1871                            param->read_correct_mask, stop);
1872
1873                 if (stop == 1) {
1874                         break;
1875                 } else {
1876                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1877                                 if (bit_chk & 1) {
1878                                         /* Remember a passing test as the
1879                                         left_edge */
1880                                         left_edge[i] = d;
1881                                 } else {
1882                                         /* If a left edge has not been seen yet,
1883                                         then a future passing test will mark
1884                                         this edge as the right edge */
1885                                         if (left_edge[i] ==
1886                                                 IO_IO_IN_DELAY_MAX + 1) {
1887                                                 right_edge[i] = -(d + 1);
1888                                         }
1889                                 }
1890                                 bit_chk = bit_chk >> 1;
1891                         }
1892                 }
1893         }
1894
1895         /* Reset DQ delay chains to 0 */
1896         scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0);
1897         sticky_bit_chk = 0;
1898         for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) {
1899                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: "
1900                            "%d right_edge[%u]: %d\n", __func__, __LINE__,
1901                            i, left_edge[i], i, right_edge[i]);
1902
1903                 /*
1904                  * Check for cases where we haven't found the left edge,
1905                  * which makes our assignment of the right edge invalid.
1906                  * Reset it to the illegal value.
1907                  */
1908                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && (
1909                         right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1910                         right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1911                         debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset "
1912                                    "right_edge[%u]: %d\n", __func__, __LINE__,
1913                                    i, right_edge[i]);
1914                 }
1915
1916                 /*
1917                  * Reset sticky bit (except for bits where we have seen
1918                  * both the left and right edge).
1919                  */
1920                 sticky_bit_chk = sticky_bit_chk << 1;
1921                 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) &&
1922                     (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1923                         sticky_bit_chk = sticky_bit_chk | 1;
1924                 }
1925
1926                 if (i == 0)
1927                         break;
1928         }
1929
1930         /* Search for the right edge of the window for each bit */
1931         for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) {
1932                 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
1933                 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
1934                         uint32_t delay = d + start_dqs_en;
1935                         if (delay > IO_DQS_EN_DELAY_MAX)
1936                                 delay = IO_DQS_EN_DELAY_MAX;
1937                         scc_mgr_set_dqs_en_delay(read_group, delay);
1938                 }
1939                 scc_mgr_load_dqs(read_group);
1940
1941                 writel(0, &sdr_scc_mgr->update);
1942
1943                 /*
1944                  * Stop searching when the read test doesn't pass AND when
1945                  * we've seen a passing read on every bit.
1946                  */
1947                 if (use_read_test) {
1948                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1949                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1950                                 &bit_chk, 0, 0);
1951                 } else {
1952                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1953                                                         0, PASS_ONE_BIT,
1954                                                         &bit_chk, 0);
1955                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1956                                 (read_group - (write_group *
1957                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1958                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1959                         stop = (bit_chk == 0);
1960                 }
1961                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1962                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1963
1964                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u "
1965                            "=> %u == %u && %u\n", __func__, __LINE__, d,
1966                            sticky_bit_chk, param->read_correct_mask, stop);
1967
1968                 if (stop == 1) {
1969                         break;
1970                 } else {
1971                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1972                                 if (bit_chk & 1) {
1973                                         /* Remember a passing test as
1974                                         the right_edge */
1975                                         right_edge[i] = d;
1976                                 } else {
1977                                         if (d != 0) {
1978                                                 /* If a right edge has not been
1979                                                 seen yet, then a future passing
1980                                                 test will mark this edge as the
1981                                                 left edge */
1982                                                 if (right_edge[i] ==
1983                                                 IO_IO_IN_DELAY_MAX + 1) {
1984                                                         left_edge[i] = -(d + 1);
1985                                                 }
1986                                         } else {
1987                                                 /* d = 0 failed, but it passed
1988                                                 when testing the left edge,
1989                                                 so it must be marginal,
1990                                                 set it to -1 */
1991                                                 if (right_edge[i] ==
1992                                                         IO_IO_IN_DELAY_MAX + 1 &&
1993                                                         left_edge[i] !=
1994                                                         IO_IO_IN_DELAY_MAX
1995                                                         + 1) {
1996                                                         right_edge[i] = -1;
1997                                                 }
1998                                                 /* If a right edge has not been
1999                                                 seen yet, then a future passing
2000                                                 test will mark this edge as the
2001                                                 left edge */
2002                                                 else if (right_edge[i] ==
2003                                                         IO_IO_IN_DELAY_MAX +
2004                                                         1) {
2005                                                         left_edge[i] = -(d + 1);
2006                                                 }
2007                                         }
2008                                 }
2009
2010                                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center"
2011                                            "[r,d=%u]: ", __func__, __LINE__, d);
2012                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ",
2013                                            (int)(bit_chk & 1), i, left_edge[i]);
2014                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2015                                            right_edge[i]);
2016                                 bit_chk = bit_chk >> 1;
2017                         }
2018                 }
2019         }
2020
2021         /* Check that all bits have a window */
2022         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2023                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: "
2024                            "%d right_edge[%u]: %d\n", __func__, __LINE__,
2025                            i, left_edge[i], i, right_edge[i]);
2026                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i]
2027                         == IO_IO_IN_DELAY_MAX + 1)) {
2028                         /*
2029                          * Restore delay chain settings before letting the loop
2030                          * in rw_mgr_mem_calibrate_vfifo retry different
2031                          * dqs/ck relationships.
2032                          */
2033                         scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs);
2034                         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2035                                 scc_mgr_set_dqs_en_delay(read_group,
2036                                                          start_dqs_en);
2037                         }
2038                         scc_mgr_load_dqs(read_group);
2039                         writel(0, &sdr_scc_mgr->update);
2040
2041                         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed "
2042                                    "to find edge [%u]: %d %d\n", __func__,
2043                                    __LINE__, i, left_edge[i], right_edge[i]);
2044                         if (use_read_test) {
2045                                 set_failing_group_stage(read_group *
2046                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2047                                         CAL_STAGE_VFIFO,
2048                                         CAL_SUBSTAGE_VFIFO_CENTER);
2049                         } else {
2050                                 set_failing_group_stage(read_group *
2051                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2052                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2053                                         CAL_SUBSTAGE_VFIFO_CENTER);
2054                         }
2055                         return 0;
2056                 }
2057         }
2058
2059         /* Find middle of window for each DQ bit */
2060         mid_min = left_edge[0] - right_edge[0];
2061         min_index = 0;
2062         for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2063                 mid = left_edge[i] - right_edge[i];
2064                 if (mid < mid_min) {
2065                         mid_min = mid;
2066                         min_index = i;
2067                 }
2068         }
2069
2070         /*
2071          * -mid_min/2 represents the amount that we need to move DQS.
2072          * If mid_min is odd and positive we'll need to add one to
2073          * make sure the rounding in further calculations is correct
2074          * (always bias to the right), so just add 1 for all positive values.
2075          */
2076         if (mid_min > 0)
2077                 mid_min++;
2078
2079         mid_min = mid_min / 2;
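        /*
         * Rounding illustration: for mid_min = 3 the increment above makes
         * it 4, and 4 / 2 = 2, biasing the shift to the right as required;
         * for negative values, C's truncation towards zero (e.g. -3 / 2 =
         * -1) already yields the smaller-magnitude result, so no
         * adjustment is needed there.
         */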
2080
2081         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n",
2082                    __func__, __LINE__, mid_min, min_index);
2083
2084         /* Determine the amount we can change DQS (which is -mid_min) */
2085         orig_mid_min = mid_min;
2086         new_dqs = start_dqs - mid_min;
2087         if (new_dqs > IO_DQS_IN_DELAY_MAX)
2088                 new_dqs = IO_DQS_IN_DELAY_MAX;
2089         else if (new_dqs < 0)
2090                 new_dqs = 0;
2091
2092         mid_min = start_dqs - new_dqs;
2093         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2094                    mid_min, new_dqs);
2095
2096         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2097                 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2098                         mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2099                 else if (start_dqs_en - mid_min < 0)
2100                         mid_min += start_dqs_en - mid_min;
2101         }
2102         new_dqs = start_dqs - mid_min;
2103
2104         debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d "
2105                    "new_dqs=%d mid_min=%d\n", start_dqs,
2106                    IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2107                    new_dqs, mid_min);
2108
2109         /* Initialize data for export structures */
2110         dqs_margin = IO_IO_IN_DELAY_MAX + 1;
2111         dq_margin  = IO_IO_IN_DELAY_MAX + 1;
2112
2113         /* add delay to bring centre of all DQ windows to the same "level" */
2114         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
2115                 /* Use values before divide by 2 to reduce round off error */
2116                 shift_dq = (left_edge[i] - right_edge[i] -
2117                         (left_edge[min_index] - right_edge[min_index]))/2  +
2118                         (orig_mid_min - mid_min);
2119
2120                 debug_cond(DLEVEL == 2, "vfifo_center: before: "
2121                            "shift_dq[%u]=%d\n", i, shift_dq);
2122
2123                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
2124                 temp_dq_in_delay1 = readl(addr + (p << 2));
2126
2127                 if (shift_dq + (int32_t)temp_dq_in_delay1 >
2128                         (int32_t)IO_IO_IN_DELAY_MAX) {
2129                         shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay1;
2130                 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) {
2131                         shift_dq = -(int32_t)temp_dq_in_delay1;
2132                 }
2133                 debug_cond(DLEVEL == 2, "vfifo_center: after: "
2134                            "shift_dq[%u]=%d\n", i, shift_dq);
2135                 final_dq[i] = temp_dq_in_delay1 + shift_dq;
2136                 scc_mgr_set_dq_in_delay(p, final_dq[i]);
2137                 scc_mgr_load_dq(p);
2138
2139                 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i,
2140                            left_edge[i] - shift_dq + (-mid_min),
2141                            right_edge[i] + shift_dq - (-mid_min));
2142                 /* To determine values for export structures */
2143                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2144                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2145
2146                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2147                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2148         }
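        /*
         * shift_dq example with hypothetical edge values: (left_edge -
         * right_edge) measures how far a bit's window centre sits from the
         * current tap, so a bit with left_edge = 12, right_edge = 2
         * against a reference (min_index) bit with left_edge = 8,
         * right_edge = 4 gets ((12 - 2) - (8 - 4)) / 2 = 3 extra taps of
         * input delay (plus the common (orig_mid_min - mid_min)
         * correction), lining its centre up with the reference bit's
         * before the clamp to [0, IO_IO_IN_DELAY_MAX] is applied.
         */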
2149
2150         final_dqs = new_dqs;
2151         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2152                 final_dqs_en = start_dqs_en - mid_min;
2153
2154         /* Move DQS-en */
2155         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2156                 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en);
2157                 scc_mgr_load_dqs(read_group);
2158         }
2159
2160         /* Move DQS */
2161         scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs);
2162         scc_mgr_load_dqs(read_group);
2163         debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d "
2164                    "dqs_margin=%d\n", __func__, __LINE__,
2165                    dq_margin, dqs_margin);
2166
2167         /*
2168          * Do not remove this line as it makes sure all of our decisions
2169          * have been applied. Apply the update bit.
2170          */
2171         writel(0, &sdr_scc_mgr->update);
2172
2173         return (dq_margin >= 0) && (dqs_margin >= 0);
2174 }
2175
2176 /*
2177  * Calibrate the read valid prediction FIFO.
2178  *
2179  * - Read valid prediction will consist of finding a good DQS enable phase,
2180  *   DQS enable delay, DQS input phase, and DQS input delay.
2181  * - We also do a per-bit deskew on the DQ lines.
2182  */
2183 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
2184                                            uint32_t test_bgn)
2185 {
2186         uint32_t p, d, rank_bgn, sr;
2187         uint32_t dtaps_per_ptap;
2188         uint32_t bit_chk;
2189         uint32_t grp_calibrated;
2190         uint32_t write_group, write_test_bgn;
2191         uint32_t failed_substage;
2192
2193         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
2194
2195         /* update info for sims */
2196         reg_file_set_stage(CAL_STAGE_VFIFO);
2197
2198         write_group = read_group;
2199         write_test_bgn = test_bgn;
2200
2201         /* USER Determine number of delay taps for each phase tap */
2202         dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
2203                                       IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
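        /*
         * Illustration with assumed timings: DIV_ROUND_UP(400, 25) - 1 =
         * 15, so the d loop below sweeps output delays 0, 2, 4, ... across
         * one ptap's worth of dtaps, stepping by two to halve the
         * calibration time.
         */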
2204
2205         /* update info for sims */
2206         reg_file_set_group(read_group);
2207
2208         grp_calibrated = 0;
2209
2210         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2211         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2212
2213         for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) {
2214                 /*
2215                  * In RLDRAMX we may be messing with the delays of pins in
2216                  * the same write group but outside of the current read
2217                  * group, but that's OK because we haven't calibrated the
2218                  * output side yet.
2219                  */
2220                 if (d > 0) {
2221                         scc_mgr_apply_group_all_out_delay_add_all_ranks(
2222                                                                 write_group, d);
2223                 }
2224
2225                 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0;
2226                         p++) {
2227                         /* set a particular dqdqs phase */
2228                         scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p);
2229
2230                         debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u "
2231                                    "p=%u d=%u\n", __func__, __LINE__,
2232                                    read_group, p, d);
2233
2234                         /*
2235                          * Load up the patterns used by read calibration
2236                          * using current DQDQS phase.
2237                          */
2238                         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2239                         if (!(gbl->phy_debug_mode_flags &
2240                                 PHY_DEBUG_DISABLE_GUARANTEED_READ)) {
2241                                 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks
2242                                     (read_group, 1, &bit_chk)) {
2243                                         debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:",
2244                                                    __func__, __LINE__);
2245                                         debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n",
2246                                                    read_group, p, d);
2247                                         break;
2248                                 }
2249                         }
2250
2251                         /* case:56390 */
2252                         grp_calibrated = 1;
2253                 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
2254                     (write_group, read_group, test_bgn)) {
2255                                 /*
2256                                  * USER Read per-bit deskew can be done on a
2257                                  * per shadow register basis.
2258                                  */
2259                                 for (rank_bgn = 0, sr = 0;
2260                                         rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2261                                         rank_bgn += NUM_RANKS_PER_SHADOW_REG,
2262                                         ++sr) {
2263                                         /*
2264                                          * Determine if this set of ranks
2265                                          * should be skipped entirely.
2266                                          */
2267                                         if (!param->skip_shadow_regs[sr]) {
2268                                                 /*
2269                                                  * If doing read-after-write
2270                                                  * calibration, do not update
2271                                                  * FOM now - do it then.
2272                                                  */
2273                                         if (!rw_mgr_mem_calibrate_vfifo_center
2274                                                 (rank_bgn, write_group,
2275                                                 read_group, test_bgn, 1, 0)) {
2276                                                         grp_calibrated = 0;
2277                                                         failed_substage =
2278                                                 CAL_SUBSTAGE_VFIFO_CENTER;
2279                                                 }
2280                                         }
2281                                 }
2282                         } else {
2283                                 grp_calibrated = 0;
2284                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2285                         }
2286                 }
2287         }
2288
2289         if (grp_calibrated == 0) {
2290                 set_failing_group_stage(write_group, CAL_STAGE_VFIFO,
2291                                         failed_substage);
2292                 return 0;
2293         }
2294
2295         /*
2296          * Reset the delay chains back to zero if they have moved > 1
2297          * (check for > 2 because the loop increments d by 2 even when
2298          * the first pass succeeds).
2299          */
2300         if (d > 2)
2301                 scc_mgr_zero_group(write_group, 1);
2302
2303         return 1;
2304 }
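
/*
 * Retry-strategy recap for the function above: the (d, p) double loop
 * tries every DQDQS output phase at d = 0 first and only then adds output
 * delay in steps of two dtaps, re-running the guaranteed read and DQS
 * enable searches each time. Because d is incremented once more even on a
 * first-try success (ending at 2), the d > 2 test above distinguishes
 * "passed immediately" from "had to move the delay chains".
 */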
2305
2306 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */
2307 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
2308                                                uint32_t test_bgn)
2309 {
2310         uint32_t rank_bgn, sr;
2311         uint32_t grp_calibrated;
2312         uint32_t write_group;
2313
2314         debug("%s:%d %u %u\n", __func__, __LINE__, read_group, test_bgn);
2315
2316         /* update info for sims */
2317
2318         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2319         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2320
2321         write_group = read_group;
2322
2323         /* update info for sims */
2324         reg_file_set_group(read_group);
2325
2326         grp_calibrated = 1;
2327         /* Read per-bit deskew can be done on a per shadow register basis */
2328         for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2329                 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
2330                 /* Determine if this set of ranks should be skipped entirely */
2331                 if (!param->skip_shadow_regs[sr]) {
2332                 /* This is the last calibration round, update FOM here */
2333                         if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
2334                                                                 write_group,
2335                                                                 read_group,
2336                                                                 test_bgn, 0,
2337                                                                 1)) {
2338                                 grp_calibrated = 0;
2339                         }
2340                 }
2341         }
2342
2344         if (grp_calibrated == 0) {
2345                 set_failing_group_stage(write_group,
2346                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2347                                         CAL_SUBSTAGE_VFIFO_CENTER);
2348                 return 0;
2349         }
2350
2351         return 1;
2352 }
2353
2354 /* Calibrate LFIFO to find smallest read latency */
2355 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2356 {
2357         uint32_t found_one;
2358         uint32_t bit_chk;
2359
2360         debug("%s:%d\n", __func__, __LINE__);
2361
2362         /* update info for sims */
2363         reg_file_set_stage(CAL_STAGE_LFIFO);
2364         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2365
2366         /* Load up the patterns used by read calibration for all ranks */
2367         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2368         found_one = 0;
2369
2370         do {
2371                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2372                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u\n",
2373                            __func__, __LINE__, gbl->curr_read_lat);
2374
2375                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
2376                                                               NUM_READ_TESTS,
2377                                                               PASS_ALL_BITS,
2378                                                               &bit_chk, 1)) {
2379                         break;
2380                 }
2381
2382                 found_one = 1;
2383                 /* reduce read latency and see if things are working correctly */
2385                 gbl->curr_read_lat--;
2386         } while (gbl->curr_read_lat > 0);
2387
2388         /* reset the fifos to get pointers to known state */
2389
2390         writel(0, &phy_mgr_cmd->fifo_reset);
2391
2392         if (found_one) {
2393                 /* add a fudge factor to the read latency that was determined */
2394                 gbl->curr_read_lat += 2;
2395                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2396                 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using "
2397                            "read_lat=%u\n", __func__, __LINE__,
2398                            gbl->curr_read_lat);
2399                 return 1;
2400         } else {
2401                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2402                                         CAL_SUBSTAGE_READ_LATENCY);
2403
2404                 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial "
2405                            "read_lat=%u\n", __func__, __LINE__,
2406                            gbl->curr_read_lat);
2407                 return 0;
2408         }
2409 }
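
/*
 * LFIFO search sketch: starting from the reset-time read latency, the
 * loop above keeps decrementing gbl->curr_read_lat while full-pattern
 * reads still pass, so on a break curr_read_lat sits one below the
 * smallest working latency. The "+ 2" fudge then lands one step above
 * the measured minimum, trading one cycle of latency for margin.
 */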
2410
2411 /*
2412  * Issue a write test command.
2413  * Two variants are provided: one that just tests a write pattern and
2414  * another that tests datamask functionality.
2415  */
2416 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group,
2417                                                   uint32_t test_dm)
2418 {
2419         uint32_t mcc_instruction;
2420         uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) &&
2421                 ENABLE_SUPER_QUICK_CALIBRATION);
2422         uint32_t rw_wl_nop_cycles;
2423         uint32_t addr;
2424
2425         /*
2426          * Set counter and jump addresses for the right
2427          * number of NOP cycles.
2428          * The number of supported NOP cycles can range from -1 to infinity
2429          * Three different cases are handled:
2430          *
2431          * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
2432          *    mechanism will be used to insert the right number of NOPs
2433          *
2434          * 2. For a number of NOP cycles equal to 0, the micro-instruction
2435          *    issuing the write command will jump straight to the
2436          *    micro-instruction that turns on DQS (for DDRx), or outputs write
2437          *    data (for RLD), skipping the NOP micro-instruction
2438          *    altogether
2439          *
2440          * 3. A number of NOP cycles equal to -1 indicates that DQS must be
2441          *    turned on in the same micro-instruction that issues the write
2442          *    command. Then we need
2443          *    to directly jump to the micro-instruction that sends out the data
2444          *
2445          * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
2446          *       (2 and 3). One jump-counter (0) is used to perform multiple
2447          *       write-read operations.
2448          *       One counter is left to issue this command in "multiple-group" mode.
2449          */
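        /*
         * Concrete counter settings for the three cases (the values
         * written below, gathered here for orientation): both the -1 and
         * the 0 case park CNTR 2 at 0xFF so its jump is always taken,
         * differing only in the jump target; for N > 0 cycles, CNTR 2 is
         * zeroed so execution falls through, and CNTR 3 is loaded with
         * N - 1 so the NOP micro-instruction loops exactly N times.
         */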
2450
2451         rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
2452
2453         if (rw_wl_nop_cycles == -1) {
2454                 /*
2455                  * CNTR 2 - We want to execute the special write operation that
2456                  * turns on DQS right away and then skip directly to the
2457                  * instruction that sends out the data. We set the counter to a
2458                  * large number so that the jump is always taken.
2459                  */
2460                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2461
2462                 /* CNTR 3 - Not used */
2463                 if (test_dm) {
2464                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
2465                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
2466                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2467                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2468                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2469                 } else {
2470                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
2471                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
2472                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2473                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2474                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2475                 }
2476         } else if (rw_wl_nop_cycles == 0) {
2477                 /*
2478                  * CNTR 2 - We want to skip the NOP operation and go straight
2479                  * to the DQS enable instruction. We set the counter to a large
2480                  * number so that the jump is always taken.
2481                  */
2482                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2483
2484                 /* CNTR 3 - Not used */
2485                 if (test_dm) {
2486                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2487                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
2488                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2489                 } else {
2490                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2491                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
2492                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2493                 }
2494         } else {
2495                 /*
2496                  * CNTR 2 - In this case we want to execute the next instruction
2497                  * and NOT take the jump. So we set the counter to 0. The jump
2498                  * address doesn't count.
2499                  */
2500                 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
2501                 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2502
2503                 /*
2504                  * CNTR 3 - Set the nop counter to the number of cycles we
2505                  * need to loop for, minus 1.
2506                  */
2507                 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
2508                 if (test_dm) {
2509                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2510                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2511                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2512                 } else {
2513                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2514                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2515                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2516                 }
2517         }
2518
2519         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
2520                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
2521
2522         if (quick_write_mode)
2523                 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
2524         else
2525                 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
2526
2527         writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
2528
2529         /*
2530          * CNTR 1 - This is used to ensure enough time elapses
2531          * for read data to come back.
2532          */
2533         writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
2534
2535         if (test_dm) {
2536                 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
2537                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2538         } else {
2539                 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
2540                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2541         }
2542
2543         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
2544         writel(mcc_instruction, addr + (group << 2));
2545 }
2546
2547 /* Test writes, can check for a single bit pass or multiple bit pass */
2548 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
2549         uint32_t write_group, uint32_t use_dm, uint32_t all_correct,
2550         uint32_t *bit_chk, uint32_t all_ranks)
2551 {
2552         uint32_t r;
2553         uint32_t correct_mask_vg;
2554         uint32_t tmp_bit_chk;
2555         uint32_t vg;
2556         uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
2557                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
2558         uint32_t addr_rw_mgr;
2559         uint32_t base_rw_mgr;
2560
2561         *bit_chk = param->write_correct_mask;
2562         correct_mask_vg = param->write_correct_mask_vg;
2563
2564         for (r = rank_bgn; r < rank_end; r++) {
2565                 if (param->skip_ranks[r]) {
2566                         /* request to skip the rank */
2567                         continue;
2568                 }
2569
2570                 /* set rank */
2571                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
2572
2573                 tmp_bit_chk = 0;
2574                 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS;
2575                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1; ; vg--) {
2576                         /* reset the fifos to get pointers to known state */
2577                         writel(0, &phy_mgr_cmd->fifo_reset);
2578
2579                         tmp_bit_chk = tmp_bit_chk <<
2580                                 (RW_MGR_MEM_DQ_PER_WRITE_DQS /
2581                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
2582                         rw_mgr_mem_calibrate_write_test_issue(write_group *
2583                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + vg,
2584                                 use_dm);
2585
2586                         base_rw_mgr = readl(addr_rw_mgr);
2587                         tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
2588                         if (vg == 0)
2589                                 break;
2590                 }
2591                 *bit_chk &= tmp_bit_chk;
2592         }
2593
2594         if (all_correct) {
2595                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2596                 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \
2597                            %u => %lu", write_group, use_dm,
2598                            *bit_chk, param->write_correct_mask,
2599                            (long unsigned int)(*bit_chk ==
2600                            param->write_correct_mask));
2601                 return *bit_chk == param->write_correct_mask;
2602         } else {
2603                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2604                 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ",
2605                        write_group, use_dm, *bit_chk);
2606                 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0,
2607                         (long unsigned int)(*bit_chk != 0));
2608                 return *bit_chk != 0x00;
2609         }
2610 }
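
/*
 * Worked example of the per-virtual-group accumulation above (widths are
 * illustrative): with RW_MGR_MEM_DQ_PER_WRITE_DQS == 8 and
 * RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS == 2, each virtual group
 * contributes 8 / 2 == 4 pass bits.  Group vg == 1 is tested first and its
 * bits are shifted up by 4 when vg == 0 is processed, leaving vg1 in
 * tmp_bit_chk[7:4] and vg0 in tmp_bit_chk[3:0].  Each rank then ANDs its
 * result into *bit_chk, so a bit survives only if it passed on every rank
 * tested.
 */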
2611
2612 /*
2613  * Center all windows. Do per-bit deskew to possibly increase the size of
2614  * certain windows.
2615  */
2616 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
2617         uint32_t write_group, uint32_t test_bgn)
2618 {
2619         uint32_t i, p, min_index;
2620         int32_t d;
2621         uint32_t bit_chk;
2622         uint32_t sticky_bit_chk;
2623         /*
2624          * Store the edges as signed, since they are compared against
2625          * signed numbers and may go negative.
2626          */
2627         int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2628         int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2629         int32_t mid;
2630         int32_t mid_min, orig_mid_min;
2631         int32_t new_dqs, start_dqs, shift_dq;
2632         int32_t dq_margin, dqs_margin, dm_margin;
2633         uint32_t stop;
2634         uint32_t temp_dq_out1_delay;
2635         uint32_t addr;
2636
2637         debug("%s:%d %u %u\n", __func__, __LINE__, write_group, test_bgn);
2638
2639         dm_margin = 0;
2640
2641         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2642         start_dqs = readl(addr +
2643                           (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2644
2645         /* per-bit deskew */
2646
2647         /*
2648          * set the left and right edge of each bit to an illegal value
2649          * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
2650          */
2651         sticky_bit_chk = 0;
2652         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2653                 left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
2654                 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2655         }
2656
2657         /* Search for the left edge of the window for each bit */
2658         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
2659                 scc_mgr_apply_group_dq_out1_delay(write_group, d);
2660
2661                 writel(0, &sdr_scc_mgr->update);
2662
2663                 /*
2664                  * Stop searching when the write test doesn't pass AND when
2665                  * we've seen a passing write on every bit.
2666                  */
2667                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2668                         0, PASS_ONE_BIT, &bit_chk, 0);
2669                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2670                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2671                 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \
2672                            == %u && %u [bit_chk= %u ]\n",
2673                         d, sticky_bit_chk, param->write_correct_mask,
2674                         stop, bit_chk);
2675
2676                 if (stop == 1) {
2677                         break;
2678                 } else {
2679                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2680                                 if (bit_chk & 1) {
2681                                         /*
2682                                          * Remember a passing test as the
2683                                          * left_edge.
2684                                          */
2685                                         left_edge[i] = d;
2686                                 } else {
2687                                         /*
2688                                          * If a left edge has not been seen
2689                                          * yet, then a future passing test will
2690                                          * mark this edge as the right edge.
2691                                          */
2692                                         if (left_edge[i] ==
2693                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2694                                                 right_edge[i] = -(d + 1);
2695                                         }
2696                                 }
2697                                 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d);
2698                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2699                                            (int)(bit_chk & 1), i, left_edge[i]);
2700                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2701                                        right_edge[i]);
2702                                 bit_chk = bit_chk >> 1;
2703                         }
2704                 }
2705         }
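
        /*
         * Note on the edge encoding used above: delay taps are non-negative,
         * so a stored edge of -(d + 1) can never be confused with a real tap
         * value.  It records that the bit was still failing at delay d before
         * any pass had been seen; for example, failures at d == 0, 1, 2
         * followed by a pass at d == 3 leave left_edge[i] == 3 and
         * right_edge[i] == -3, i.e. -(2 + 1).
         */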
2706
2707         /* Reset DQ delay chains to 0 */
2708         scc_mgr_apply_group_dq_out1_delay(write_group, 0);
2709         sticky_bit_chk = 0;
2710         for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
2711                 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
2712                            %d right_edge[%u]: %d\n", __func__, __LINE__,
2713                            i, left_edge[i], i, right_edge[i]);
2714
2715                 /*
2716                  * Check for cases where we haven't found the left edge,
2717                  * which makes our assignment of the right edge invalid.
2718                  * Reset it to the illegal value.
2719                  */
2720                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) &&
2721                     (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) {
2722                         right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2723                         debug_cond(DLEVEL == 2, "%s:%d write_center: reset \
2724                                    right_edge[%u]: %d\n", __func__, __LINE__,
2725                                    i, right_edge[i]);
2726                 }
2727
2728                 /*
2729                  * Reset sticky bit (except for bits where we have
2730                  * seen the left edge).
2731                  */
2732                 sticky_bit_chk = sticky_bit_chk << 1;
2733                 if (left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)
2734                         sticky_bit_chk = sticky_bit_chk | 1;
2735
2736                 if (i == 0)
2737                         break;
2738         }
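
        /*
         * After this loop, sticky_bit_chk has been rebuilt so that bit i is
         * set iff a left edge was found for DQ bit i.  The right-edge search
         * below ORs each new pass into this mask and considers the search
         * complete once the mask equals param->write_correct_mask and the
         * test stops passing.
         */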
2739
2740         /* Search for the right edge of the window for each bit */
2741         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) {
2742                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2743                                                         d + start_dqs);
2744
2745                 writel(0, &sdr_scc_mgr->update);
2746
2747                 /*
2748                  * Stop searching when the write test doesn't pass AND when
2749                  * we've seen a passing write on every bit.
2750                  */
2751                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2752                         0, PASS_ONE_BIT, &bit_chk, 0);
2753
2754                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2755                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2756
2757                 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \
2758                            %u && %u\n", d, sticky_bit_chk,
2759                            param->write_correct_mask, stop);
2760
2761                 if (stop == 1) {
2762                         if (d == 0) {
2763                                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
2764                                         i++) {
2765                                         /* d = 0 failed, but it passed
2766                                          * when testing the left edge, so
2767                                          * it must be marginal; set it to -1 */
2768                                         if (right_edge[i] ==
2769                                                 IO_IO_OUT1_DELAY_MAX + 1 &&
2770                                                 left_edge[i] !=
2771                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2772                                                 right_edge[i] = -1;
2773                                         }
2774                                 }
2775                         }
2776                         break;
2777                 } else {
2778                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2779                                 if (bit_chk & 1) {
2780                                         /*
2781                                          * Remember a passing test as
2782                                          * the right_edge.
2783                                          */
2784                                         right_edge[i] = d;
2785                                 } else {
2786                                         if (d != 0) {
2787                                                 /*
2788                                                  * If a right edge has not
2789                                                  * been seen yet, then a future
2790                                                  * passing test will mark this
2791                                                  * edge as the left edge.
2792                                                  */
2793                                                 if (right_edge[i] ==
2794                                                     IO_IO_OUT1_DELAY_MAX + 1)
2795                                                         left_edge[i] = -(d + 1);
2796                                         } else {
2797                                                 /*
2798                                                  * d = 0 failed, but it passed
2799                                                  * when testing the left edge,
2800                                                  * so it must be marginal, set
2801                                                  * it to -1.
2802                                                  */
2803                                                 if (right_edge[i] ==
2804                                                     IO_IO_OUT1_DELAY_MAX + 1 &&
2805                                                     left_edge[i] !=
2806                                                     IO_IO_OUT1_DELAY_MAX + 1)
2807                                                         right_edge[i] = -1;
2808                                                 /*
2809                                                  * If a right edge has not been
2810                                                  * seen yet, then a future
2811                                                  * passing test will mark this
2812                                                  * edge as the left edge.
2813                                                  */
2814                                                 else if (right_edge[i] ==
2815                                                         IO_IO_OUT1_DELAY_MAX +
2816                                                         1)
2817                                                         left_edge[i] = -(d + 1);
2818                                         }
2819                                 }
2820                                 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d);
2821                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2822                                            (int)(bit_chk & 1), i, left_edge[i]);
2823                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2824                                            right_edge[i]);
2825                                 bit_chk = bit_chk >> 1;
2826                         }
2827                 }
2828         }
2829
2830         /* Check that all bits have a window */
2831         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2832                 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
2833                            %d right_edge[%u]: %d", __func__, __LINE__,
2834                            i, left_edge[i], i, right_edge[i]);
2835                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) ||
2836                     (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) {
2837                         set_failing_group_stage(test_bgn + i,
2838                                                 CAL_STAGE_WRITES,
2839                                                 CAL_SUBSTAGE_WRITES_CENTER);
2840                         return 0;
2841                 }
2842         }
2843
2844         /* Find middle of window for each DQ bit */
2845         mid_min = left_edge[0] - right_edge[0];
2846         min_index = 0;
2847         for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2848                 mid = left_edge[i] - right_edge[i];
2849                 if (mid < mid_min) {
2850                         mid_min = mid;
2851                         min_index = i;
2852                 }
2853         }
2854
2855         /*
2856          * -mid_min/2 represents the amount that we need to move DQS.
2857          * If mid_min is odd and positive we'll need to add one to
2858          * make sure the rounding in further calculations is correct
2859          * (always bias to the right), so just add 1 for all positive values.
2860          */
2861         if (mid_min > 0)
2862                 mid_min++;
2863         mid_min = mid_min / 2;
2864         debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__,
2865                    __LINE__, mid_min);
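
        /*
         * Example of the bias-to-the-right rounding above: if the most
         * constrained bit has left_edge == 5 and right_edge == 2, then
         * mid_min == 3; incrementing to 4 before dividing yields mid_min == 2
         * rather than 1, keeping the rounding error on the right side.
         */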
2866
2867         /* Determine the amount we can change DQS (which is -mid_min) */
2868         orig_mid_min = mid_min;
2869         new_dqs = start_dqs;
2870         mid_min = 0;
2871         debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \
2872                    mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min);
2873         /* Initialize data for export structures */
2874         dqs_margin = IO_IO_OUT1_DELAY_MAX + 1;
2875         dq_margin  = IO_IO_OUT1_DELAY_MAX + 1;
2876
2877         /* Add delay to bring the centre of all DQ windows to the same "level" */
2878         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
2879                 /* Use values before divide by 2 to reduce round off error */
2880                 shift_dq = (left_edge[i] - right_edge[i] -
2881                         (left_edge[min_index] - right_edge[min_index]))/2  +
2882                 (orig_mid_min - mid_min);
2883
2884                 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \
2885                            [%u]=%d\n", __func__, __LINE__, i, shift_dq);
2886
2887                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2888                 temp_dq_out1_delay = readl(addr + (i << 2));
2889                 if (shift_dq + (int32_t)temp_dq_out1_delay >
2890                         (int32_t)IO_IO_OUT1_DELAY_MAX) {
2891                         shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay;
2892                 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) {
2893                         shift_dq = -(int32_t)temp_dq_out1_delay;
2894                 }
2895                 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n",
2896                            i, shift_dq);
2897                 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq);
2898                 scc_mgr_load_dq(i);
2899
2900                 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i,
2901                            left_edge[i] - shift_dq + (-mid_min),
2902                            right_edge[i] + shift_dq - (-mid_min));
2903                 /* To determine values for export structures */
2904                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2905                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2906
2907                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2908                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2909         }
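
        /*
         * Worked example of the clamping above, assuming
         * IO_IO_OUT1_DELAY_MAX == 31: with shift_dq == 5 and
         * temp_dq_out1_delay == 28, the sum 33 would overflow the delay
         * chain, so shift_dq is clamped to 31 - 28 == 3 and the new out1
         * delay becomes exactly 31.
         */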
2910
2911         /* Move DQS */
2912         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
2913         writel(0, &sdr_scc_mgr->update);
2914
2915         /* Centre DM */
2916         debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
2917
2918         /*
2919          * set the left and right edge of each bit to an illegal value,
2920          * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value,
2921          */
2922         left_edge[0]  = IO_IO_OUT1_DELAY_MAX + 1;
2923         right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
2924         int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2925         int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2926         int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2927         int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1;
2928         int32_t win_best = 0;
2929
2930         /* Search for the window (or part of it) with the DM shift */
2931         for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) {
2932                 scc_mgr_apply_group_dm_out1_delay(d);
2933                 writel(0, &sdr_scc_mgr->update);
2934
2935                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2936                                                     PASS_ALL_BITS, &bit_chk,
2937                                                     0)) {
2938                         /* Set the current end of the window */
2939                         end_curr = -d;
2940                         /*
2941                          * If a starting edge of our window has not been seen,
2942                          * this is the current start of the DM window.
2943                          */
2944                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2945                                 bgn_curr = -d;
2946
2947                         /*
2948                          * If the current window is bigger than the best seen,
2949                          * set best seen to be the current window.
2950                          */
2951                         if ((end_curr-bgn_curr+1) > win_best) {
2952                                 win_best = end_curr-bgn_curr+1;
2953                                 bgn_best = bgn_curr;
2954                                 end_best = end_curr;
2955                         }
2956                 } else {
2957                         /* We just saw a failing test. Reset temp edges. */
2958                         bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2959                         end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2960                 }
2961         }
2962
2964         /* Reset DM delay chains to 0 */
2965         scc_mgr_apply_group_dm_out1_delay(0);
2966
2967         /*
2968          * Check to see if the current window nudges up against 0 delay.
2969          * If so, we need to continue the search by shifting DQS;
2970          * otherwise the DQS search begins as a new search.
2971          */
2971         if (end_curr != 0) {
2972                 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2973                 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2974         }
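
        /*
         * Coordinate convention for the two searches: the loop above delays
         * DM (end_curr == -d, so the recorded edges are <= 0), while the loop
         * below delays DQS instead (end_curr == d, edges >= 0).  Together
         * they sweep both directions of relative DM-vs-DQS movement, with
         * win_best/bgn_best/end_best tracking the widest window seen in
         * either pass.
         */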
2975
2976         /* Search for the window (or part of it) with DQS shifts */
2977         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) {
2978                 /*
2979                  * Note: This only shifts DQS, so we may be limiting ourselves
2980                  * to the width of the DQ window unnecessarily.
2981                  */
2982                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2983                                                         d + new_dqs);
2984
2985                 writel(0, &sdr_scc_mgr->update);
2986                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2987                                                     PASS_ALL_BITS, &bit_chk,
2988                                                     0)) {
2989                         /* Set the current end of the window */
2990                         end_curr = d;
2991                         /*
2992                          * If a beginning edge of our window has not been seen,
2993                          * this is the current beginning of the DM window.
2994                          */
2995                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2996                                 bgn_curr = d;
2997
2998                         /*
2999                          * If the current window is bigger than the best seen,
3000                          * set best seen to be the current window.
3001                          */
3002                         if ((end_curr-bgn_curr+1) > win_best) {
3003                                 win_best = end_curr-bgn_curr+1;
3004                                 bgn_best = bgn_curr;
3005                                 end_best = end_curr;
3006                         }
3007                 } else {
3008                         /* We just saw a failing test. Reset temp edges. */
3009                         bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3010                         end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3011
3012                         /* Early exit optimization: if the remaining delay
3013                          * chain space is less than the largest window seen
3014                          * so far, we can exit. */
3015                         if ((win_best - 1) >
3016                             (IO_IO_OUT1_DELAY_MAX - new_dqs - d))
3017                                 break;
3018                 }
3019         }
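
        /*
         * Early-exit arithmetic, for illustration: with
         * IO_IO_OUT1_DELAY_MAX - new_dqs == 20, win_best == 10 and a failure
         * at d == 12, only taps 13..20 (8 taps) remain, so even if all of
         * them passed, the resulting window could not reach 10; the check
         * above (9 > 8) therefore stops the search.
         */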
3021
3022         /* Assign left and right edges for calibration and reporting */
3023         left_edge[0] = -1 * bgn_best;
3024         right_edge[0] = end_best;
3025
3026         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__,
3027                    __LINE__, left_edge[0], right_edge[0]);
3028
3029         /* Move DQS (back to orig) */
3030         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3031
3032         /* Move DM */
3033
3034         /* Find middle of window for the DM bit */
3035         mid = (left_edge[0] - right_edge[0]) / 2;
3036
3037         /* only move right, since we are not moving DQS/DQ */
3038         if (mid < 0)
3039                 mid = 0;
3040
3041         /* dm_margin should fail if we never find a window */
3042         if (win_best == 0)
3043                 dm_margin = -1;
3044         else
3045                 dm_margin = left_edge[0] - mid;
3046
3047         scc_mgr_apply_group_dm_out1_delay(mid);
3048         writel(0, &sdr_scc_mgr->update);
3049
3050         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \
3051                    dm_margin=%d\n", __func__, __LINE__, left_edge[0],
3052                    right_edge[0], mid, dm_margin);
3053         /* Export values */
3054         gbl->fom_out += dq_margin + dqs_margin;
3055
3056         debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \
3057                    dqs_margin=%d dm_margin=%d\n", __func__, __LINE__,
3058                    dq_margin, dqs_margin, dm_margin);
3059
3060         /*
3061          * Do not remove this line as it makes sure all of our
3062          * decisions have been applied.
3063          */
3064         writel(0, &sdr_scc_mgr->update);
3065         return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0);
3066 }
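
/*
 * Note that the function above reports success only when dq_margin,
 * dqs_margin and dm_margin are all non-negative, i.e. every DQ bit, the DQS
 * strobe and the DM signal retain at least some usable window around their
 * final settings.
 */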
3067
3068 /* calibrate the write operations */
3069 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g,
3070         uint32_t test_bgn)
3071 {
3072         /* update info for sims */
3073         debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn);
3074
3075         reg_file_set_stage(CAL_STAGE_WRITES);
3076         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3077
3078         reg_file_set_group(g);
3079
3080         if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) {
3081                 set_failing_group_stage(g, CAL_STAGE_WRITES,
3082                                         CAL_SUBSTAGE_WRITES_CENTER);
3083                 return 0;
3084         }
3085
3086         return 1;
3087 }
3088
3089 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */
3090 static void mem_precharge_and_activate(void)
3091 {
3092         uint32_t r;
3093
3094         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3095                 if (param->skip_ranks[r]) {
3096                         /* request to skip the rank */
3097                         continue;
3098                 }
3099
3100                 /* set rank */
3101                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3102
3103                 /* precharge all banks ... */
3104                 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3105                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3106
3107                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3108                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3109                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3110
3111                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3112                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3113                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3114
3115                 /* activate rows */
3116                 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3117                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3118         }
3119 }
3120
3121 /* Configure various memory related parameters. */
3122 static void mem_config(void)
3123 {
3124         uint32_t rlat, wlat;
3125         uint32_t rw_wl_nop_cycles;
3126         uint32_t max_latency;
3127
3128         debug("%s:%d\n", __func__, __LINE__);
3129         /* read in write and read latency */
3130         wlat = readl(&data_mgr->t_wl_add);
3131         wlat += readl(&data_mgr->mem_t_add);
3132
3133         /* WL for hard phy does not include additive latency */
3134
3135         /*
3136          * Add additional write latency to offset the address/command extra
3137          * clock cycle. We change the AC mux setting, causing AC to be delayed
3138          * by one mem clock cycle. Only do this for DDR3.
3139          */
3140         wlat = wlat + 1;
3141
3142         rlat = readl(&data_mgr->t_rl_add);
3143
3144         rw_wl_nop_cycles = wlat - 2;
3145         gbl->rw_wl_nop_cycles = rw_wl_nop_cycles;
3146
3147         /*
3148          * For AV/CV, lfifo is hardened and always runs at full rate so
3149          * max latency in AFI clocks, used here, is correspondingly smaller.
3150          */
3151         max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) / 1 - 1;
3152         /* Configure for a burst length of 8 */
3153
3154         /* Write latency: adjust the write latency for the Hard PHY */
3155         wlat = wlat + 1;
3157
3158         /* set a pretty high read latency initially */
3159         gbl->curr_read_lat = rlat + 16;
3160
3161         if (gbl->curr_read_lat > max_latency)
3162                 gbl->curr_read_lat = max_latency;
3163
3164         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3165
3166         /* advertise write latency */
3167         gbl->curr_write_lat = wlat;
3168         writel(wlat - 2, &phy_mgr_cfg->afi_wlat);
3169
3170         /* initialize bit slips */
3171         mem_precharge_and_activate();
3172 }
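
/*
 * Worked latency example for mem_config() (register values are
 * illustrative): with t_wl_add == 5 and mem_t_add == 0, the address/command
 * adjustment gives wlat == 6, so rw_wl_nop_cycles == 6 - 2 == 4; the Hard
 * PHY adjustment then raises wlat to 7, so curr_write_lat advertises 7
 * while afi_wlat is written as 7 - 2 == 5.
 */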
3173
3174 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */
3175 static void mem_skip_calibrate(void)
3176 {
3177         uint32_t vfifo_offset;
3178         uint32_t i, j, r;
3179
3180         debug("%s:%d\n", __func__, __LINE__);
3181         /* Need to update every shadow register set used by the interface */
3182         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3183                 r += NUM_RANKS_PER_SHADOW_REG) {
3184                 /*
3185                  * Set output phase alignment settings appropriate for
3186                  * skip calibration.
3187                  */
3188                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3189                         scc_mgr_set_dqs_en_phase(i, 0);
3190 #if IO_DLL_CHAIN_LENGTH == 6
3191                         scc_mgr_set_dqdqs_output_phase(i, 6);
3192 #else
3193                         scc_mgr_set_dqdqs_output_phase(i, 7);
3194 #endif
3195                         /*
3196                          * Case:33398
3197                          *
3198                          * Write data arrives to the I/O two cycles before write
3199                          * latency is reached (720 deg).
3200                          *   -> due to bit-slip in a/c bus
3201                          *   -> to allow board skew where dqs is longer than ck
3202                          *      -> how often can this happen!?
3203                          *      -> can claim back some ptaps for high freq
3204                          *       support if we can relax this, but i digress...
3205                          *
3206                          * The write_clk leads mem_ck by 90 deg
3207                          * The minimum ptap of the OPA is 180 deg
3208                          * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
3209                          * The write_clk is always delayed by 2 ptaps
3210                          *
3211                          * Hence, to make DQS aligned to CK, we need to delay
3212                          * DQS by:
3213                          *    (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3214                          *
3215                          * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
3216                          * gives us the number of ptaps, which simplifies to:
3217                          *
3218                          *    (1.25 * IO_DLL_CHAIN_LENGTH - 2)
3219                          */
3220                         scc_mgr_set_dqdqs_output_phase(i, (1.25 *
3221                                 IO_DLL_CHAIN_LENGTH - 2));
3222                 }
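
                /*
                 * Numeric check of the formula above, assuming
                 * IO_DLL_CHAIN_LENGTH == 8: each ptap is 360 / 8 == 45 deg,
                 * so (720 - 90 - 180 - 2 * 45) / 45 == 8 ptaps, which
                 * matches 1.25 * 8 - 2 == 8.
                 */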
3223                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3224                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3225
3226                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3227                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3228                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3229                 }
3230                 writel(0xff, &sdr_scc_mgr->dq_ena);
3231                 writel(0xff, &sdr_scc_mgr->dm_ena);
3232                 writel(0, &sdr_scc_mgr->update);
3233         }
3234
3235         /* Compensate for simulation model behaviour */
3236         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3237                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3238                 scc_mgr_load_dqs(i);
3239         }
3240         writel(0, &sdr_scc_mgr->update);
3241
3242         /*
3243          * ArriaV has hard FIFOs that can only be initialized by incrementing
3244          * them in the sequencer.
3245          */
3246         vfifo_offset = CALIB_VFIFO_OFFSET;
3247         for (j = 0; j < vfifo_offset; j++) {
3248                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3249         }
3250         writel(0, &phy_mgr_cmd->fifo_reset);
3251
3252         /*
3253          * For ACV with hard lfifo, we get the skip-cal setting from a
3254          * generation-time constant.
3255          */
3256         gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3257         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3258 }
3259
3260 /* Memory calibration entry point */
3261 static uint32_t mem_calibrate(void)
3262 {
3263         uint32_t i;
3264         uint32_t rank_bgn, sr;
3265         uint32_t write_group, write_test_bgn;
3266         uint32_t read_group, read_test_bgn;
3267         uint32_t run_groups, current_run;
3268         uint32_t failing_groups = 0;
3269         uint32_t group_failed = 0;
3270         uint32_t sr_failed = 0;
3271
3272         debug("%s:%d\n", __func__, __LINE__);
3273         /* Initialize the data settings */
3274
3275         gbl->error_substage = CAL_SUBSTAGE_NIL;
3276         gbl->error_stage = CAL_STAGE_NIL;
3277         gbl->error_group = 0xff;
3278         gbl->fom_in = 0;
3279         gbl->fom_out = 0;
3280
3281         mem_config();
3282
3283         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3284                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3285                           SCC_MGR_GROUP_COUNTER_OFFSET);
3286                 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3287                 if (i == 0)
3288                         scc_mgr_set_hhp_extras();
3289
3290                 scc_set_bypass_mode(i);
3291         }
3292
3293         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3294                 /*
3295                  * Set VFIFO and LFIFO to instant-on settings in skip
3296                  * calibration mode.
3297                  */
3298                 mem_skip_calibrate();
3299         } else {
3300                 for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3301                         /*
3302                          * Zero all delay chain/phase settings for all
3303                          * groups and all shadow register sets.
3304                          */
3305                         scc_mgr_zero_all();
3306
3307                         run_groups = ~param->skip_groups;
3308
3309                         for (write_group = 0, write_test_bgn = 0; write_group
3310                                 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++,
3311                                 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
3312                                 /* Initialize the group failure flag */
3313                                 group_failed = 0;
3314
3315                                 current_run = run_groups & ((1 <<
3316                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3317                                 run_groups = run_groups >>
3318                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3319
3320                                 if (current_run == 0)
3321                                         continue;
3322
3323                                 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3324                                                     SCC_MGR_GROUP_COUNTER_OFFSET);
3325                                 scc_mgr_zero_group(write_group, 0);
3326
3327                                 for (read_group = write_group *
3328                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3329                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3330                                         read_test_bgn = 0;
3331                                         read_group < (write_group + 1) *
3332                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3333                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3334                                         group_failed == 0;
3335                                         read_group++, read_test_bgn +=
3336                                         RW_MGR_MEM_DQ_PER_READ_DQS) {
3337                                         /* Calibrate the VFIFO */
3338                                         if (!((STATIC_CALIB_STEPS) &
3339                                                 CALIB_SKIP_VFIFO)) {
3340                                                 if (!rw_mgr_mem_calibrate_vfifo
3341                                                         (read_group,
3342                                                         read_test_bgn)) {
3343                                                         group_failed = 1;
3344
3345                                                         if (!(gbl->phy_debug_mode_flags &
3346                                                               PHY_DEBUG_SWEEP_ALL_GROUPS)) {
3348                                                                 return 0;
3349                                                         }
3350                                                 }
3351                                         }
3352                                 }
3353
3354                                 /* Calibrate the output side */
3355                                 if (group_failed == 0) {
3356                                         for (rank_bgn = 0, sr = 0; rank_bgn
3357                                                 < RW_MGR_MEM_NUMBER_OF_RANKS;
3358                                                 rank_bgn +=
3359                                                 NUM_RANKS_PER_SHADOW_REG,
3360                                                 ++sr) {
3361                                                 sr_failed = 0;
3362                                                 if (!((STATIC_CALIB_STEPS) &
3363                                                       CALIB_SKIP_WRITES)) {
3364                                                         if ((STATIC_CALIB_STEPS) &
3365                                                             CALIB_SKIP_DELAY_SWEEPS) {
3366                                                                 /* Not needed in quick mode! */
3367                                                         } else {
3368                                                                 /*
3369                                                                  * Determine if this set of
3370                                                                  * ranks should be skipped
3371                                                                  * entirely.
3372                                                                  */
3373                                                                 if (!param->skip_shadow_regs[sr]) {
3374                                                                         if (!rw_mgr_mem_calibrate_writes(
3375                                                                             rank_bgn, write_group,
3376                                                                             write_test_bgn)) {
3377                                                                                 sr_failed = 1;
3378                                                                                 if (!(gbl->phy_debug_mode_flags &
3379                                                                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3380                                                                                         return 0;
3381                                                                         }
3382                                                                 }
3383                                                         }
3384                                                 }
3387                                                 if (sr_failed != 0)
3388                                                         group_failed = 1;
3389                                         }
3390                                 }
3391
3392                                 if (group_failed == 0) {
3393                                         for (read_group = write_group *
3394                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3395                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3396                                         read_test_bgn = 0;
3397                                                 read_group < (write_group + 1)
3398                                                 * RW_MGR_MEM_IF_READ_DQS_WIDTH
3399                                                 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3400                                                 group_failed == 0;
3401                                                 read_group++, read_test_bgn +=
3402                                                 RW_MGR_MEM_DQ_PER_READ_DQS) {
3403                                                 if (!((STATIC_CALIB_STEPS) &
3404                                                       CALIB_SKIP_WRITES)) {
3405                                                         if (!rw_mgr_mem_calibrate_vfifo_end(
3406                                                             read_group, read_test_bgn)) {
3407                                                                 group_failed = 1;
3408
3409                                                                 if (!(gbl->phy_debug_mode_flags &
3410                                                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3411                                                                         return 0;
3412                                                         }
3413                                                 }
3415                                         }
3416                                 }
3417
3418                                 if (group_failed != 0)
3419                                         failing_groups++;
3420                         }
3421
3422                         /*
3423                          * If there are any failing groups then report
3424                          * the failure.
3425                          */
3426                         if (failing_groups != 0)
3427                                 return 0;
3428
3429                         /* Calibrate the LFIFO */
3430                         if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) {
3431                                 /*
3432                                  * If we're skipping groups as part of debug,
3433                                  * don't calibrate LFIFO.
3434                                  */
3435                                 if (param->skip_groups == 0) {
3436                                         if (!rw_mgr_mem_calibrate_lfifo())
3437                                                 return 0;
3438                                 }
3439                         }
3440                 }
3441         }
3442
3443         /*
3444          * Do not remove this line as it makes sure all of our decisions
3445          * have been applied.
3446          */
3447         writel(0, &sdr_scc_mgr->update);
3448         return 1;
3449 }
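
/*
 * Summary of the flow implemented above: for each write group, the read
 * side is calibrated first (VFIFO) per associated read group, then the
 * write side per shadow-register set of ranks, then the read side is
 * re-verified (VFIFO end).  Once every group passes, the LFIFO read latency
 * is calibrated.  Any failing stage aborts the run early unless
 * PHY_DEBUG_SWEEP_ALL_GROUPS is set.
 */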
3450
3451 /**
3452  * run_mem_calibrate() - Perform memory calibration
3453  *
3454  * This function triggers the entire memory calibration procedure.
3455  */
3456 static int run_mem_calibrate(void)
3457 {
3458         int pass;
3459
3460         debug("%s:%d\n", __func__, __LINE__);
3461
3462         /* Reset pass/fail status shown on afi_cal_success/fail */
3463         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3464
3465         /* Stop tracking manager. */
3466         clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3467
3468         phy_mgr_initialize();
3469         rw_mgr_mem_initialize();
3470
3471         /* Perform the actual memory calibration. */
3472         pass = mem_calibrate();
3473
3474         mem_precharge_and_activate();
3475         writel(0, &phy_mgr_cmd->fifo_reset);
3476
3477         /* Handoff. */
3478         rw_mgr_mem_handoff();
3479         /*
3480          * In Hard PHY this is a 2-bit control:
3481          * 0: AFI Mux Select
3482          * 1: DDIO Mux Select
3483          */
3484         writel(0x2, &phy_mgr_cfg->mux_sel);
3485
3486         /* Start tracking manager. */
3487         setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3488
3489         return pass;
3490 }
3491
3492 /**
3493  * debug_mem_calibrate() - Report result of memory calibration
3494  * @pass:       Value indicating whether calibration passed or failed
3495  *
3496  * This function reports the results of the memory calibration
3497  * and writes debug information into the register file.
3498  */
3499 static void debug_mem_calibrate(int pass)
3500 {
3501         uint32_t debug_info;
3502
3503         if (pass) {
3504                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3505
3506                 gbl->fom_in /= 2;
3507                 gbl->fom_out /= 2;
3508
3509                 if (gbl->fom_in > 0xff)
3510                         gbl->fom_in = 0xff;
3511
3512                 if (gbl->fom_out > 0xff)
3513                         gbl->fom_out = 0xff;
3514
3515                 /* Update the FOM in the register file: fom_in in [7:0], fom_out in [15:8] */
3516                 debug_info = gbl->fom_in;
3517                 debug_info |= gbl->fom_out << 8;
3518                 writel(debug_info, &sdr_reg_file->fom);
3519
3520                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3521                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3522         } else {
3523                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3524
3525                 /* Update the failing group/stage in the register file */
3526                 debug_info = gbl->error_stage;
3527                 debug_info |= gbl->error_substage << 8;
3528                 debug_info |= gbl->error_group << 16;
3529
3530                 writel(debug_info, &sdr_reg_file->failing_stage);
3531                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3532                 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3538         }
3539
3540         printf("%s: Calibration complete\n", __FILE__);
3541 }
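
/*
 * Layout of the failing_stage word written above: error stage in bits
 * [7:0], substage in bits [15:8] and failing group in bits [23:16].  A
 * post-mortem reader can decode it as stage = val & 0xff,
 * substage = (val >> 8) & 0xff, group = (val >> 16) & 0xff.
 */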
3542
3543 /**
3544  * hc_initialize_rom_data() - Initialize ROM data
3545  *
3546  * Load the Read/Write Manager instruction ROM and the address/command
3547  * (AC) ROM with the generated initialization data.
3547  */
3548 static void hc_initialize_rom_data(void)
3549 {
3550         u32 i, addr;
3551
3552         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3553         for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++)
3554                 writel(inst_rom_init[i], addr + (i << 2));
3555
3556         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3557         for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++)
3558                 writel(ac_rom_init[i], addr + (i << 2));
3559 }
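
/*
 * The (i << 2) above turns a word index into a byte offset: each ROM entry
 * is a 32-bit word, so entry i lives at addr + i * 4.
 */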
3560
3561 /**
3562  * initialize_reg_file() - Initialize SDR register file
3563  *
3564  * Initialize SDR register file.
3565  */
3566 static void initialize_reg_file(void)
3567 {
3568         /* Initialize the register file with the correct data */
3569         writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3570         writel(0, &sdr_reg_file->debug_data_addr);
3571         writel(0, &sdr_reg_file->cur_stage);
3572         writel(0, &sdr_reg_file->fom);
3573         writel(0, &sdr_reg_file->failing_stage);
3574         writel(0, &sdr_reg_file->debug1);
3575         writel(0, &sdr_reg_file->debug2);
3576 }
3577
3578 /**
3579  * initialize_hps_phy() - Initialize HPS PHY
3580  *
3581  * Initialize HPS PHY.
3582  */
3583 static void initialize_hps_phy(void)
3584 {
3585         uint32_t reg;
3586         /*
3587          * Tracking also gets configured here because it's in the
3588          * same register.
3589          */
3590         uint32_t trk_sample_count = 7500;
3591         /*
3592          * Format is number of outer loops in the 16 MSB, sample
3593          * count in 16 LSB.
3594          */
3595         uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
3596
3597         reg = 0;
3598         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3599         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3600         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3601         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3602         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3603         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3604         /*
3605          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3606          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3607          */
3608         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3609         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3610                 trk_sample_count);
3611         writel(reg, &sdr_ctrl->phy_ctrl0);
3612
3613         reg = 0;
3614         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3615                 trk_sample_count >>
3616                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3617         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3618                 trk_long_idle_sample_count);
3619         writel(reg, &sdr_ctrl->phy_ctrl1);
3620
3621         reg = 0;
3622         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3623                 trk_long_idle_sample_count >>
3624                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3625         writel(reg, &sdr_ctrl->phy_ctrl2);
3626 }
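
/*
 * Packing example for the tracking counters above: trk_sample_count == 7500
 * fits entirely within the 20-bit SAMPLECOUNT_19_0 field of phy_ctrl0, so
 * the SAMPLECOUNT_31_20 field of phy_ctrl1 receives 7500 >> 20 == 0.
 * Likewise (10 << 16) | 100 fits in 20 bits, so the upper part written to
 * LONGIDLESAMPLECOUNT_31_20 in phy_ctrl2 is also 0.
 */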
3627
3628 /**
3629  * initialize_tracking() - Initialize tracking
3630  *
3631  * Initialize the register file with usable initial data.
3632  */
3633 static void initialize_tracking(void)
3634 {
3635         /*
3636          * Initialize the register file with the correct data.
3637          * Compute usable version of value in case we skip full
3638          * computation later.
3639          */
3640         writel(DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP) - 1,
3641                &sdr_reg_file->dtaps_per_ptap);
3642
3643         /* trk_sample_count */
3644         writel(7500, &sdr_reg_file->trk_sample_count);
3645
3646         /* longidle outer loop [15:0] */
3647         writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3648
3649         /*
3650          * longidle sample count [31:24]
3651          * trfc, worst case of 933MHz 4Gb [23:16]
3652          * trcd, worst case [15:8]
3653          * vfifo wait [7:0]
3654          */
3655         writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3656                &sdr_reg_file->delays);
3657
3658         /* mux delay */
3659         writel((RW_MGR_IDLE << 24) | (RW_MGR_ACTIVATE_1 << 16) |
3660                (RW_MGR_SGLE_READ << 8) | (RW_MGR_PRECHARGE_ALL << 0),
3661                &sdr_reg_file->trk_rw_mgr_addr);
3662
3663         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH,
3664                &sdr_reg_file->trk_read_dqs_width);
3665
3666         /* trefi [7:0] */
3667         writel((RW_MGR_REFRESH_ALL << 24) | (1000 << 0),
3668                &sdr_reg_file->trk_rfsh);
3669 }
3670
3671 int sdram_calibration_full(void)
3672 {
3673         struct param_type my_param;
3674         struct gbl_type my_gbl;
3675         uint32_t pass;
3676
3677         memset(&my_param, 0, sizeof(my_param));
3678         memset(&my_gbl, 0, sizeof(my_gbl));
3679
3680         param = &my_param;
3681         gbl = &my_gbl;
3682
3683         /* Enable the calibration report by default */
3684         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3685         /*
3686          * Only sweep all groups (regardless of fail state) by default.
3687          * Set the read test as enabled by default.
3688          */
3689 #if DISABLE_GUARANTEED_READ
3690         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3691 #endif
3692         /* Initialize the register file */
3693         initialize_reg_file();
3694
3695         /* Initialize any PHY CSR */
3696         initialize_hps_phy();
3697
3698         scc_mgr_initialize();
3699
3700         initialize_tracking();
3701
3702         printf("%s: Preparing to start memory calibration\n", __FILE__);
3703
3704         debug("%s:%d\n", __func__, __LINE__);
3705         debug_cond(DLEVEL == 1,
3706                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3707                    RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3708                    RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3709                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3710                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3711         debug_cond(DLEVEL == 1,
3712                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3713                    RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3714                    RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3715                    IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3716         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
3717                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3718         debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3719                    IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3720                    IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3721         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3722                    IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3723                    IO_IO_OUT2_DELAY_MAX);
3724         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3725                    IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3726
3727         hc_initialize_rom_data();
3728
3729         /* update info for sims */
3730         reg_file_set_stage(CAL_STAGE_NIL);
3731         reg_file_set_group(0);
3732
3733         /*
3734          * Load global needed for those actions that require
3735          * some dynamic calibration support.
3736          */
3737         dyn_calib_steps = STATIC_CALIB_STEPS;
3738         /*
3739          * Load global to allow dynamic selection of delay loop settings
3740          * based on calibration mode.
3741          */
3742         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3743                 skip_delay_mask = 0xff;
3744         else
3745                 skip_delay_mask = 0x0;
3746
3747         pass = run_mem_calibrate();
3748         debug_mem_calibrate(pass);
3749         return pass;
3750 }