]> git.kernelconcepts.de Git - karo-tx-uboot.git/blob - drivers/ddr/altera/sequencer.c
karo: fdt: fix panel-dpi support
[karo-tx-uboot.git] / drivers / ddr / altera / sequencer.c
1 /*
2  * Copyright Altera Corporation (C) 2012-2015
3  *
4  * SPDX-License-Identifier:    BSD-3-Clause
5  */
6
7 #include <common.h>
8 #include <asm/io.h>
9 #include <asm/arch/sdram.h>
10 #include <errno.h>
11 #include "sequencer.h"
12
13 static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
14         (struct socfpga_sdr_rw_load_manager *)
15                 (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
16 static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
17         (struct socfpga_sdr_rw_load_jump_manager *)
18                 (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
19 static struct socfpga_sdr_reg_file *sdr_reg_file =
20         (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
21 static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
22         (struct socfpga_sdr_scc_mgr *)
23                 (SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
24 static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
25         (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
26 static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
27         (struct socfpga_phy_mgr_cfg *)
28                 (SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
29 static struct socfpga_data_mgr *data_mgr =
30         (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
31 static struct socfpga_sdr_ctrl *sdr_ctrl =
32         (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;
33
34 const struct socfpga_sdram_rw_mgr_config *rwcfg;
35 const struct socfpga_sdram_io_config *iocfg;
36 const struct socfpga_sdram_misc_config *misccfg;
37
38 #define DELTA_D         1
39
40 /*
41  * In order to reduce ROM size, most of the selectable calibration steps are
42  * decided at compile time based on the user's calibration mode selection,
43  * as captured by the STATIC_CALIB_STEPS selection below.
44  *
45  * However, to support simulation-time selection of fast simulation mode, where
46  * we skip everything except the bare minimum, we need a few of the steps to
47  * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
48  * check, which is based on the rtl-supplied value, or we dynamically compute
49  * the value to use based on the dynamically-chosen calibration mode
50  */
51
52 #define DLEVEL 0
53 #define STATIC_IN_RTL_SIM 0
54 #define STATIC_SKIP_DELAY_LOOPS 0
55
56 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
57         STATIC_SKIP_DELAY_LOOPS)
58
59 /* calibration steps requested by the rtl */
60 u16 dyn_calib_steps;
61
62 /*
63  * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
64  * instead of static, we use boolean logic to select between
65  * non-skip and skip values
66  *
67  * The mask is set to include all bits when not-skipping, but is
68  * zero when skipping
69  */
70
71 u16 skip_delay_mask;    /* mask off bits when skipping/not-skipping */
72
73 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
74         ((non_skip_value) & skip_delay_mask)
75
76 struct gbl_type *gbl;
77 struct param_type *param;
78
79 static void set_failing_group_stage(u32 group, u32 stage,
80         u32 substage)
81 {
82         /*
83          * Only set the global stage if there was not been any other
84          * failing group
85          */
86         if (gbl->error_stage == CAL_STAGE_NIL)  {
87                 gbl->error_substage = substage;
88                 gbl->error_stage = stage;
89                 gbl->error_group = group;
90         }
91 }
92
93 static void reg_file_set_group(u16 set_group)
94 {
95         clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
96 }
97
98 static void reg_file_set_stage(u8 set_stage)
99 {
100         clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
101 }
102
103 static void reg_file_set_sub_stage(u8 set_sub_stage)
104 {
105         set_sub_stage &= 0xff;
106         clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
107 }
108
109 /**
110  * phy_mgr_initialize() - Initialize PHY Manager
111  *
112  * Initialize PHY Manager.
113  */
114 static void phy_mgr_initialize(void)
115 {
116         u32 ratio;
117
118         debug("%s:%d\n", __func__, __LINE__);
119         /* Calibration has control over path to memory */
120         /*
121          * In Hard PHY this is a 2-bit control:
122          * 0: AFI Mux Select
123          * 1: DDIO Mux Select
124          */
125         writel(0x3, &phy_mgr_cfg->mux_sel);
126
127         /* USER memory clock is not stable we begin initialization  */
128         writel(0, &phy_mgr_cfg->reset_mem_stbl);
129
130         /* USER calibration status all set to zero */
131         writel(0, &phy_mgr_cfg->cal_status);
132
133         writel(0, &phy_mgr_cfg->cal_debug_info);
134
135         /* Init params only if we do NOT skip calibration. */
136         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
137                 return;
138
139         ratio = rwcfg->mem_dq_per_read_dqs /
140                 rwcfg->mem_virtual_groups_per_read_dqs;
141         param->read_correct_mask_vg = (1 << ratio) - 1;
142         param->write_correct_mask_vg = (1 << ratio) - 1;
143         param->read_correct_mask = (1 << rwcfg->mem_dq_per_read_dqs) - 1;
144         param->write_correct_mask = (1 << rwcfg->mem_dq_per_write_dqs) - 1;
145 }
146
147 /**
148  * set_rank_and_odt_mask() - Set Rank and ODT mask
149  * @rank:       Rank mask
150  * @odt_mode:   ODT mode, OFF or READ_WRITE
151  *
152  * Set Rank and ODT mask (On-Die Termination).
153  */
154 static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode)
155 {
156         u32 odt_mask_0 = 0;
157         u32 odt_mask_1 = 0;
158         u32 cs_and_odt_mask;
159
160         if (odt_mode == RW_MGR_ODT_MODE_OFF) {
161                 odt_mask_0 = 0x0;
162                 odt_mask_1 = 0x0;
163         } else {        /* RW_MGR_ODT_MODE_READ_WRITE */
164                 switch (rwcfg->mem_number_of_ranks) {
165                 case 1: /* 1 Rank */
166                         /* Read: ODT = 0 ; Write: ODT = 1 */
167                         odt_mask_0 = 0x0;
168                         odt_mask_1 = 0x1;
169                         break;
170                 case 2: /* 2 Ranks */
171                         if (rwcfg->mem_number_of_cs_per_dimm == 1) {
172                                 /*
173                                  * - Dual-Slot , Single-Rank (1 CS per DIMM)
174                                  *   OR
175                                  * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
176                                  *
177                                  * Since MEM_NUMBER_OF_RANKS is 2, they
178                                  * are both single rank with 2 CS each
179                                  * (special for RDIMM).
180                                  *
181                                  * Read: Turn on ODT on the opposite rank
182                                  * Write: Turn on ODT on all ranks
183                                  */
184                                 odt_mask_0 = 0x3 & ~(1 << rank);
185                                 odt_mask_1 = 0x3;
186                         } else {
187                                 /*
188                                  * - Single-Slot , Dual-Rank (2 CS per DIMM)
189                                  *
190                                  * Read: Turn on ODT off on all ranks
191                                  * Write: Turn on ODT on active rank
192                                  */
193                                 odt_mask_0 = 0x0;
194                                 odt_mask_1 = 0x3 & (1 << rank);
195                         }
196                         break;
197                 case 4: /* 4 Ranks */
198                         /* Read:
199                          * ----------+-----------------------+
200                          *           |         ODT           |
201                          * Read From +-----------------------+
202                          *   Rank    |  3  |  2  |  1  |  0  |
203                          * ----------+-----+-----+-----+-----+
204                          *     0     |  0  |  1  |  0  |  0  |
205                          *     1     |  1  |  0  |  0  |  0  |
206                          *     2     |  0  |  0  |  0  |  1  |
207                          *     3     |  0  |  0  |  1  |  0  |
208                          * ----------+-----+-----+-----+-----+
209                          *
210                          * Write:
211                          * ----------+-----------------------+
212                          *           |         ODT           |
213                          * Write To  +-----------------------+
214                          *   Rank    |  3  |  2  |  1  |  0  |
215                          * ----------+-----+-----+-----+-----+
216                          *     0     |  0  |  1  |  0  |  1  |
217                          *     1     |  1  |  0  |  1  |  0  |
218                          *     2     |  0  |  1  |  0  |  1  |
219                          *     3     |  1  |  0  |  1  |  0  |
220                          * ----------+-----+-----+-----+-----+
221                          */
222                         switch (rank) {
223                         case 0:
224                                 odt_mask_0 = 0x4;
225                                 odt_mask_1 = 0x5;
226                                 break;
227                         case 1:
228                                 odt_mask_0 = 0x8;
229                                 odt_mask_1 = 0xA;
230                                 break;
231                         case 2:
232                                 odt_mask_0 = 0x1;
233                                 odt_mask_1 = 0x5;
234                                 break;
235                         case 3:
236                                 odt_mask_0 = 0x2;
237                                 odt_mask_1 = 0xA;
238                                 break;
239                         }
240                         break;
241                 }
242         }
243
244         cs_and_odt_mask = (0xFF & ~(1 << rank)) |
245                           ((0xFF & odt_mask_0) << 8) |
246                           ((0xFF & odt_mask_1) << 16);
247         writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
248                                 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
249 }
250
251 /**
252  * scc_mgr_set() - Set SCC Manager register
253  * @off:        Base offset in SCC Manager space
254  * @grp:        Read/Write group
255  * @val:        Value to be set
256  *
257  * This function sets the SCC Manager (Scan Chain Control Manager) register.
258  */
259 static void scc_mgr_set(u32 off, u32 grp, u32 val)
260 {
261         writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
262 }
263
264 /**
265  * scc_mgr_initialize() - Initialize SCC Manager registers
266  *
267  * Initialize SCC Manager registers.
268  */
269 static void scc_mgr_initialize(void)
270 {
271         /*
272          * Clear register file for HPS. 16 (2^4) is the size of the
273          * full register file in the scc mgr:
274          *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
275          *                             MEM_IF_READ_DQS_WIDTH - 1);
276          */
277         int i;
278
279         for (i = 0; i < 16; i++) {
280                 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
281                            __func__, __LINE__, i);
282                 scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
283         }
284 }
285
286 static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase)
287 {
288         scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
289 }
290
291 static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay)
292 {
293         scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
294 }
295
296 static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase)
297 {
298         scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
299 }
300
301 static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay)
302 {
303         scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
304 }
305
306 static void scc_mgr_set_dqs_io_in_delay(u32 delay)
307 {
308         scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs,
309                     delay);
310 }
311
312 static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay)
313 {
314         scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
315 }
316
317 static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay)
318 {
319         scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
320 }
321
322 static void scc_mgr_set_dqs_out1_delay(u32 delay)
323 {
324         scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs,
325                     delay);
326 }
327
328 static void scc_mgr_set_dm_out1_delay(u32 dm, u32 delay)
329 {
330         scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
331                     rwcfg->mem_dq_per_write_dqs + 1 + dm,
332                     delay);
333 }
334
335 /* load up dqs config settings */
336 static void scc_mgr_load_dqs(u32 dqs)
337 {
338         writel(dqs, &sdr_scc_mgr->dqs_ena);
339 }
340
341 /* load up dqs io config settings */
342 static void scc_mgr_load_dqs_io(void)
343 {
344         writel(0, &sdr_scc_mgr->dqs_io_ena);
345 }
346
347 /* load up dq config settings */
348 static void scc_mgr_load_dq(u32 dq_in_group)
349 {
350         writel(dq_in_group, &sdr_scc_mgr->dq_ena);
351 }
352
353 /* load up dm config settings */
354 static void scc_mgr_load_dm(u32 dm)
355 {
356         writel(dm, &sdr_scc_mgr->dm_ena);
357 }
358
359 /**
360  * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
361  * @off:        Base offset in SCC Manager space
362  * @grp:        Read/Write group
363  * @val:        Value to be set
364  * @update:     If non-zero, trigger SCC Manager update for all ranks
365  *
366  * This function sets the SCC Manager (Scan Chain Control Manager) register
367  * and optionally triggers the SCC update for all ranks.
368  */
369 static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
370                                   const int update)
371 {
372         u32 r;
373
374         for (r = 0; r < rwcfg->mem_number_of_ranks;
375              r += NUM_RANKS_PER_SHADOW_REG) {
376                 scc_mgr_set(off, grp, val);
377
378                 if (update || (r == 0)) {
379                         writel(grp, &sdr_scc_mgr->dqs_ena);
380                         writel(0, &sdr_scc_mgr->update);
381                 }
382         }
383 }
384
385 static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
386 {
387         /*
388          * USER although the h/w doesn't support different phases per
389          * shadow register, for simplicity our scc manager modeling
390          * keeps different phase settings per shadow reg, and it's
391          * important for us to keep them in sync to match h/w.
392          * for efficiency, the scan chain update should occur only
393          * once to sr0.
394          */
395         scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
396                               read_group, phase, 0);
397 }
398
399 static void scc_mgr_set_dqdqs_output_phase_all_ranks(u32 write_group,
400                                                      u32 phase)
401 {
402         /*
403          * USER although the h/w doesn't support different phases per
404          * shadow register, for simplicity our scc manager modeling
405          * keeps different phase settings per shadow reg, and it's
406          * important for us to keep them in sync to match h/w.
407          * for efficiency, the scan chain update should occur only
408          * once to sr0.
409          */
410         scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
411                               write_group, phase, 0);
412 }
413
414 static void scc_mgr_set_dqs_en_delay_all_ranks(u32 read_group,
415                                                u32 delay)
416 {
417         /*
418          * In shadow register mode, the T11 settings are stored in
419          * registers in the core, which are updated by the DQS_ENA
420          * signals. Not issuing the SCC_MGR_UPD command allows us to
421          * save lots of rank switching overhead, by calling
422          * select_shadow_regs_for_update with update_scan_chains
423          * set to 0.
424          */
425         scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
426                               read_group, delay, 1);
427         writel(0, &sdr_scc_mgr->update);
428 }
429
430 /**
431  * scc_mgr_set_oct_out1_delay() - Set OCT output delay
432  * @write_group:        Write group
433  * @delay:              Delay value
434  *
435  * This function sets the OCT output delay in SCC manager.
436  */
437 static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
438 {
439         const int ratio = rwcfg->mem_if_read_dqs_width /
440                           rwcfg->mem_if_write_dqs_width;
441         const int base = write_group * ratio;
442         int i;
443         /*
444          * Load the setting in the SCC manager
445          * Although OCT affects only write data, the OCT delay is controlled
446          * by the DQS logic block which is instantiated once per read group.
447          * For protocols where a write group consists of multiple read groups,
448          * the setting must be set multiple times.
449          */
450         for (i = 0; i < ratio; i++)
451                 scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
452 }
453
454 /**
455  * scc_mgr_set_hhp_extras() - Set HHP extras.
456  *
457  * Load the fixed setting in the SCC manager HHP extras.
458  */
459 static void scc_mgr_set_hhp_extras(void)
460 {
461         /*
462          * Load the fixed setting in the SCC manager
463          * bits: 0:0 = 1'b1     - DQS bypass
464          * bits: 1:1 = 1'b1     - DQ bypass
465          * bits: 4:2 = 3'b001   - rfifo_mode
466          * bits: 6:5 = 2'b01    - rfifo clock_select
467          * bits: 7:7 = 1'b0     - separate gating from ungating setting
468          * bits: 8:8 = 1'b0     - separate OE from Output delay setting
469          */
470         const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
471                           (1 << 2) | (1 << 1) | (1 << 0);
472         const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
473                          SCC_MGR_HHP_GLOBALS_OFFSET |
474                          SCC_MGR_HHP_EXTRAS_OFFSET;
475
476         debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
477                    __func__, __LINE__);
478         writel(value, addr);
479         debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
480                    __func__, __LINE__);
481 }
482
483 /**
484  * scc_mgr_zero_all() - Zero all DQS config
485  *
486  * Zero all DQS config.
487  */
488 static void scc_mgr_zero_all(void)
489 {
490         int i, r;
491
492         /*
493          * USER Zero all DQS config settings, across all groups and all
494          * shadow registers
495          */
496         for (r = 0; r < rwcfg->mem_number_of_ranks;
497              r += NUM_RANKS_PER_SHADOW_REG) {
498                 for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
499                         /*
500                          * The phases actually don't exist on a per-rank basis,
501                          * but there's no harm updating them several times, so
502                          * let's keep the code simple.
503                          */
504                         scc_mgr_set_dqs_bus_in_delay(i, iocfg->dqs_in_reserve);
505                         scc_mgr_set_dqs_en_phase(i, 0);
506                         scc_mgr_set_dqs_en_delay(i, 0);
507                 }
508
509                 for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
510                         scc_mgr_set_dqdqs_output_phase(i, 0);
511                         /* Arria V/Cyclone V don't have out2. */
512                         scc_mgr_set_oct_out1_delay(i, iocfg->dqs_out_reserve);
513                 }
514         }
515
516         /* Multicast to all DQS group enables. */
517         writel(0xff, &sdr_scc_mgr->dqs_ena);
518         writel(0, &sdr_scc_mgr->update);
519 }
520
521 /**
522  * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
523  * @write_group:        Write group
524  *
525  * Set bypass mode and trigger SCC update.
526  */
527 static void scc_set_bypass_mode(const u32 write_group)
528 {
529         /* Multicast to all DQ enables. */
530         writel(0xff, &sdr_scc_mgr->dq_ena);
531         writel(0xff, &sdr_scc_mgr->dm_ena);
532
533         /* Update current DQS IO enable. */
534         writel(0, &sdr_scc_mgr->dqs_io_ena);
535
536         /* Update the DQS logic. */
537         writel(write_group, &sdr_scc_mgr->dqs_ena);
538
539         /* Hit update. */
540         writel(0, &sdr_scc_mgr->update);
541 }
542
543 /**
544  * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
545  * @write_group:        Write group
546  *
547  * Load DQS settings for Write Group, do not trigger SCC update.
548  */
549 static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
550 {
551         const int ratio = rwcfg->mem_if_read_dqs_width /
552                           rwcfg->mem_if_write_dqs_width;
553         const int base = write_group * ratio;
554         int i;
555         /*
556          * Load the setting in the SCC manager
557          * Although OCT affects only write data, the OCT delay is controlled
558          * by the DQS logic block which is instantiated once per read group.
559          * For protocols where a write group consists of multiple read groups,
560          * the setting must be set multiple times.
561          */
562         for (i = 0; i < ratio; i++)
563                 writel(base + i, &sdr_scc_mgr->dqs_ena);
564 }
565
566 /**
567  * scc_mgr_zero_group() - Zero all configs for a group
568  *
569  * Zero DQ, DM, DQS and OCT configs for a group.
570  */
571 static void scc_mgr_zero_group(const u32 write_group, const int out_only)
572 {
573         int i, r;
574
575         for (r = 0; r < rwcfg->mem_number_of_ranks;
576              r += NUM_RANKS_PER_SHADOW_REG) {
577                 /* Zero all DQ config settings. */
578                 for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
579                         scc_mgr_set_dq_out1_delay(i, 0);
580                         if (!out_only)
581                                 scc_mgr_set_dq_in_delay(i, 0);
582                 }
583
584                 /* Multicast to all DQ enables. */
585                 writel(0xff, &sdr_scc_mgr->dq_ena);
586
587                 /* Zero all DM config settings. */
588                 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
589                         scc_mgr_set_dm_out1_delay(i, 0);
590
591                 /* Multicast to all DM enables. */
592                 writel(0xff, &sdr_scc_mgr->dm_ena);
593
594                 /* Zero all DQS IO settings. */
595                 if (!out_only)
596                         scc_mgr_set_dqs_io_in_delay(0);
597
598                 /* Arria V/Cyclone V don't have out2. */
599                 scc_mgr_set_dqs_out1_delay(iocfg->dqs_out_reserve);
600                 scc_mgr_set_oct_out1_delay(write_group, iocfg->dqs_out_reserve);
601                 scc_mgr_load_dqs_for_write_group(write_group);
602
603                 /* Multicast to all DQS IO enables (only 1 in total). */
604                 writel(0, &sdr_scc_mgr->dqs_io_ena);
605
606                 /* Hit update to zero everything. */
607                 writel(0, &sdr_scc_mgr->update);
608         }
609 }
610
611 /*
612  * apply and load a particular input delay for the DQ pins in a group
613  * group_bgn is the index of the first dq pin (in the write group)
614  */
615 static void scc_mgr_apply_group_dq_in_delay(u32 group_bgn, u32 delay)
616 {
617         u32 i, p;
618
619         for (i = 0, p = group_bgn; i < rwcfg->mem_dq_per_read_dqs; i++, p++) {
620                 scc_mgr_set_dq_in_delay(p, delay);
621                 scc_mgr_load_dq(p);
622         }
623 }
624
625 /**
626  * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
627  * @delay:              Delay value
628  *
629  * Apply and load a particular output delay for the DQ pins in a group.
630  */
631 static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
632 {
633         int i;
634
635         for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
636                 scc_mgr_set_dq_out1_delay(i, delay);
637                 scc_mgr_load_dq(i);
638         }
639 }
640
641 /* apply and load a particular output delay for the DM pins in a group */
642 static void scc_mgr_apply_group_dm_out1_delay(u32 delay1)
643 {
644         u32 i;
645
646         for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
647                 scc_mgr_set_dm_out1_delay(i, delay1);
648                 scc_mgr_load_dm(i);
649         }
650 }
651
652
653 /* apply and load delay on both DQS and OCT out1 */
654 static void scc_mgr_apply_group_dqs_io_and_oct_out1(u32 write_group,
655                                                     u32 delay)
656 {
657         scc_mgr_set_dqs_out1_delay(delay);
658         scc_mgr_load_dqs_io();
659
660         scc_mgr_set_oct_out1_delay(write_group, delay);
661         scc_mgr_load_dqs_for_write_group(write_group);
662 }
663
664 /**
665  * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
666  * @write_group:        Write group
667  * @delay:              Delay value
668  *
669  * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
670  */
671 static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
672                                                   const u32 delay)
673 {
674         u32 i, new_delay;
675
676         /* DQ shift */
677         for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++)
678                 scc_mgr_load_dq(i);
679
680         /* DM shift */
681         for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
682                 scc_mgr_load_dm(i);
683
684         /* DQS shift */
685         new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
686         if (new_delay > iocfg->io_out2_delay_max) {
687                 debug_cond(DLEVEL == 1,
688                            "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
689                            __func__, __LINE__, write_group, delay, new_delay,
690                            iocfg->io_out2_delay_max,
691                            new_delay - iocfg->io_out2_delay_max);
692                 new_delay -= iocfg->io_out2_delay_max;
693                 scc_mgr_set_dqs_out1_delay(new_delay);
694         }
695
696         scc_mgr_load_dqs_io();
697
698         /* OCT shift */
699         new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
700         if (new_delay > iocfg->io_out2_delay_max) {
701                 debug_cond(DLEVEL == 1,
702                            "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
703                            __func__, __LINE__, write_group, delay,
704                            new_delay, iocfg->io_out2_delay_max,
705                            new_delay - iocfg->io_out2_delay_max);
706                 new_delay -= iocfg->io_out2_delay_max;
707                 scc_mgr_set_oct_out1_delay(write_group, new_delay);
708         }
709
710         scc_mgr_load_dqs_for_write_group(write_group);
711 }
712
713 /**
714  * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side to all ranks
715  * @write_group:        Write group
716  * @delay:              Delay value
717  *
718  * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
719  */
720 static void
721 scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
722                                                 const u32 delay)
723 {
724         int r;
725
726         for (r = 0; r < rwcfg->mem_number_of_ranks;
727              r += NUM_RANKS_PER_SHADOW_REG) {
728                 scc_mgr_apply_group_all_out_delay_add(write_group, delay);
729                 writel(0, &sdr_scc_mgr->update);
730         }
731 }
732
733 /**
734  * set_jump_as_return() - Return instruction optimization
735  *
736  * Optimization used to recover some slots in ddr3 inst_rom could be
737  * applied to other protocols if we wanted to
738  */
739 static void set_jump_as_return(void)
740 {
741         /*
742          * To save space, we replace return with jump to special shared
743          * RETURN instruction so we set the counter to large value so that
744          * we always jump.
745          */
746         writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
747         writel(rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
748 }
749
750 /**
751  * delay_for_n_mem_clocks() - Delay for N memory clocks
752  * @clocks:     Length of the delay
753  *
754  * Delay for N memory clocks.
755  */
756 static void delay_for_n_mem_clocks(const u32 clocks)
757 {
758         u32 afi_clocks;
759         u16 c_loop;
760         u8 inner;
761         u8 outer;
762
763         debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);
764
765         /* Scale (rounding up) to get afi clocks. */
766         afi_clocks = DIV_ROUND_UP(clocks, misccfg->afi_rate_ratio);
767         if (afi_clocks) /* Temporary underflow protection */
768                 afi_clocks--;
769
770         /*
771          * Note, we don't bother accounting for being off a little
772          * bit because of a few extra instructions in outer loops.
773          * Note, the loops have a test at the end, and do the test
774          * before the decrement, and so always perform the loop
775          * 1 time more than the counter value
776          */
777         c_loop = afi_clocks >> 16;
778         outer = c_loop ? 0xff : (afi_clocks >> 8);
779         inner = outer ? 0xff : afi_clocks;
780
781         /*
782          * rom instructions are structured as follows:
783          *
784          *    IDLE_LOOP2: jnz cntr0, TARGET_A
785          *    IDLE_LOOP1: jnz cntr1, TARGET_B
786          *                return
787          *
788          * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
789          * TARGET_B is set to IDLE_LOOP2 as well
790          *
791          * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
792          * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
793          *
794          * a little confusing, but it helps save precious space in the inst_rom
795          * and sequencer rom and keeps the delays more accurate and reduces
796          * overhead
797          */
798         if (afi_clocks < 0x100) {
799                 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
800                        &sdr_rw_load_mgr_regs->load_cntr1);
801
802                 writel(rwcfg->idle_loop1,
803                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
804
805                 writel(rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
806                                           RW_MGR_RUN_SINGLE_GROUP_OFFSET);
807         } else {
808                 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
809                        &sdr_rw_load_mgr_regs->load_cntr0);
810
811                 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
812                        &sdr_rw_load_mgr_regs->load_cntr1);
813
814                 writel(rwcfg->idle_loop2,
815                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
816
817                 writel(rwcfg->idle_loop2,
818                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
819
820                 do {
821                         writel(rwcfg->idle_loop2,
822                                SDR_PHYGRP_RWMGRGRP_ADDRESS |
823                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
824                 } while (c_loop-- != 0);
825         }
826         debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
827 }
828
829 /**
830  * rw_mgr_mem_init_load_regs() - Load instruction registers
831  * @cntr0:      Counter 0 value
832  * @cntr1:      Counter 1 value
833  * @cntr2:      Counter 2 value
834  * @jump:       Jump instruction value
835  *
836  * Load instruction registers.
837  */
838 static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
839 {
840         u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
841                            RW_MGR_RUN_SINGLE_GROUP_OFFSET;
842
843         /* Load counters */
844         writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
845                &sdr_rw_load_mgr_regs->load_cntr0);
846         writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
847                &sdr_rw_load_mgr_regs->load_cntr1);
848         writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
849                &sdr_rw_load_mgr_regs->load_cntr2);
850
851         /* Load jump address */
852         writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
853         writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
854         writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
855
856         /* Execute count instruction */
857         writel(jump, grpaddr);
858 }
859
860 /**
861  * rw_mgr_mem_load_user() - Load user calibration values
862  * @fin1:       Final instruction 1
863  * @fin2:       Final instruction 2
864  * @precharge:  If 1, precharge the banks at the end
865  *
866  * Load user calibration values and optionally precharge the banks.
867  */
868 static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
869                                  const int precharge)
870 {
871         u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
872                       RW_MGR_RUN_SINGLE_GROUP_OFFSET;
873         u32 r;
874
875         for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
876                 /* set rank */
877                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
878
879                 /* precharge all banks ... */
880                 if (precharge)
881                         writel(rwcfg->precharge_all, grpaddr);
882
883                 /*
884                  * USER Use Mirror-ed commands for odd ranks if address
885                  * mirrorring is on
886                  */
887                 if ((rwcfg->mem_address_mirroring >> r) & 0x1) {
888                         set_jump_as_return();
889                         writel(rwcfg->mrs2_mirr, grpaddr);
890                         delay_for_n_mem_clocks(4);
891                         set_jump_as_return();
892                         writel(rwcfg->mrs3_mirr, grpaddr);
893                         delay_for_n_mem_clocks(4);
894                         set_jump_as_return();
895                         writel(rwcfg->mrs1_mirr, grpaddr);
896                         delay_for_n_mem_clocks(4);
897                         set_jump_as_return();
898                         writel(fin1, grpaddr);
899                 } else {
900                         set_jump_as_return();
901                         writel(rwcfg->mrs2, grpaddr);
902                         delay_for_n_mem_clocks(4);
903                         set_jump_as_return();
904                         writel(rwcfg->mrs3, grpaddr);
905                         delay_for_n_mem_clocks(4);
906                         set_jump_as_return();
907                         writel(rwcfg->mrs1, grpaddr);
908                         set_jump_as_return();
909                         writel(fin2, grpaddr);
910                 }
911
912                 if (precharge)
913                         continue;
914
915                 set_jump_as_return();
916                 writel(rwcfg->zqcl, grpaddr);
917
918                 /* tZQinit = tDLLK = 512 ck cycles */
919                 delay_for_n_mem_clocks(512);
920         }
921 }
922
923 /**
924  * rw_mgr_mem_initialize() - Initialize RW Manager
925  *
926  * Initialize RW Manager.
927  */
928 static void rw_mgr_mem_initialize(void)
929 {
930         debug("%s:%d\n", __func__, __LINE__);
931
932         /* The reset / cke part of initialization is broadcasted to all ranks */
933         writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
934                                 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
935
936         /*
937          * Here's how you load register for a loop
938          * Counters are located @ 0x800
939          * Jump address are located @ 0xC00
940          * For both, registers 0 to 3 are selected using bits 3 and 2, like
941          * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
942          * I know this ain't pretty, but Avalon bus throws away the 2 least
943          * significant bits
944          */
945
946         /* Start with memory RESET activated */
947
948         /* tINIT = 200us */
949
950         /*
951          * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
952          * If a and b are the number of iteration in 2 nested loops
953          * it takes the following number of cycles to complete the operation:
954          * number_of_cycles = ((2 + n) * a + 2) * b
955          * where n is the number of instruction in the inner loop
956          * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
957          * b = 6A
958          */
959         rw_mgr_mem_init_load_regs(misccfg->tinit_cntr0_val,
960                                   misccfg->tinit_cntr1_val,
961                                   misccfg->tinit_cntr2_val,
962                                   rwcfg->init_reset_0_cke_0);
963
964         /* Indicate that memory is stable. */
965         writel(1, &phy_mgr_cfg->reset_mem_stbl);
966
967         /*
968          * transition the RESET to high
969          * Wait for 500us
970          */
971
972         /*
973          * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
974          * If a and b are the number of iteration in 2 nested loops
975          * it takes the following number of cycles to complete the operation
976          * number_of_cycles = ((2 + n) * a + 2) * b
977          * where n is the number of instruction in the inner loop
978          * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
979          * b = FF
980          */
981         rw_mgr_mem_init_load_regs(misccfg->treset_cntr0_val,
982                                   misccfg->treset_cntr1_val,
983                                   misccfg->treset_cntr2_val,
984                                   rwcfg->init_reset_1_cke_0);
985
986         /* Bring up clock enable. */
987
988         /* tXRP < 250 ck cycles */
989         delay_for_n_mem_clocks(250);
990
991         rw_mgr_mem_load_user(rwcfg->mrs0_dll_reset_mirr, rwcfg->mrs0_dll_reset,
992                              0);
993 }
994
995 /**
996  * rw_mgr_mem_handoff() - Hand off the memory to user
997  *
998  * At the end of calibration we have to program the user settings in
999  * and hand off the memory to the user.
1000  */
1001 static void rw_mgr_mem_handoff(void)
1002 {
1003         rw_mgr_mem_load_user(rwcfg->mrs0_user_mirr, rwcfg->mrs0_user, 1);
1004         /*
1005          * Need to wait tMOD (12CK or 15ns) time before issuing other
1006          * commands, but we will have plenty of NIOS cycles before actual
1007          * handoff so its okay.
1008          */
1009 }
1010
1011 /**
1012  * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
1013  * @group:      Write Group
1014  * @use_dm:     Use DM
1015  *
1016  * Issue write test command. Two variants are provided, one that just tests
1017  * a write pattern and another that tests datamask functionality.
1018  */
1019 static void rw_mgr_mem_calibrate_write_test_issue(u32 group,
1020                                                   u32 test_dm)
1021 {
1022         const u32 quick_write_mode =
1023                 (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
1024                 misccfg->enable_super_quick_calibration;
1025         u32 mcc_instruction;
1026         u32 rw_wl_nop_cycles;
1027
1028         /*
1029          * Set counter and jump addresses for the right
1030          * number of NOP cycles.
1031          * The number of supported NOP cycles can range from -1 to infinity
1032          * Three different cases are handled:
1033          *
1034          * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
1035          *    mechanism will be used to insert the right number of NOPs
1036          *
1037          * 2. For a number of NOP cycles equals to 0, the micro-instruction
1038          *    issuing the write command will jump straight to the
1039          *    micro-instruction that turns on DQS (for DDRx), or outputs write
1040          *    data (for RLD), skipping
1041          *    the NOP micro-instruction all together
1042          *
1043          * 3. A number of NOP cycles equal to -1 indicates that DQS must be
1044          *    turned on in the same micro-instruction that issues the write
1045          *    command. Then we need
1046          *    to directly jump to the micro-instruction that sends out the data
1047          *
1048          * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
1049          *       (2 and 3). One jump-counter (0) is used to perform multiple
1050          *       write-read operations.
1051          *       one counter left to issue this command in "multiple-group" mode
1052          */
1053
1054         rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
1055
1056         if (rw_wl_nop_cycles == -1) {
1057                 /*
1058                  * CNTR 2 - We want to execute the special write operation that
1059                  * turns on DQS right away and then skip directly to the
1060                  * instruction that sends out the data. We set the counter to a
1061                  * large number so that the jump is always taken.
1062                  */
1063                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
1064
1065                 /* CNTR 3 - Not used */
1066                 if (test_dm) {
1067                         mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0_wl_1;
1068                         writel(rwcfg->lfsr_wr_rd_dm_bank_0_data,
1069                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1070                         writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop,
1071                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1072                 } else {
1073                         mcc_instruction = rwcfg->lfsr_wr_rd_bank_0_wl_1;
1074                         writel(rwcfg->lfsr_wr_rd_bank_0_data,
1075                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1076                         writel(rwcfg->lfsr_wr_rd_bank_0_nop,
1077                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1078                 }
1079         } else if (rw_wl_nop_cycles == 0) {
1080                 /*
1081                  * CNTR 2 - We want to skip the NOP operation and go straight
1082                  * to the DQS enable instruction. We set the counter to a large
1083                  * number so that the jump is always taken.
1084                  */
1085                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
1086
1087                 /* CNTR 3 - Not used */
1088                 if (test_dm) {
1089                         mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0;
1090                         writel(rwcfg->lfsr_wr_rd_dm_bank_0_dqs,
1091                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1092                 } else {
1093                         mcc_instruction = rwcfg->lfsr_wr_rd_bank_0;
1094                         writel(rwcfg->lfsr_wr_rd_bank_0_dqs,
1095                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1096                 }
1097         } else {
1098                 /*
1099                  * CNTR 2 - In this case we want to execute the next instruction
1100                  * and NOT take the jump. So we set the counter to 0. The jump
1101                  * address doesn't count.
1102                  */
1103                 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
1104                 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1105
1106                 /*
1107                  * CNTR 3 - Set the nop counter to the number of cycles we
1108                  * need to loop for, minus 1.
1109                  */
1110                 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
1111                 if (test_dm) {
1112                         mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0;
1113                         writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop,
1114                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1115                 } else {
1116                         mcc_instruction = rwcfg->lfsr_wr_rd_bank_0;
1117                         writel(rwcfg->lfsr_wr_rd_bank_0_nop,
1118                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1119                 }
1120         }
1121
1122         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1123                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1124
1125         if (quick_write_mode)
1126                 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
1127         else
1128                 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
1129
1130         writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1131
1132         /*
1133          * CNTR 1 - This is used to ensure enough time elapses
1134          * for read data to come back.
1135          */
1136         writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
1137
1138         if (test_dm) {
1139                 writel(rwcfg->lfsr_wr_rd_dm_bank_0_wait,
1140                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1141         } else {
1142                 writel(rwcfg->lfsr_wr_rd_bank_0_wait,
1143                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1144         }
1145
1146         writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
1147                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
1148                                 (group << 2));
1149 }
1150
1151 /**
1152  * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass
1153  * @rank_bgn:           Rank number
1154  * @write_group:        Write Group
1155  * @use_dm:             Use DM
1156  * @all_correct:        All bits must be correct in the mask
1157  * @bit_chk:            Resulting bit mask after the test
1158  * @all_ranks:          Test all ranks
1159  *
1160  * Test writes, can check for a single bit pass or multiple bit pass.
1161  */
1162 static int
1163 rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group,
1164                                 const u32 use_dm, const u32 all_correct,
1165                                 u32 *bit_chk, const u32 all_ranks)
1166 {
1167         const u32 rank_end = all_ranks ?
1168                                 rwcfg->mem_number_of_ranks :
1169                                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1170         const u32 shift_ratio = rwcfg->mem_dq_per_write_dqs /
1171                                 rwcfg->mem_virtual_groups_per_write_dqs;
1172         const u32 correct_mask_vg = param->write_correct_mask_vg;
1173
1174         u32 tmp_bit_chk, base_rw_mgr;
1175         int vg, r;
1176
1177         *bit_chk = param->write_correct_mask;
1178
1179         for (r = rank_bgn; r < rank_end; r++) {
1180                 /* Set rank */
1181                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1182
1183                 tmp_bit_chk = 0;
1184                 for (vg = rwcfg->mem_virtual_groups_per_write_dqs - 1;
1185                      vg >= 0; vg--) {
1186                         /* Reset the FIFOs to get pointers to known state. */
1187                         writel(0, &phy_mgr_cmd->fifo_reset);
1188
1189                         rw_mgr_mem_calibrate_write_test_issue(
1190                                 write_group *
1191                                 rwcfg->mem_virtual_groups_per_write_dqs + vg,
1192                                 use_dm);
1193
1194                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1195                         tmp_bit_chk <<= shift_ratio;
1196                         tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
1197                 }
1198
1199                 *bit_chk &= tmp_bit_chk;
1200         }
1201
1202         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1203         if (all_correct) {
1204                 debug_cond(DLEVEL == 2,
1205                            "write_test(%u,%u,ALL) : %u == %u => %i\n",
1206                            write_group, use_dm, *bit_chk,
1207                            param->write_correct_mask,
1208                            *bit_chk == param->write_correct_mask);
1209                 return *bit_chk == param->write_correct_mask;
1210         } else {
1211                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1212                 debug_cond(DLEVEL == 2,
1213                            "write_test(%u,%u,ONE) : %u != %i => %i\n",
1214                            write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
1215                 return *bit_chk != 0x00;
1216         }
1217 }
1218
1219 /**
1220  * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
1221  * @rank_bgn:   Rank number
1222  * @group:      Read/Write Group
1223  * @all_ranks:  Test all ranks
1224  *
1225  * Performs a guaranteed read on the patterns we are going to use during a
1226  * read test to ensure memory works.
1227  */
1228 static int
1229 rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group,
1230                                         const u32 all_ranks)
1231 {
1232         const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1233                          RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1234         const u32 addr_offset =
1235                          (group * rwcfg->mem_virtual_groups_per_read_dqs) << 2;
1236         const u32 rank_end = all_ranks ?
1237                                 rwcfg->mem_number_of_ranks :
1238                                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1239         const u32 shift_ratio = rwcfg->mem_dq_per_read_dqs /
1240                                 rwcfg->mem_virtual_groups_per_read_dqs;
1241         const u32 correct_mask_vg = param->read_correct_mask_vg;
1242
1243         u32 tmp_bit_chk, base_rw_mgr, bit_chk;
1244         int vg, r;
1245         int ret = 0;
1246
1247         bit_chk = param->read_correct_mask;
1248
1249         for (r = rank_bgn; r < rank_end; r++) {
1250                 /* Set rank */
1251                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1252
1253                 /* Load up a constant bursts of read commands */
1254                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1255                 writel(rwcfg->guaranteed_read,
1256                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1257
1258                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1259                 writel(rwcfg->guaranteed_read_cont,
1260                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1261
1262                 tmp_bit_chk = 0;
1263                 for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1;
1264                      vg >= 0; vg--) {
1265                         /* Reset the FIFOs to get pointers to known state. */
1266                         writel(0, &phy_mgr_cmd->fifo_reset);
1267                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1268                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1269                         writel(rwcfg->guaranteed_read,
1270                                addr + addr_offset + (vg << 2));
1271
1272                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1273                         tmp_bit_chk <<= shift_ratio;
1274                         tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
1275                 }
1276
1277                 bit_chk &= tmp_bit_chk;
1278         }
1279
1280         writel(rwcfg->clear_dqs_enable, addr + (group << 2));
1281
1282         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1283
1284         if (bit_chk != param->read_correct_mask)
1285                 ret = -EIO;
1286
1287         debug_cond(DLEVEL == 1,
1288                    "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
1289                    __func__, __LINE__, group, bit_chk,
1290                    param->read_correct_mask, ret);
1291
1292         return ret;
1293 }
1294
1295 /**
1296  * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test
1297  * @rank_bgn:   Rank number
1298  * @all_ranks:  Test all ranks
1299  *
1300  * Load up the patterns we are going to use during a read test.
1301  */
1302 static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn,
1303                                                     const int all_ranks)
1304 {
1305         const u32 rank_end = all_ranks ?
1306                         rwcfg->mem_number_of_ranks :
1307                         (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1308         u32 r;
1309
1310         debug("%s:%d\n", __func__, __LINE__);
1311
1312         for (r = rank_bgn; r < rank_end; r++) {
1313                 /* set rank */
1314                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1315
1316                 /* Load up a constant bursts */
1317                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1318
1319                 writel(rwcfg->guaranteed_write_wait0,
1320                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1321
1322                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1323
1324                 writel(rwcfg->guaranteed_write_wait1,
1325                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1326
1327                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);
1328
1329                 writel(rwcfg->guaranteed_write_wait2,
1330                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1331
1332                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);
1333
1334                 writel(rwcfg->guaranteed_write_wait3,
1335                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1336
1337                 writel(rwcfg->guaranteed_write, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1338                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1339         }
1340
1341         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1342 }
1343
1344 /**
1345  * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
1346  * @rank_bgn:           Rank number
1347  * @group:              Read/Write group
1348  * @num_tries:          Number of retries of the test
1349  * @all_correct:        All bits must be correct in the mask
1350  * @bit_chk:            Resulting bit mask after the test
1351  * @all_groups:         Test all R/W groups
1352  * @all_ranks:          Test all ranks
1353  *
1354  * Try a read and see if it returns correct data back. Test has dummy reads
1355  * inserted into the mix used to align DQS enable. Test has more thorough
1356  * checks than the regular read test.
1357  */
1358 static int
1359 rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group,
1360                                const u32 num_tries, const u32 all_correct,
1361                                u32 *bit_chk,
1362                                const u32 all_groups, const u32 all_ranks)
1363 {
1364         const u32 rank_end = all_ranks ? rwcfg->mem_number_of_ranks :
1365                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1366         const u32 quick_read_mode =
1367                 ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
1368                  misccfg->enable_super_quick_calibration);
1369         u32 correct_mask_vg = param->read_correct_mask_vg;
1370         u32 tmp_bit_chk;
1371         u32 base_rw_mgr;
1372         u32 addr;
1373
1374         int r, vg, ret;
1375
1376         *bit_chk = param->read_correct_mask;
1377
1378         for (r = rank_bgn; r < rank_end; r++) {
1379                 /* set rank */
1380                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1381
1382                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1383
1384                 writel(rwcfg->read_b2b_wait1,
1385                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1386
1387                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1388                 writel(rwcfg->read_b2b_wait2,
1389                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1390
1391                 if (quick_read_mode)
1392                         writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1393                         /* need at least two (1+1) reads to capture failures */
1394                 else if (all_groups)
1395                         writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1396                 else
1397                         writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
1398
1399                 writel(rwcfg->read_b2b,
1400                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1401                 if (all_groups)
1402                         writel(rwcfg->mem_if_read_dqs_width *
1403                                rwcfg->mem_virtual_groups_per_read_dqs - 1,
1404                                &sdr_rw_load_mgr_regs->load_cntr3);
1405                 else
1406                         writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1407
1408                 writel(rwcfg->read_b2b,
1409                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1410
1411                 tmp_bit_chk = 0;
1412                 for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; vg >= 0;
1413                      vg--) {
1414                         /* Reset the FIFOs to get pointers to known state. */
1415                         writel(0, &phy_mgr_cmd->fifo_reset);
1416                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1417                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1418
1419                         if (all_groups) {
1420                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1421                                        RW_MGR_RUN_ALL_GROUPS_OFFSET;
1422                         } else {
1423                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1424                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1425                         }
1426
1427                         writel(rwcfg->read_b2b, addr +
1428                                ((group *
1429                                  rwcfg->mem_virtual_groups_per_read_dqs +
1430                                  vg) << 2));
1431
1432                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1433                         tmp_bit_chk <<= rwcfg->mem_dq_per_read_dqs /
1434                                         rwcfg->mem_virtual_groups_per_read_dqs;
1435                         tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
1436                 }
1437
1438                 *bit_chk &= tmp_bit_chk;
1439         }
1440
1441         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1442         writel(rwcfg->clear_dqs_enable, addr + (group << 2));
1443
1444         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1445
1446         if (all_correct) {
1447                 ret = (*bit_chk == param->read_correct_mask);
1448                 debug_cond(DLEVEL == 2,
1449                            "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
1450                            __func__, __LINE__, group, all_groups, *bit_chk,
1451                            param->read_correct_mask, ret);
1452         } else  {
1453                 ret = (*bit_chk != 0x00);
1454                 debug_cond(DLEVEL == 2,
1455                            "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
1456                            __func__, __LINE__, group, all_groups, *bit_chk,
1457                            0, ret);
1458         }
1459
1460         return ret;
1461 }
1462
1463 /**
1464  * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
1465  * @grp:                Read/Write group
1466  * @num_tries:          Number of retries of the test
1467  * @all_correct:        All bits must be correct in the mask
1468  * @all_groups:         Test all R/W groups
1469  *
1470  * Perform a READ test across all memory ranks.
1471  */
1472 static int
1473 rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries,
1474                                          const u32 all_correct,
1475                                          const u32 all_groups)
1476 {
1477         u32 bit_chk;
1478         return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct,
1479                                               &bit_chk, all_groups, 1);
1480 }
1481
1482 /**
1483  * rw_mgr_incr_vfifo() - Increase VFIFO value
1484  * @grp:        Read/Write group
1485  *
1486  * Increase VFIFO value.
1487  */
1488 static void rw_mgr_incr_vfifo(const u32 grp)
1489 {
1490         writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
1491 }
1492
1493 /**
1494  * rw_mgr_decr_vfifo() - Decrease VFIFO value
1495  * @grp:        Read/Write group
1496  *
1497  * Decrease VFIFO value.
1498  */
1499 static void rw_mgr_decr_vfifo(const u32 grp)
1500 {
1501         u32 i;
1502
1503         for (i = 0; i < misccfg->read_valid_fifo_size - 1; i++)
1504                 rw_mgr_incr_vfifo(grp);
1505 }
1506
1507 /**
1508  * find_vfifo_failing_read() - Push VFIFO to get a failing read
1509  * @grp:        Read/Write group
1510  *
1511  * Push VFIFO until a failing read happens.
1512  */
1513 static int find_vfifo_failing_read(const u32 grp)
1514 {
1515         u32 v, ret, fail_cnt = 0;
1516
1517         for (v = 0; v < misccfg->read_valid_fifo_size; v++) {
1518                 debug_cond(DLEVEL == 2, "%s:%d: vfifo %u\n",
1519                            __func__, __LINE__, v);
1520                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1521                                                 PASS_ONE_BIT, 0);
1522                 if (!ret) {
1523                         fail_cnt++;
1524
1525                         if (fail_cnt == 2)
1526                                 return v;
1527                 }
1528
1529                 /* Fiddle with FIFO. */
1530                 rw_mgr_incr_vfifo(grp);
1531         }
1532
1533         /* No failing read found! Something must have gone wrong. */
1534         debug_cond(DLEVEL == 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
1535         return 0;
1536 }
1537
1538 /**
1539  * sdr_find_phase_delay() - Find DQS enable phase or delay
1540  * @working:    If 1, look for working phase/delay, if 0, look for non-working
1541  * @delay:      If 1, look for delay, if 0, look for phase
1542  * @grp:        Read/Write group
1543  * @work:       Working window position
1544  * @work_inc:   Working window increment
1545  * @pd:         DQS Phase/Delay Iterator
1546  *
1547  * Find working or non-working DQS enable phase setting.
1548  */
1549 static int sdr_find_phase_delay(int working, int delay, const u32 grp,
1550                                 u32 *work, const u32 work_inc, u32 *pd)
1551 {
1552         const u32 max = delay ? iocfg->dqs_en_delay_max :
1553                                 iocfg->dqs_en_phase_max;
1554         u32 ret;
1555
1556         for (; *pd <= max; (*pd)++) {
1557                 if (delay)
1558                         scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd);
1559                 else
1560                         scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd);
1561
1562                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1563                                         PASS_ONE_BIT, 0);
1564                 if (!working)
1565                         ret = !ret;
1566
1567                 if (ret)
1568                         return 0;
1569
1570                 if (work)
1571                         *work += work_inc;
1572         }
1573
1574         return -EINVAL;
1575 }
1576 /**
1577  * sdr_find_phase() - Find DQS enable phase
1578  * @working:    If 1, look for working phase, if 0, look for non-working phase
1579  * @grp:        Read/Write group
1580  * @work:       Working window position
1581  * @i:          Iterator
1582  * @p:          DQS Phase Iterator
1583  *
1584  * Find working or non-working DQS enable phase setting.
1585  */
1586 static int sdr_find_phase(int working, const u32 grp, u32 *work,
1587                           u32 *i, u32 *p)
1588 {
1589         const u32 end = misccfg->read_valid_fifo_size + (working ? 0 : 1);
1590         int ret;
1591
1592         for (; *i < end; (*i)++) {
1593                 if (working)
1594                         *p = 0;
1595
1596                 ret = sdr_find_phase_delay(working, 0, grp, work,
1597                                            iocfg->delay_per_opa_tap, p);
1598                 if (!ret)
1599                         return 0;
1600
1601                 if (*p > iocfg->dqs_en_phase_max) {
1602                         /* Fiddle with FIFO. */
1603                         rw_mgr_incr_vfifo(grp);
1604                         if (!working)
1605                                 *p = 0;
1606                 }
1607         }
1608
1609         return -EINVAL;
1610 }
1611
1612 /**
1613  * sdr_working_phase() - Find working DQS enable phase
1614  * @grp:        Read/Write group
1615  * @work_bgn:   Working window start position
1616  * @d:          dtaps output value
1617  * @p:          DQS Phase Iterator
1618  * @i:          Iterator
1619  *
1620  * Find working DQS enable phase setting.
1621  */
1622 static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d,
1623                              u32 *p, u32 *i)
1624 {
1625         const u32 dtaps_per_ptap = iocfg->delay_per_opa_tap /
1626                                    iocfg->delay_per_dqs_en_dchain_tap;
1627         int ret;
1628
1629         *work_bgn = 0;
1630
1631         for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
1632                 *i = 0;
1633                 scc_mgr_set_dqs_en_delay_all_ranks(grp, *d);
1634                 ret = sdr_find_phase(1, grp, work_bgn, i, p);
1635                 if (!ret)
1636                         return 0;
1637                 *work_bgn += iocfg->delay_per_dqs_en_dchain_tap;
1638         }
1639
1640         /* Cannot find working solution */
1641         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1642                    __func__, __LINE__);
1643         return -EINVAL;
1644 }
1645
1646 /**
1647  * sdr_backup_phase() - Find DQS enable backup phase
1648  * @grp:        Read/Write group
1649  * @work_bgn:   Working window start position
1650  * @p:          DQS Phase Iterator
1651  *
1652  * Find DQS enable backup phase setting.
1653  */
1654 static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p)
1655 {
1656         u32 tmp_delay, d;
1657         int ret;
1658
1659         /* Special case code for backing up a phase */
1660         if (*p == 0) {
1661                 *p = iocfg->dqs_en_phase_max;
1662                 rw_mgr_decr_vfifo(grp);
1663         } else {
1664                 (*p)--;
1665         }
1666         tmp_delay = *work_bgn - iocfg->delay_per_opa_tap;
1667         scc_mgr_set_dqs_en_phase_all_ranks(grp, *p);
1668
1669         for (d = 0; d <= iocfg->dqs_en_delay_max && tmp_delay < *work_bgn;
1670              d++) {
1671                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1672
1673                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1674                                         PASS_ONE_BIT, 0);
1675                 if (ret) {
1676                         *work_bgn = tmp_delay;
1677                         break;
1678                 }
1679
1680                 tmp_delay += iocfg->delay_per_dqs_en_dchain_tap;
1681         }
1682
1683         /* Restore VFIFO to old state before we decremented it (if needed). */
1684         (*p)++;
1685         if (*p > iocfg->dqs_en_phase_max) {
1686                 *p = 0;
1687                 rw_mgr_incr_vfifo(grp);
1688         }
1689
1690         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1691 }
1692
1693 /**
1694  * sdr_nonworking_phase() - Find non-working DQS enable phase
1695  * @grp:        Read/Write group
1696  * @work_end:   Working window end position
1697  * @p:          DQS Phase Iterator
1698  * @i:          Iterator
1699  *
1700  * Find non-working DQS enable phase setting.
1701  */
1702 static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i)
1703 {
1704         int ret;
1705
1706         (*p)++;
1707         *work_end += iocfg->delay_per_opa_tap;
1708         if (*p > iocfg->dqs_en_phase_max) {
1709                 /* Fiddle with FIFO. */
1710                 *p = 0;
1711                 rw_mgr_incr_vfifo(grp);
1712         }
1713
1714         ret = sdr_find_phase(0, grp, work_end, i, p);
1715         if (ret) {
1716                 /* Cannot see edge of failing read. */
1717                 debug_cond(DLEVEL == 2, "%s:%d: end: failed\n",
1718                            __func__, __LINE__);
1719         }
1720
1721         return ret;
1722 }
1723
1724 /**
1725  * sdr_find_window_center() - Find center of the working DQS window.
1726  * @grp:        Read/Write group
1727  * @work_bgn:   First working settings
1728  * @work_end:   Last working settings
1729  *
1730  * Find center of the working DQS enable window.
1731  */
1732 static int sdr_find_window_center(const u32 grp, const u32 work_bgn,
1733                                   const u32 work_end)
1734 {
1735         u32 work_mid;
1736         int tmp_delay = 0;
1737         int i, p, d;
1738
1739         work_mid = (work_bgn + work_end) / 2;
1740
1741         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1742                    work_bgn, work_end, work_mid);
1743         /* Get the middle delay to be less than a VFIFO delay */
1744         tmp_delay = (iocfg->dqs_en_phase_max + 1) * iocfg->delay_per_opa_tap;
1745
1746         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1747         work_mid %= tmp_delay;
1748         debug_cond(DLEVEL == 2, "new work_mid %d\n", work_mid);
1749
1750         tmp_delay = rounddown(work_mid, iocfg->delay_per_opa_tap);
1751         if (tmp_delay > iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap)
1752                 tmp_delay = iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap;
1753         p = tmp_delay / iocfg->delay_per_opa_tap;
1754
1755         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
1756
1757         d = DIV_ROUND_UP(work_mid - tmp_delay,
1758                          iocfg->delay_per_dqs_en_dchain_tap);
1759         if (d > iocfg->dqs_en_delay_max)
1760                 d = iocfg->dqs_en_delay_max;
1761         tmp_delay += d * iocfg->delay_per_dqs_en_dchain_tap;
1762
1763         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
1764
1765         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1766         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1767
1768         /*
1769          * push vfifo until we can successfully calibrate. We can do this
1770          * because the largest possible margin in 1 VFIFO cycle.
1771          */
1772         for (i = 0; i < misccfg->read_valid_fifo_size; i++) {
1773                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center\n");
1774                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1775                                                              PASS_ONE_BIT,
1776                                                              0)) {
1777                         debug_cond(DLEVEL == 2,
1778                                    "%s:%d center: found: ptap=%u dtap=%u\n",
1779                                    __func__, __LINE__, p, d);
1780                         return 0;
1781                 }
1782
1783                 /* Fiddle with FIFO. */
1784                 rw_mgr_incr_vfifo(grp);
1785         }
1786
1787         debug_cond(DLEVEL == 2, "%s:%d center: failed.\n",
1788                    __func__, __LINE__);
1789         return -EINVAL;
1790 }
1791
1792 /**
1793  * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use
1794  * @grp:        Read/Write Group
1795  *
1796  * Find a good DQS enable to use.
1797  */
1798 static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp)
1799 {
1800         u32 d, p, i;
1801         u32 dtaps_per_ptap;
1802         u32 work_bgn, work_end;
1803         u32 found_passing_read, found_failing_read = 0, initial_failing_dtap;
1804         int ret;
1805
1806         debug("%s:%d %u\n", __func__, __LINE__, grp);
1807
1808         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1809
1810         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1811         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1812
1813         /* Step 0: Determine number of delay taps for each phase tap. */
1814         dtaps_per_ptap = iocfg->delay_per_opa_tap /
1815                          iocfg->delay_per_dqs_en_dchain_tap;
1816
1817         /* Step 1: First push vfifo until we get a failing read. */
1818         find_vfifo_failing_read(grp);
1819
1820         /* Step 2: Find first working phase, increment in ptaps. */
1821         work_bgn = 0;
1822         ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i);
1823         if (ret)
1824                 return ret;
1825
1826         work_end = work_bgn;
1827
1828         /*
1829          * If d is 0 then the working window covers a phase tap and we can
1830          * follow the old procedure. Otherwise, we've found the beginning
1831          * and we need to increment the dtaps until we find the end.
1832          */
1833         if (d == 0) {
1834                 /*
1835                  * Step 3a: If we have room, back off by one and
1836                  *          increment in dtaps.
1837                  */
1838                 sdr_backup_phase(grp, &work_bgn, &p);
1839
1840                 /*
1841                  * Step 4a: go forward from working phase to non working
1842                  * phase, increment in ptaps.
1843                  */
1844                 ret = sdr_nonworking_phase(grp, &work_end, &p, &i);
1845                 if (ret)
1846                         return ret;
1847
1848                 /* Step 5a: Back off one from last, increment in dtaps. */
1849
1850                 /* Special case code for backing up a phase */
1851                 if (p == 0) {
1852                         p = iocfg->dqs_en_phase_max;
1853                         rw_mgr_decr_vfifo(grp);
1854                 } else {
1855                         p = p - 1;
1856                 }
1857
1858                 work_end -= iocfg->delay_per_opa_tap;
1859                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1860
1861                 d = 0;
1862
1863                 debug_cond(DLEVEL == 2, "%s:%d p: ptap=%u\n",
1864                            __func__, __LINE__, p);
1865         }
1866
1867         /* The dtap increment to find the failing edge is done here. */
1868         sdr_find_phase_delay(0, 1, grp, &work_end,
1869                              iocfg->delay_per_dqs_en_dchain_tap, &d);
1870
1871         /* Go back to working dtap */
1872         if (d != 0)
1873                 work_end -= iocfg->delay_per_dqs_en_dchain_tap;
1874
1875         debug_cond(DLEVEL == 2,
1876                    "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
1877                    __func__, __LINE__, p, d - 1, work_end);
1878
1879         if (work_end < work_bgn) {
1880                 /* nil range */
1881                 debug_cond(DLEVEL == 2, "%s:%d end-2: failed\n",
1882                            __func__, __LINE__);
1883                 return -EINVAL;
1884         }
1885
1886         debug_cond(DLEVEL == 2, "%s:%d found range [%u,%u]\n",
1887                    __func__, __LINE__, work_bgn, work_end);
1888
1889         /*
1890          * We need to calculate the number of dtaps that equal a ptap.
1891          * To do that we'll back up a ptap and re-find the edge of the
1892          * window using dtaps
1893          */
1894         debug_cond(DLEVEL == 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
1895                    __func__, __LINE__);
1896
1897         /* Special case code for backing up a phase */
1898         if (p == 0) {
1899                 p = iocfg->dqs_en_phase_max;
1900                 rw_mgr_decr_vfifo(grp);
1901                 debug_cond(DLEVEL == 2, "%s:%d backedup cycle/phase: p=%u\n",
1902                            __func__, __LINE__, p);
1903         } else {
1904                 p = p - 1;
1905                 debug_cond(DLEVEL == 2, "%s:%d backedup phase only: p=%u",
1906                            __func__, __LINE__, p);
1907         }
1908
1909         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1910
1911         /*
1912          * Increase dtap until we first see a passing read (in case the
1913          * window is smaller than a ptap), and then a failing read to
1914          * mark the edge of the window again.
1915          */
1916
1917         /* Find a passing read. */
1918         debug_cond(DLEVEL == 2, "%s:%d find passing read\n",
1919                    __func__, __LINE__);
1920
1921         initial_failing_dtap = d;
1922
1923         found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d);
1924         if (found_passing_read) {
1925                 /* Find a failing read. */
1926                 debug_cond(DLEVEL == 2, "%s:%d find failing read\n",
1927                            __func__, __LINE__);
1928                 d++;
1929                 found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0,
1930                                                            &d);
1931         } else {
1932                 debug_cond(DLEVEL == 1,
1933                            "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
1934                            __func__, __LINE__);
1935         }
1936
1937         /*
1938          * The dynamically calculated dtaps_per_ptap is only valid if we
1939          * found a passing/failing read. If we didn't, it means d hit the max
1940          * (iocfg->dqs_en_delay_max). Otherwise, dtaps_per_ptap retains its
1941          * statically calculated value.
1942          */
1943         if (found_passing_read && found_failing_read)
1944                 dtaps_per_ptap = d - initial_failing_dtap;
1945
1946         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1947         debug_cond(DLEVEL == 2, "%s:%d dtaps_per_ptap=%u - %u = %u",
1948                    __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);
1949
1950         /* Step 6: Find the centre of the window. */
1951         ret = sdr_find_window_center(grp, work_bgn, work_end);
1952
1953         return ret;
1954 }
1955
1956 /**
1957  * search_stop_check() - Check if the detected edge is valid
1958  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
1959  * @d:                  DQS delay
1960  * @rank_bgn:           Rank number
1961  * @write_group:        Write Group
1962  * @read_group:         Read Group
1963  * @bit_chk:            Resulting bit mask after the test
1964  * @sticky_bit_chk:     Resulting sticky bit mask after the test
1965  * @use_read_test:      Perform read test
1966  *
1967  * Test if the found edge is valid.
1968  */
1969 static u32 search_stop_check(const int write, const int d, const int rank_bgn,
1970                              const u32 write_group, const u32 read_group,
1971                              u32 *bit_chk, u32 *sticky_bit_chk,
1972                              const u32 use_read_test)
1973 {
1974         const u32 ratio = rwcfg->mem_if_read_dqs_width /
1975                           rwcfg->mem_if_write_dqs_width;
1976         const u32 correct_mask = write ? param->write_correct_mask :
1977                                          param->read_correct_mask;
1978         const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
1979                                     rwcfg->mem_dq_per_read_dqs;
1980         u32 ret;
1981         /*
1982          * Stop searching when the read test doesn't pass AND when
1983          * we've seen a passing read on every bit.
1984          */
1985         if (write) {                    /* WRITE-ONLY */
1986                 ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1987                                                          0, PASS_ONE_BIT,
1988                                                          bit_chk, 0);
1989         } else if (use_read_test) {     /* READ-ONLY */
1990                 ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group,
1991                                                         NUM_READ_PB_TESTS,
1992                                                         PASS_ONE_BIT, bit_chk,
1993                                                         0, 0);
1994         } else {                        /* READ-ONLY */
1995                 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0,
1996                                                 PASS_ONE_BIT, bit_chk, 0);
1997                 *bit_chk = *bit_chk >> (per_dqs *
1998                         (read_group - (write_group * ratio)));
1999                 ret = (*bit_chk == 0);
2000         }
2001         *sticky_bit_chk = *sticky_bit_chk | *bit_chk;
2002         ret = ret && (*sticky_bit_chk == correct_mask);
2003         debug_cond(DLEVEL == 2,
2004                    "%s:%d center(left): dtap=%u => %u == %u && %u",
2005                    __func__, __LINE__, d,
2006                    *sticky_bit_chk, correct_mask, ret);
2007         return ret;
2008 }
2009
2010 /**
2011  * search_left_edge() - Find left edge of DQ/DQS working phase
2012  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2013  * @rank_bgn:           Rank number
2014  * @write_group:        Write Group
2015  * @read_group:         Read Group
2016  * @test_bgn:           Rank number to begin the test
2017  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2018  * @left_edge:          Left edge of the DQ/DQS phase
2019  * @right_edge:         Right edge of the DQ/DQS phase
2020  * @use_read_test:      Perform read test
2021  *
2022  * Find left edge of DQ/DQS working phase.
2023  */
2024 static void search_left_edge(const int write, const int rank_bgn,
2025         const u32 write_group, const u32 read_group, const u32 test_bgn,
2026         u32 *sticky_bit_chk,
2027         int *left_edge, int *right_edge, const u32 use_read_test)
2028 {
2029         const u32 delay_max = write ? iocfg->io_out1_delay_max :
2030                                       iocfg->io_in_delay_max;
2031         const u32 dqs_max = write ? iocfg->io_out1_delay_max :
2032                                     iocfg->dqs_in_delay_max;
2033         const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2034                                     rwcfg->mem_dq_per_read_dqs;
2035         u32 stop, bit_chk;
2036         int i, d;
2037
2038         for (d = 0; d <= dqs_max; d++) {
2039                 if (write)
2040                         scc_mgr_apply_group_dq_out1_delay(d);
2041                 else
2042                         scc_mgr_apply_group_dq_in_delay(test_bgn, d);
2043
2044                 writel(0, &sdr_scc_mgr->update);
2045
2046                 stop = search_stop_check(write, d, rank_bgn, write_group,
2047                                          read_group, &bit_chk, sticky_bit_chk,
2048                                          use_read_test);
2049                 if (stop == 1)
2050                         break;
2051
2052                 /* stop != 1 */
2053                 for (i = 0; i < per_dqs; i++) {
2054                         if (bit_chk & 1) {
2055                                 /*
2056                                  * Remember a passing test as
2057                                  * the left_edge.
2058                                  */
2059                                 left_edge[i] = d;
2060                         } else {
2061                                 /*
2062                                  * If a left edge has not been seen
2063                                  * yet, then a future passing test
2064                                  * will mark this edge as the right
2065                                  * edge.
2066                                  */
2067                                 if (left_edge[i] == delay_max + 1)
2068                                         right_edge[i] = -(d + 1);
2069                         }
2070                         bit_chk >>= 1;
2071                 }
2072         }
2073
2074         /* Reset DQ delay chains to 0 */
2075         if (write)
2076                 scc_mgr_apply_group_dq_out1_delay(0);
2077         else
2078                 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2079
2080         *sticky_bit_chk = 0;
2081         for (i = per_dqs - 1; i >= 0; i--) {
2082                 debug_cond(DLEVEL == 2,
2083                            "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2084                            __func__, __LINE__, i, left_edge[i],
2085                            i, right_edge[i]);
2086
2087                 /*
2088                  * Check for cases where we haven't found the left edge,
2089                  * which makes our assignment of the the right edge invalid.
2090                  * Reset it to the illegal value.
2091                  */
2092                 if ((left_edge[i] == delay_max + 1) &&
2093                     (right_edge[i] != delay_max + 1)) {
2094                         right_edge[i] = delay_max + 1;
2095                         debug_cond(DLEVEL == 2,
2096                                    "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
2097                                    __func__, __LINE__, i, right_edge[i]);
2098                 }
2099
2100                 /*
2101                  * Reset sticky bit
2102                  * READ: except for bits where we have seen both
2103                  *       the left and right edge.
2104                  * WRITE: except for bits where we have seen the
2105                  *        left edge.
2106                  */
2107                 *sticky_bit_chk <<= 1;
2108                 if (write) {
2109                         if (left_edge[i] != delay_max + 1)
2110                                 *sticky_bit_chk |= 1;
2111                 } else {
2112                         if ((left_edge[i] != delay_max + 1) &&
2113                             (right_edge[i] != delay_max + 1))
2114                                 *sticky_bit_chk |= 1;
2115                 }
2116         }
2117 }
2118
2119 /**
2120  * search_right_edge() - Find right edge of DQ/DQS working phase
2121  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2122  * @rank_bgn:           Rank number
2123  * @write_group:        Write Group
2124  * @read_group:         Read Group
2125  * @start_dqs:          DQS start phase
2126  * @start_dqs_en:       DQS enable start phase
2127  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2128  * @left_edge:          Left edge of the DQ/DQS phase
2129  * @right_edge:         Right edge of the DQ/DQS phase
2130  * @use_read_test:      Perform read test
2131  *
2132  * Find right edge of DQ/DQS working phase.
2133  */
2134 static int search_right_edge(const int write, const int rank_bgn,
2135         const u32 write_group, const u32 read_group,
2136         const int start_dqs, const int start_dqs_en,
2137         u32 *sticky_bit_chk,
2138         int *left_edge, int *right_edge, const u32 use_read_test)
2139 {
2140         const u32 delay_max = write ? iocfg->io_out1_delay_max :
2141                                       iocfg->io_in_delay_max;
2142         const u32 dqs_max = write ? iocfg->io_out1_delay_max :
2143                                     iocfg->dqs_in_delay_max;
2144         const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2145                                     rwcfg->mem_dq_per_read_dqs;
2146         u32 stop, bit_chk;
2147         int i, d;
2148
2149         for (d = 0; d <= dqs_max - start_dqs; d++) {
2150                 if (write) {    /* WRITE-ONLY */
2151                         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2152                                                                 d + start_dqs);
2153                 } else {        /* READ-ONLY */
2154                         scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
2155                         if (iocfg->shift_dqs_en_when_shift_dqs) {
2156                                 u32 delay = d + start_dqs_en;
2157                                 if (delay > iocfg->dqs_en_delay_max)
2158                                         delay = iocfg->dqs_en_delay_max;
2159                                 scc_mgr_set_dqs_en_delay(read_group, delay);
2160                         }
2161                         scc_mgr_load_dqs(read_group);
2162                 }
2163
2164                 writel(0, &sdr_scc_mgr->update);
2165
2166                 stop = search_stop_check(write, d, rank_bgn, write_group,
2167                                          read_group, &bit_chk, sticky_bit_chk,
2168                                          use_read_test);
2169                 if (stop == 1) {
2170                         if (write && (d == 0)) {        /* WRITE-ONLY */
2171                                 for (i = 0; i < rwcfg->mem_dq_per_write_dqs;
2172                                      i++) {
2173                                         /*
2174                                          * d = 0 failed, but it passed when
2175                                          * testing the left edge, so it must be
2176                                          * marginal, set it to -1
2177                                          */
2178                                         if (right_edge[i] == delay_max + 1 &&
2179                                             left_edge[i] != delay_max + 1)
2180                                                 right_edge[i] = -1;
2181                                 }
2182                         }
2183                         break;
2184                 }
2185
2186                 /* stop != 1 */
2187                 for (i = 0; i < per_dqs; i++) {
2188                         if (bit_chk & 1) {
2189                                 /*
2190                                  * Remember a passing test as
2191                                  * the right_edge.
2192                                  */
2193                                 right_edge[i] = d;
2194                         } else {
2195                                 if (d != 0) {
2196                                         /*
2197                                          * If a right edge has not
2198                                          * been seen yet, then a future
2199                                          * passing test will mark this
2200                                          * edge as the left edge.
2201                                          */
2202                                         if (right_edge[i] == delay_max + 1)
2203                                                 left_edge[i] = -(d + 1);
2204                                 } else {
2205                                         /*
2206                                          * d = 0 failed, but it passed
2207                                          * when testing the left edge,
2208                                          * so it must be marginal, set
2209                                          * it to -1
2210                                          */
2211                                         if (right_edge[i] == delay_max + 1 &&
2212                                             left_edge[i] != delay_max + 1)
2213                                                 right_edge[i] = -1;
2214                                         /*
2215                                          * If a right edge has not been
2216                                          * seen yet, then a future
2217                                          * passing test will mark this
2218                                          * edge as the left edge.
2219                                          */
2220                                         else if (right_edge[i] == delay_max + 1)
2221                                                 left_edge[i] = -(d + 1);
2222                                 }
2223                         }
2224
2225                         debug_cond(DLEVEL == 2, "%s:%d center[r,d=%u]: ",
2226                                    __func__, __LINE__, d);
2227                         debug_cond(DLEVEL == 2,
2228                                    "bit_chk_test=%i left_edge[%u]: %d ",
2229                                    bit_chk & 1, i, left_edge[i]);
2230                         debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2231                                    right_edge[i]);
2232                         bit_chk >>= 1;
2233                 }
2234         }
2235
2236         /* Check that all bits have a window */
2237         for (i = 0; i < per_dqs; i++) {
2238                 debug_cond(DLEVEL == 2,
2239                            "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d",
2240                            __func__, __LINE__, i, left_edge[i],
2241                            i, right_edge[i]);
2242                 if ((left_edge[i] == dqs_max + 1) ||
2243                     (right_edge[i] == dqs_max + 1))
2244                         return i + 1;   /* FIXME: If we fail, retval > 0 */
2245         }
2246
2247         return 0;
2248 }
2249
2250 /**
2251  * get_window_mid_index() - Find the best middle setting of DQ/DQS phase
2252  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2253  * @left_edge:          Left edge of the DQ/DQS phase
2254  * @right_edge:         Right edge of the DQ/DQS phase
2255  * @mid_min:            Best DQ/DQS phase middle setting
2256  *
2257  * Find index and value of the middle of the DQ/DQS working phase.
2258  */
2259 static int get_window_mid_index(const int write, int *left_edge,
2260                                 int *right_edge, int *mid_min)
2261 {
2262         const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2263                                     rwcfg->mem_dq_per_read_dqs;
2264         int i, mid, min_index;
2265
2266         /* Find middle of window for each DQ bit */
2267         *mid_min = left_edge[0] - right_edge[0];
2268         min_index = 0;
2269         for (i = 1; i < per_dqs; i++) {
2270                 mid = left_edge[i] - right_edge[i];
2271                 if (mid < *mid_min) {
2272                         *mid_min = mid;
2273                         min_index = i;
2274                 }
2275         }
2276
2277         /*
2278          * -mid_min/2 represents the amount that we need to move DQS.
2279          * If mid_min is odd and positive we'll need to add one to make
2280          * sure the rounding in further calculations is correct (always
2281          * bias to the right), so just add 1 for all positive values.
2282          */
2283         if (*mid_min > 0)
2284                 (*mid_min)++;
2285         *mid_min = *mid_min / 2;
2286
2287         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n",
2288                    __func__, __LINE__, *mid_min, min_index);
2289         return min_index;
2290 }
2291
2292 /**
2293  * center_dq_windows() - Center the DQ/DQS windows
2294  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2295  * @left_edge:          Left edge of the DQ/DQS phase
2296  * @right_edge:         Right edge of the DQ/DQS phase
2297  * @mid_min:            Adjusted DQ/DQS phase middle setting
2298  * @orig_mid_min:       Original DQ/DQS phase middle setting
2299  * @min_index:          DQ/DQS phase middle setting index
2300  * @test_bgn:           Rank number to begin the test
2301  * @dq_margin:          Amount of shift for the DQ
2302  * @dqs_margin:         Amount of shift for the DQS
2303  *
2304  * Align the DQ/DQS windows in each group.
2305  */
2306 static void center_dq_windows(const int write, int *left_edge, int *right_edge,
2307                               const int mid_min, const int orig_mid_min,
2308                               const int min_index, const int test_bgn,
2309                               int *dq_margin, int *dqs_margin)
2310 {
2311         const u32 delay_max = write ? iocfg->io_out1_delay_max :
2312                                       iocfg->io_in_delay_max;
2313         const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs :
2314                                     rwcfg->mem_dq_per_read_dqs;
2315         const u32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET :
2316                                       SCC_MGR_IO_IN_DELAY_OFFSET;
2317         const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off;
2318
2319         u32 temp_dq_io_delay1, temp_dq_io_delay2;
2320         int shift_dq, i, p;
2321
2322         /* Initialize data for export structures */
2323         *dqs_margin = delay_max + 1;
2324         *dq_margin  = delay_max + 1;
2325
2326         /* add delay to bring centre of all DQ windows to the same "level" */
2327         for (i = 0, p = test_bgn; i < per_dqs; i++, p++) {
2328                 /* Use values before divide by 2 to reduce round off error */
2329                 shift_dq = (left_edge[i] - right_edge[i] -
2330                         (left_edge[min_index] - right_edge[min_index]))/2  +
2331                         (orig_mid_min - mid_min);
2332
2333                 debug_cond(DLEVEL == 2,
2334                            "vfifo_center: before: shift_dq[%u]=%d\n",
2335                            i, shift_dq);
2336
2337                 temp_dq_io_delay1 = readl(addr + (p << 2));
2338                 temp_dq_io_delay2 = readl(addr + (i << 2));
2339
2340                 if (shift_dq + temp_dq_io_delay1 > delay_max)
2341                         shift_dq = delay_max - temp_dq_io_delay2;
2342                 else if (shift_dq + temp_dq_io_delay1 < 0)
2343                         shift_dq = -temp_dq_io_delay1;
2344
2345                 debug_cond(DLEVEL == 2,
2346                            "vfifo_center: after: shift_dq[%u]=%d\n",
2347                            i, shift_dq);
2348
2349                 if (write)
2350                         scc_mgr_set_dq_out1_delay(i,
2351                                                   temp_dq_io_delay1 + shift_dq);
2352                 else
2353                         scc_mgr_set_dq_in_delay(p,
2354                                                 temp_dq_io_delay1 + shift_dq);
2355
2356                 scc_mgr_load_dq(p);
2357
2358                 debug_cond(DLEVEL == 2,
2359                            "vfifo_center: margin[%u]=[%d,%d]\n", i,
2360                            left_edge[i] - shift_dq + (-mid_min),
2361                            right_edge[i] + shift_dq - (-mid_min));
2362
2363                 /* To determine values for export structures */
2364                 if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin)
2365                         *dq_margin = left_edge[i] - shift_dq + (-mid_min);
2366
2367                 if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin)
2368                         *dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2369         }
2370 }
2371
2372 /**
2373  * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering
2374  * @rank_bgn:           Rank number
2375  * @rw_group:           Read/Write Group
2376  * @test_bgn:           Rank at which the test begins
2377  * @use_read_test:      Perform a read test
2378  * @update_fom:         Update FOM
2379  *
2380  * Per-bit deskew DQ and centering.
2381  */
2382 static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn,
2383                         const u32 rw_group, const u32 test_bgn,
2384                         const int use_read_test, const int update_fom)
2385 {
2386         const u32 addr =
2387                 SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET +
2388                 (rw_group << 2);
2389         /*
2390          * Store these as signed since there are comparisons with
2391          * signed numbers.
2392          */
2393         u32 sticky_bit_chk;
2394         int32_t left_edge[rwcfg->mem_dq_per_read_dqs];
2395         int32_t right_edge[rwcfg->mem_dq_per_read_dqs];
2396         int32_t orig_mid_min, mid_min;
2397         int32_t new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en;
2398         int32_t dq_margin, dqs_margin;
2399         int i, min_index;
2400         int ret;
2401
2402         debug("%s:%d: %u %u", __func__, __LINE__, rw_group, test_bgn);
2403
2404         start_dqs = readl(addr);
2405         if (iocfg->shift_dqs_en_when_shift_dqs)
2406                 start_dqs_en = readl(addr - iocfg->dqs_en_delay_offset);
2407
2408         /* set the left and right edge of each bit to an illegal value */
2409         /* use (iocfg->io_in_delay_max + 1) as an illegal value */
2410         sticky_bit_chk = 0;
2411         for (i = 0; i < rwcfg->mem_dq_per_read_dqs; i++) {
2412                 left_edge[i]  = iocfg->io_in_delay_max + 1;
2413                 right_edge[i] = iocfg->io_in_delay_max + 1;
2414         }
2415
2416         /* Search for the left edge of the window for each bit */
2417         search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn,
2418                          &sticky_bit_chk,
2419                          left_edge, right_edge, use_read_test);
2420
2421
2422         /* Search for the right edge of the window for each bit */
2423         ret = search_right_edge(0, rank_bgn, rw_group, rw_group,
2424                                 start_dqs, start_dqs_en,
2425                                 &sticky_bit_chk,
2426                                 left_edge, right_edge, use_read_test);
2427         if (ret) {
2428                 /*
2429                  * Restore delay chain settings before letting the loop
2430                  * in rw_mgr_mem_calibrate_vfifo to retry different
2431                  * dqs/ck relationships.
2432                  */
2433                 scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs);
2434                 if (iocfg->shift_dqs_en_when_shift_dqs)
2435                         scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en);
2436
2437                 scc_mgr_load_dqs(rw_group);
2438                 writel(0, &sdr_scc_mgr->update);
2439
2440                 debug_cond(DLEVEL == 1,
2441                            "%s:%d vfifo_center: failed to find edge [%u]: %d %d",
2442                            __func__, __LINE__, i, left_edge[i], right_edge[i]);
2443                 if (use_read_test) {
2444                         set_failing_group_stage(rw_group *
2445                                 rwcfg->mem_dq_per_read_dqs + i,
2446                                 CAL_STAGE_VFIFO,
2447                                 CAL_SUBSTAGE_VFIFO_CENTER);
2448                 } else {
2449                         set_failing_group_stage(rw_group *
2450                                 rwcfg->mem_dq_per_read_dqs + i,
2451                                 CAL_STAGE_VFIFO_AFTER_WRITES,
2452                                 CAL_SUBSTAGE_VFIFO_CENTER);
2453                 }
2454                 return -EIO;
2455         }
2456
2457         min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min);
2458
2459         /* Determine the amount we can change DQS (which is -mid_min) */
2460         orig_mid_min = mid_min;
2461         new_dqs = start_dqs - mid_min;
2462         if (new_dqs > iocfg->dqs_in_delay_max)
2463                 new_dqs = iocfg->dqs_in_delay_max;
2464         else if (new_dqs < 0)
2465                 new_dqs = 0;
2466
2467         mid_min = start_dqs - new_dqs;
2468         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2469                    mid_min, new_dqs);
2470
2471         if (iocfg->shift_dqs_en_when_shift_dqs) {
2472                 if (start_dqs_en - mid_min > iocfg->dqs_en_delay_max)
2473                         mid_min += start_dqs_en - mid_min -
2474                                    iocfg->dqs_en_delay_max;
2475                 else if (start_dqs_en - mid_min < 0)
2476                         mid_min += start_dqs_en - mid_min;
2477         }
2478         new_dqs = start_dqs - mid_min;
2479
2480         debug_cond(DLEVEL == 1,
2481                    "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2482                    start_dqs,
2483                    iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1,
2484                    new_dqs, mid_min);
2485
2486         /* Add delay to bring centre of all DQ windows to the same "level". */
2487         center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min,
2488                           min_index, test_bgn, &dq_margin, &dqs_margin);
2489
2490         /* Move DQS-en */
2491         if (iocfg->shift_dqs_en_when_shift_dqs) {
2492                 final_dqs_en = start_dqs_en - mid_min;
2493                 scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
2494                 scc_mgr_load_dqs(rw_group);
2495         }
2496
2497         /* Move DQS */
2498         scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
2499         scc_mgr_load_dqs(rw_group);
2500         debug_cond(DLEVEL == 2,
2501                    "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d",
2502                    __func__, __LINE__, dq_margin, dqs_margin);
2503
2504         /*
2505          * Do not remove this line as it makes sure all of our decisions
2506          * have been applied. Apply the update bit.
2507          */
2508         writel(0, &sdr_scc_mgr->update);
2509
2510         if ((dq_margin < 0) || (dqs_margin < 0))
2511                 return -EINVAL;
2512
2513         return 0;
2514 }
2515
2516 /**
2517  * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
2518  * @rw_group:   Read/Write Group
2519  * @phase:      DQ/DQS phase
2520  *
2521  * Because initially no communication ca be reliably performed with the memory
2522  * device, the sequencer uses a guaranteed write mechanism to write data into
2523  * the memory device.
2524  */
2525 static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
2526                                                  const u32 phase)
2527 {
2528         int ret;
2529
2530         /* Set a particular DQ/DQS phase. */
2531         scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);
2532
2533         debug_cond(DLEVEL == 1, "%s:%d guaranteed write: g=%u p=%u\n",
2534                    __func__, __LINE__, rw_group, phase);
2535
2536         /*
2537          * Altera EMI_RM 2015.05.04 :: Figure 1-25
2538          * Load up the patterns used by read calibration using the
2539          * current DQDQS phase.
2540          */
2541         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2542
2543         if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
2544                 return 0;
2545
2546         /*
2547          * Altera EMI_RM 2015.05.04 :: Figure 1-26
2548          * Back-to-Back reads of the patterns used for calibration.
2549          */
2550         ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
2551         if (ret)
2552                 debug_cond(DLEVEL == 1,
2553                            "%s:%d Guaranteed read test failed: g=%u p=%u\n",
2554                            __func__, __LINE__, rw_group, phase);
2555         return ret;
2556 }
2557
2558 /**
2559  * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
2560  * @rw_group:   Read/Write Group
2561  * @test_bgn:   Rank at which the test begins
2562  *
2563  * DQS enable calibration ensures reliable capture of the DQ signal without
2564  * glitches on the DQS line.
2565  */
2566 static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
2567                                                        const u32 test_bgn)
2568 {
2569         /*
2570          * Altera EMI_RM 2015.05.04 :: Figure 1-27
2571          * DQS and DQS Eanble Signal Relationships.
2572          */
2573
2574         /* We start at zero, so have one less dq to devide among */
2575         const u32 delay_step = iocfg->io_in_delay_max /
2576                                (rwcfg->mem_dq_per_read_dqs - 1);
2577         int ret;
2578         u32 i, p, d, r;
2579
2580         debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
2581
2582         /* Try different dq_in_delays since the DQ path is shorter than DQS. */
2583         for (r = 0; r < rwcfg->mem_number_of_ranks;
2584              r += NUM_RANKS_PER_SHADOW_REG) {
2585                 for (i = 0, p = test_bgn, d = 0;
2586                      i < rwcfg->mem_dq_per_read_dqs;
2587                      i++, p++, d += delay_step) {
2588                         debug_cond(DLEVEL == 1,
2589                                    "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
2590                                    __func__, __LINE__, rw_group, r, i, p, d);
2591
2592                         scc_mgr_set_dq_in_delay(p, d);
2593                         scc_mgr_load_dq(p);
2594                 }
2595
2596                 writel(0, &sdr_scc_mgr->update);
2597         }
2598
2599         /*
2600          * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
2601          * dq_in_delay values
2602          */
2603         ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group);
2604
2605         debug_cond(DLEVEL == 1,
2606                    "%s:%d: g=%u found=%u; Reseting delay chain to zero\n",
2607                    __func__, __LINE__, rw_group, !ret);
2608
2609         for (r = 0; r < rwcfg->mem_number_of_ranks;
2610              r += NUM_RANKS_PER_SHADOW_REG) {
2611                 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2612                 writel(0, &sdr_scc_mgr->update);
2613         }
2614
2615         return ret;
2616 }
2617
2618 /**
2619  * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
2620  * @rw_group:           Read/Write Group
2621  * @test_bgn:           Rank at which the test begins
2622  * @use_read_test:      Perform a read test
2623  * @update_fom:         Update FOM
2624  *
2625  * The centerin DQ/DQS stage attempts to align DQ and DQS signals on reads
2626  * within a group.
2627  */
2628 static int
2629 rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
2630                                       const int use_read_test,
2631                                       const int update_fom)
2632
2633 {
2634         int ret, grp_calibrated;
2635         u32 rank_bgn, sr;
2636
2637         /*
2638          * Altera EMI_RM 2015.05.04 :: Figure 1-28
2639          * Read per-bit deskew can be done on a per shadow register basis.
2640          */
2641         grp_calibrated = 1;
2642         for (rank_bgn = 0, sr = 0;
2643              rank_bgn < rwcfg->mem_number_of_ranks;
2644              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
2645                 ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
2646                                                         test_bgn,
2647                                                         use_read_test,
2648                                                         update_fom);
2649                 if (!ret)
2650                         continue;
2651
2652                 grp_calibrated = 0;
2653         }
2654
2655         if (!grp_calibrated)
2656                 return -EIO;
2657
2658         return 0;
2659 }
2660
2661 /**
2662  * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
2663  * @rw_group:           Read/Write Group
2664  * @test_bgn:           Rank at which the test begins
2665  *
2666  * Stage 1: Calibrate the read valid prediction FIFO.
2667  *
2668  * This function implements UniPHY calibration Stage 1, as explained in
2669  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2670  *
2671  * - read valid prediction will consist of finding:
2672  *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
2673  *   - DQS input phase  and DQS input delay (DQ/DQS Centering)
2674  *  - we also do a per-bit deskew on the DQ lines.
2675  */
2676 static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
2677 {
2678         u32 p, d;
2679         u32 dtaps_per_ptap;
2680         u32 failed_substage;
2681
2682         int ret;
2683
2684         debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2685
2686         /* Update info for sims */
2687         reg_file_set_group(rw_group);
2688         reg_file_set_stage(CAL_STAGE_VFIFO);
2689         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2690
2691         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2692
2693         /* USER Determine number of delay taps for each phase tap. */
2694         dtaps_per_ptap = DIV_ROUND_UP(iocfg->delay_per_opa_tap,
2695                                       iocfg->delay_per_dqs_en_dchain_tap) - 1;
2696
2697         for (d = 0; d <= dtaps_per_ptap; d += 2) {
2698                 /*
2699                  * In RLDRAMX we may be messing the delay of pins in
2700                  * the same write rw_group but outside of the current read
2701                  * the rw_group, but that's ok because we haven't calibrated
2702                  * output side yet.
2703                  */
2704                 if (d > 0) {
2705                         scc_mgr_apply_group_all_out_delay_add_all_ranks(
2706                                                                 rw_group, d);
2707                 }
2708
2709                 for (p = 0; p <= iocfg->dqdqs_out_phase_max; p++) {
2710                         /* 1) Guaranteed Write */
2711                         ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p);
2712                         if (ret)
2713                                 break;
2714
2715                         /* 2) DQS Enable Calibration */
2716                         ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group,
2717                                                                           test_bgn);
2718                         if (ret) {
2719                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2720                                 continue;
2721                         }
2722
2723                         /* 3) Centering DQ/DQS */
2724                         /*
2725                          * If doing read after write calibration, do not update
2726                          * FOM now. Do it then.
2727                          */
2728                         ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group,
2729                                                                 test_bgn, 1, 0);
2730                         if (ret) {
2731                                 failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
2732                                 continue;
2733                         }
2734
2735                         /* All done. */
2736                         goto cal_done_ok;
2737                 }
2738         }
2739
2740         /* Calibration Stage 1 failed. */
2741         set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage);
2742         return 0;
2743
2744         /* Calibration Stage 1 completed OK. */
2745 cal_done_ok:
2746         /*
2747          * Reset the delay chains back to zero if they have moved > 1
2748          * (check for > 1 because loop will increase d even when pass in
2749          * first case).
2750          */
2751         if (d > 2)
2752                 scc_mgr_zero_group(rw_group, 1);
2753
2754         return 1;
2755 }
2756
2757 /**
2758  * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
2759  * @rw_group:           Read/Write Group
2760  * @test_bgn:           Rank at which the test begins
2761  *
2762  * Stage 3: DQ/DQS Centering.
2763  *
2764  * This function implements UniPHY calibration Stage 3, as explained in
2765  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2766  */
2767 static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group,
2768                                           const u32 test_bgn)
2769 {
2770         int ret;
2771
2772         debug("%s:%d %u %u", __func__, __LINE__, rw_group, test_bgn);
2773
2774         /* Update info for sims. */
2775         reg_file_set_group(rw_group);
2776         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2777         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2778
2779         ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1);
2780         if (ret)
2781                 set_failing_group_stage(rw_group,
2782                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2783                                         CAL_SUBSTAGE_VFIFO_CENTER);
2784         return ret;
2785 }
2786
2787 /**
2788  * rw_mgr_mem_calibrate_lfifo() - Minimize latency
2789  *
2790  * Stage 4: Minimize latency.
2791  *
2792  * This function implements UniPHY calibration Stage 4, as explained in
2793  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2794  * Calibrate LFIFO to find smallest read latency.
2795  */
2796 static u32 rw_mgr_mem_calibrate_lfifo(void)
2797 {
2798         int found_one = 0;
2799
2800         debug("%s:%d\n", __func__, __LINE__);
2801
2802         /* Update info for sims. */
2803         reg_file_set_stage(CAL_STAGE_LFIFO);
2804         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2805
2806         /* Load up the patterns used by read calibration for all ranks */
2807         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2808
2809         do {
2810                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2811                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
2812                            __func__, __LINE__, gbl->curr_read_lat);
2813
2814                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS,
2815                                                               PASS_ALL_BITS, 1))
2816                         break;
2817
2818                 found_one = 1;
2819                 /*
2820                  * Reduce read latency and see if things are
2821                  * working correctly.
2822                  */
2823                 gbl->curr_read_lat--;
2824         } while (gbl->curr_read_lat > 0);
2825
2826         /* Reset the fifos to get pointers to known state. */
2827         writel(0, &phy_mgr_cmd->fifo_reset);
2828
2829         if (found_one) {
2830                 /* Add a fudge factor to the read latency that was determined */
2831                 gbl->curr_read_lat += 2;
2832                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2833                 debug_cond(DLEVEL == 2,
2834                            "%s:%d lfifo: success: using read_lat=%u\n",
2835                            __func__, __LINE__, gbl->curr_read_lat);
2836         } else {
2837                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2838                                         CAL_SUBSTAGE_READ_LATENCY);
2839
2840                 debug_cond(DLEVEL == 2,
2841                            "%s:%d lfifo: failed at initial read_lat=%u\n",
2842                            __func__, __LINE__, gbl->curr_read_lat);
2843         }
2844
2845         return found_one;
2846 }
2847
2848 /**
2849  * search_window() - Search for the/part of the window with DM/DQS shift
2850  * @search_dm:          If 1, search for the DM shift, if 0, search for DQS shift
2851  * @rank_bgn:           Rank number
2852  * @write_group:        Write Group
2853  * @bgn_curr:           Current window begin
2854  * @end_curr:           Current window end
2855  * @bgn_best:           Current best window begin
2856  * @end_best:           Current best window end
2857  * @win_best:           Size of the best window
2858  * @new_dqs:            New DQS value (only applicable if search_dm = 0).
2859  *
2860  * Search for the/part of the window with DM/DQS shift.
2861  */
2862 static void search_window(const int search_dm,
2863                           const u32 rank_bgn, const u32 write_group,
2864                           int *bgn_curr, int *end_curr, int *bgn_best,
2865                           int *end_best, int *win_best, int new_dqs)
2866 {
2867         u32 bit_chk;
2868         const int max = iocfg->io_out1_delay_max - new_dqs;
2869         int d, di;
2870
2871         /* Search for the/part of the window with DM/DQS shift. */
2872         for (di = max; di >= 0; di -= DELTA_D) {
2873                 if (search_dm) {
2874                         d = di;
2875                         scc_mgr_apply_group_dm_out1_delay(d);
2876                 } else {
2877                         /* For DQS, we go from 0...max */
2878                         d = max - di;
2879                         /*
2880                          * Note: This only shifts DQS, so are we limiting
2881                          *       ourselves to width of DQ unnecessarily.
2882                          */
2883                         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2884                                                                 d + new_dqs);
2885                 }
2886
2887                 writel(0, &sdr_scc_mgr->update);
2888
2889                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2890                                                     PASS_ALL_BITS, &bit_chk,
2891                                                     0)) {
2892                         /* Set current end of the window. */
2893                         *end_curr = search_dm ? -d : d;
2894
2895                         /*
2896                          * If a starting edge of our window has not been seen
2897                          * this is our current start of the DM window.
2898                          */
2899                         if (*bgn_curr == iocfg->io_out1_delay_max + 1)
2900                                 *bgn_curr = search_dm ? -d : d;
2901
2902                         /*
2903                          * If current window is bigger than best seen.
2904                          * Set best seen to be current window.
2905                          */
2906                         if ((*end_curr - *bgn_curr + 1) > *win_best) {
2907                                 *win_best = *end_curr - *bgn_curr + 1;
2908                                 *bgn_best = *bgn_curr;
2909                                 *end_best = *end_curr;
2910                         }
2911                 } else {
2912                         /* We just saw a failing test. Reset temp edge. */
2913                         *bgn_curr = iocfg->io_out1_delay_max + 1;
2914                         *end_curr = iocfg->io_out1_delay_max + 1;
2915
2916                         /* Early exit is only applicable to DQS. */
2917                         if (search_dm)
2918                                 continue;
2919
2920                         /*
2921                          * Early exit optimization: if the remaining delay
2922                          * chain space is less than already seen largest
2923                          * window we can exit.
2924                          */
2925                         if (*win_best - 1 > iocfg->io_out1_delay_max - new_dqs - d)
2926                                 break;
2927                 }
2928         }
2929 }
2930
2931 /*
2932  * rw_mgr_mem_calibrate_writes_center() - Center all windows
2933  * @rank_bgn:           Rank number
2934  * @write_group:        Write group
2935  * @test_bgn:           Rank at which the test begins
2936  *
2937  * Center all windows. Do per-bit-deskew to possibly increase size of
2938  * certain windows.
2939  */
2940 static int
2941 rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group,
2942                                    const u32 test_bgn)
2943 {
2944         int i;
2945         u32 sticky_bit_chk;
2946         u32 min_index;
2947         int left_edge[rwcfg->mem_dq_per_write_dqs];
2948         int right_edge[rwcfg->mem_dq_per_write_dqs];
2949         int mid;
2950         int mid_min, orig_mid_min;
2951         int new_dqs, start_dqs;
2952         int dq_margin, dqs_margin, dm_margin;
2953         int bgn_curr = iocfg->io_out1_delay_max + 1;
2954         int end_curr = iocfg->io_out1_delay_max + 1;
2955         int bgn_best = iocfg->io_out1_delay_max + 1;
2956         int end_best = iocfg->io_out1_delay_max + 1;
2957         int win_best = 0;
2958
2959         int ret;
2960
2961         debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);
2962
2963         dm_margin = 0;
2964
2965         start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
2966                           SCC_MGR_IO_OUT1_DELAY_OFFSET) +
2967                           (rwcfg->mem_dq_per_write_dqs << 2));
2968
2969         /* Per-bit deskew. */
2970
2971         /*
2972          * Set the left and right edge of each bit to an illegal value.
2973          * Use (iocfg->io_out1_delay_max + 1) as an illegal value.
2974          */
2975         sticky_bit_chk = 0;
2976         for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
2977                 left_edge[i]  = iocfg->io_out1_delay_max + 1;
2978                 right_edge[i] = iocfg->io_out1_delay_max + 1;
2979         }
2980
2981         /* Search for the left edge of the window for each bit. */
2982         search_left_edge(1, rank_bgn, write_group, 0, test_bgn,
2983                          &sticky_bit_chk,
2984                          left_edge, right_edge, 0);
2985
2986         /* Search for the right edge of the window for each bit. */
2987         ret = search_right_edge(1, rank_bgn, write_group, 0,
2988                                 start_dqs, 0,
2989                                 &sticky_bit_chk,
2990                                 left_edge, right_edge, 0);
2991         if (ret) {
2992                 set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES,
2993                                         CAL_SUBSTAGE_WRITES_CENTER);
2994                 return -EINVAL;
2995         }
2996
2997         min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min);
2998
2999         /* Determine the amount we can change DQS (which is -mid_min). */
3000         orig_mid_min = mid_min;
3001         new_dqs = start_dqs;
3002         mid_min = 0;
3003         debug_cond(DLEVEL == 1,
3004                    "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
3005                    __func__, __LINE__, start_dqs, new_dqs, mid_min);
3006
3007         /* Add delay to bring centre of all DQ windows to the same "level". */
3008         center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min,
3009                           min_index, 0, &dq_margin, &dqs_margin);
3010
3011         /* Move DQS */
3012         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3013         writel(0, &sdr_scc_mgr->update);
3014
3015         /* Centre DM */
3016         debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
3017
3018         /*
3019          * Set the left and right edge of each bit to an illegal value.
3020          * Use (iocfg->io_out1_delay_max + 1) as an illegal value.
3021          */
3022         left_edge[0]  = iocfg->io_out1_delay_max + 1;
3023         right_edge[0] = iocfg->io_out1_delay_max + 1;
3024
3025         /* Search for the/part of the window with DM shift. */
3026         search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr,
3027                       &bgn_best, &end_best, &win_best, 0);
3028
3029         /* Reset DM delay chains to 0. */
3030         scc_mgr_apply_group_dm_out1_delay(0);
3031
3032         /*
3033          * Check to see if the current window nudges up aganist 0 delay.
3034          * If so we need to continue the search by shifting DQS otherwise DQS
3035          * search begins as a new search.
3036          */
3037         if (end_curr != 0) {
3038                 bgn_curr = iocfg->io_out1_delay_max + 1;
3039                 end_curr = iocfg->io_out1_delay_max + 1;
3040         }
3041
3042         /* Search for the/part of the window with DQS shifts. */
3043         search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr,
3044                       &bgn_best, &end_best, &win_best, new_dqs);
3045
3046         /* Assign left and right edge for cal and reporting. */
3047         left_edge[0] = -1 * bgn_best;
3048         right_edge[0] = end_best;
3049
3050         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n",
3051                    __func__, __LINE__, left_edge[0], right_edge[0]);
3052
3053         /* Move DQS (back to orig). */
3054         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3055
3056         /* Move DM */
3057
3058         /* Find middle of window for the DM bit. */
3059         mid = (left_edge[0] - right_edge[0]) / 2;
3060
3061         /* Only move right, since we are not moving DQS/DQ. */
3062         if (mid < 0)
3063                 mid = 0;
3064
3065         /* dm_marign should fail if we never find a window. */
3066         if (win_best == 0)
3067                 dm_margin = -1;
3068         else
3069                 dm_margin = left_edge[0] - mid;
3070
3071         scc_mgr_apply_group_dm_out1_delay(mid);
3072         writel(0, &sdr_scc_mgr->update);
3073
3074         debug_cond(DLEVEL == 2,
3075                    "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
3076                    __func__, __LINE__, left_edge[0], right_edge[0],
3077                    mid, dm_margin);
3078         /* Export values. */
3079         gbl->fom_out += dq_margin + dqs_margin;
3080
3081         debug_cond(DLEVEL == 2,
3082                    "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
3083                    __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
3084
3085         /*
3086          * Do not remove this line as it makes sure all of our
3087          * decisions have been applied.
3088          */
3089         writel(0, &sdr_scc_mgr->update);
3090
3091         if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
3092                 return -EINVAL;
3093
3094         return 0;
3095 }
3096
3097 /**
3098  * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
3099  * @rank_bgn:           Rank number
3100  * @group:              Read/Write Group
3101  * @test_bgn:           Rank at which the test begins
3102  *
3103  * Stage 2: Write Calibration Part One.
3104  *
3105  * This function implements UniPHY calibration Stage 2, as explained in
3106  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
3107  */
3108 static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group,
3109                                        const u32 test_bgn)
3110 {
3111         int ret;
3112
3113         /* Update info for sims */
3114         debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
3115
3116         reg_file_set_group(group);
3117         reg_file_set_stage(CAL_STAGE_WRITES);
3118         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3119
3120         ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn);
3121         if (ret)
3122                 set_failing_group_stage(group, CAL_STAGE_WRITES,
3123                                         CAL_SUBSTAGE_WRITES_CENTER);
3124
3125         return ret;
3126 }
3127
3128 /**
3129  * mem_precharge_and_activate() - Precharge all banks and activate
3130  *
3131  * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
3132  */
3133 static void mem_precharge_and_activate(void)
3134 {
3135         int r;
3136
3137         for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
3138                 /* Set rank. */
3139                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3140
3141                 /* Precharge all banks. */
3142                 writel(rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3143                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3144
3145                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3146                 writel(rwcfg->activate_0_and_1_wait1,
3147                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3148
3149                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3150                 writel(rwcfg->activate_0_and_1_wait2,
3151                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3152
3153                 /* Activate rows. */
3154                 writel(rwcfg->activate_0_and_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3155                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3156         }
3157 }
3158
3159 /**
3160  * mem_init_latency() - Configure memory RLAT and WLAT settings
3161  *
3162  * Configure memory RLAT and WLAT parameters.
3163  */
3164 static void mem_init_latency(void)
3165 {
3166         /*
3167          * For AV/CV, LFIFO is hardened and always runs at full rate
3168          * so max latency in AFI clocks, used here, is correspondingly
3169          * smaller.
3170          */
3171         const u32 max_latency = (1 << misccfg->max_latency_count_width) - 1;
3172         u32 rlat, wlat;
3173
3174         debug("%s:%d\n", __func__, __LINE__);
3175
3176         /*
3177          * Read in write latency.
3178          * WL for Hard PHY does not include additive latency.
3179          */
3180         wlat = readl(&data_mgr->t_wl_add);
3181         wlat += readl(&data_mgr->mem_t_add);
3182
3183         gbl->rw_wl_nop_cycles = wlat - 1;
3184
3185         /* Read in readl latency. */
3186         rlat = readl(&data_mgr->t_rl_add);
3187
3188         /* Set a pretty high read latency initially. */
3189         gbl->curr_read_lat = rlat + 16;
3190         if (gbl->curr_read_lat > max_latency)
3191                 gbl->curr_read_lat = max_latency;
3192
3193         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3194
3195         /* Advertise write latency. */
3196         writel(wlat, &phy_mgr_cfg->afi_wlat);
3197 }
3198
3199 /**
3200  * @mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
3201  *
3202  * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
3203  */
3204 static void mem_skip_calibrate(void)
3205 {
3206         u32 vfifo_offset;
3207         u32 i, j, r;
3208
3209         debug("%s:%d\n", __func__, __LINE__);
3210         /* Need to update every shadow register set used by the interface */
3211         for (r = 0; r < rwcfg->mem_number_of_ranks;
3212              r += NUM_RANKS_PER_SHADOW_REG) {
3213                 /*
3214                  * Set output phase alignment settings appropriate for
3215                  * skip calibration.
3216                  */
3217                 for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
3218                         scc_mgr_set_dqs_en_phase(i, 0);
3219                         if (iocfg->dll_chain_length == 6)
3220                                 scc_mgr_set_dqdqs_output_phase(i, 6);
3221                         else
3222                                 scc_mgr_set_dqdqs_output_phase(i, 7);
3223                         /*
3224                          * Case:33398
3225                          *
3226                          * Write data arrives to the I/O two cycles before write
3227                          * latency is reached (720 deg).
3228                          *   -> due to bit-slip in a/c bus
3229                          *   -> to allow board skew where dqs is longer than ck
3230                          *      -> how often can this happen!?
3231                          *      -> can claim back some ptaps for high freq
3232                          *       support if we can relax this, but i digress...
3233                          *
3234                          * The write_clk leads mem_ck by 90 deg
3235                          * The minimum ptap of the OPA is 180 deg
3236                          * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay
3237                          * The write_clk is always delayed by 2 ptaps
3238                          *
3239                          * Hence, to make DQS aligned to CK, we need to delay
3240                          * DQS by:
3241                          *    (720 - 90 - 180 - 2) *
3242                          *      (360 / iocfg->dll_chain_length)
3243                          *
3244                          * Dividing the above by (360 / iocfg->dll_chain_length)
3245                          * gives us the number of ptaps, which simplies to:
3246                          *
3247                          *    (1.25 * iocfg->dll_chain_length - 2)
3248                          */
3249                         scc_mgr_set_dqdqs_output_phase(i,
3250                                        ((125 * iocfg->dll_chain_length) / 100) - 2);
3251                 }
3252                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3253                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3254
3255                 for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
3256                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3257                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3258                 }
3259                 writel(0xff, &sdr_scc_mgr->dq_ena);
3260                 writel(0xff, &sdr_scc_mgr->dm_ena);
3261                 writel(0, &sdr_scc_mgr->update);
3262         }
3263
3264         /* Compensate for simulation model behaviour */
3265         for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
3266                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3267                 scc_mgr_load_dqs(i);
3268         }
3269         writel(0, &sdr_scc_mgr->update);
3270
3271         /*
3272          * ArriaV has hard FIFOs that can only be initialized by incrementing
3273          * in sequencer.
3274          */
3275         vfifo_offset = misccfg->calib_vfifo_offset;
3276         for (j = 0; j < vfifo_offset; j++)
3277                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3278         writel(0, &phy_mgr_cmd->fifo_reset);
3279
3280         /*
3281          * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
3282          * setting from generation-time constant.
3283          */
3284         gbl->curr_read_lat = misccfg->calib_lfifo_offset;
3285         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3286 }
3287
3288 /**
3289  * mem_calibrate() - Memory calibration entry point.
3290  *
3291  * Perform memory calibration.
3292  */
3293 static u32 mem_calibrate(void)
3294 {
3295         u32 i;
3296         u32 rank_bgn, sr;
3297         u32 write_group, write_test_bgn;
3298         u32 read_group, read_test_bgn;
3299         u32 run_groups, current_run;
3300         u32 failing_groups = 0;
3301         u32 group_failed = 0;
3302
3303         const u32 rwdqs_ratio = rwcfg->mem_if_read_dqs_width /
3304                                 rwcfg->mem_if_write_dqs_width;
3305
3306         debug("%s:%d\n", __func__, __LINE__);
3307
3308         /* Initialize the data settings */
3309         gbl->error_substage = CAL_SUBSTAGE_NIL;
3310         gbl->error_stage = CAL_STAGE_NIL;
3311         gbl->error_group = 0xff;
3312         gbl->fom_in = 0;
3313         gbl->fom_out = 0;
3314
3315         /* Initialize WLAT and RLAT. */
3316         mem_init_latency();
3317
3318         /* Initialize bit slips. */
3319         mem_precharge_and_activate();
3320
3321         for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
3322                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3323                           SCC_MGR_GROUP_COUNTER_OFFSET);
3324                 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3325                 if (i == 0)
3326                         scc_mgr_set_hhp_extras();
3327
3328                 scc_set_bypass_mode(i);
3329         }
3330
3331         /* Calibration is skipped. */
3332         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3333                 /*
3334                  * Set VFIFO and LFIFO to instant-on settings in skip
3335                  * calibration mode.
3336                  */
3337                 mem_skip_calibrate();
3338
3339                 /*
3340                  * Do not remove this line as it makes sure all of our
3341                  * decisions have been applied.
3342                  */
3343                 writel(0, &sdr_scc_mgr->update);
3344                 return 1;
3345         }
3346
3347         /* Calibration is not skipped. */
3348         for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3349                 /*
3350                  * Zero all delay chain/phase settings for all
3351                  * groups and all shadow register sets.
3352                  */
3353                 scc_mgr_zero_all();
3354
3355                 run_groups = ~0;
3356
3357                 for (write_group = 0, write_test_bgn = 0; write_group
3358                         < rwcfg->mem_if_write_dqs_width; write_group++,
3359                         write_test_bgn += rwcfg->mem_dq_per_write_dqs) {
3360                         /* Initialize the group failure */
3361                         group_failed = 0;
3362
3363                         current_run = run_groups & ((1 <<
3364                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3365                         run_groups = run_groups >>
3366                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3367
3368                         if (current_run == 0)
3369                                 continue;
3370
3371                         writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3372                                             SCC_MGR_GROUP_COUNTER_OFFSET);
3373                         scc_mgr_zero_group(write_group, 0);
3374
3375                         for (read_group = write_group * rwdqs_ratio,
3376                              read_test_bgn = 0;
3377                              read_group < (write_group + 1) * rwdqs_ratio;
3378                              read_group++,
3379                              read_test_bgn += rwcfg->mem_dq_per_read_dqs) {
3380                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO)
3381                                         continue;
3382
3383                                 /* Calibrate the VFIFO */
3384                                 if (rw_mgr_mem_calibrate_vfifo(read_group,
3385                                                                read_test_bgn))
3386                                         continue;
3387
3388                                 if (!(gbl->phy_debug_mode_flags &
3389                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3390                                         return 0;
3391
3392                                 /* The group failed, we're done. */
3393                                 goto grp_failed;
3394                         }
3395
3396                         /* Calibrate the output side */
3397                         for (rank_bgn = 0, sr = 0;
3398                              rank_bgn < rwcfg->mem_number_of_ranks;
3399                              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
3400                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3401                                         continue;
3402
3403                                 /* Not needed in quick mode! */
3404                                 if (STATIC_CALIB_STEPS &
3405                                     CALIB_SKIP_DELAY_SWEEPS)
3406                                         continue;
3407
3408                                 /* Calibrate WRITEs */
3409                                 if (!rw_mgr_mem_calibrate_writes(rank_bgn,
3410                                                                  write_group,
3411                                                                  write_test_bgn))
3412                                         continue;
3413
3414                                 group_failed = 1;
3415                                 if (!(gbl->phy_debug_mode_flags &
3416                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3417                                         return 0;
3418                         }
3419
3420                         /* Some group failed, we're done. */
3421                         if (group_failed)
3422                                 goto grp_failed;
3423
3424                         for (read_group = write_group * rwdqs_ratio,
3425                              read_test_bgn = 0;
3426                              read_group < (write_group + 1) * rwdqs_ratio;
3427                              read_group++,
3428                              read_test_bgn += rwcfg->mem_dq_per_read_dqs) {
3429                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3430                                         continue;
3431
3432                                 if (!rw_mgr_mem_calibrate_vfifo_end(read_group,
3433                                                                     read_test_bgn))
3434                                         continue;
3435
3436                                 if (!(gbl->phy_debug_mode_flags &
3437                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3438                                         return 0;
3439
3440                                 /* The group failed, we're done. */
3441                                 goto grp_failed;
3442                         }
3443
3444                         /* No group failed, continue as usual. */
3445                         continue;
3446
3447 grp_failed:             /* A group failed, increment the counter. */
3448                         failing_groups++;
3449                 }
3450
3451                 /*
3452                  * USER If there are any failing groups then report
3453                  * the failure.
3454                  */
3455                 if (failing_groups != 0)
3456                         return 0;
3457
3458                 if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO)
3459                         continue;
3460
3461                 /* Calibrate the LFIFO */
3462                 if (!rw_mgr_mem_calibrate_lfifo())
3463                         return 0;
3464         }
3465
3466         /*
3467          * Do not remove this line as it makes sure all of our decisions
3468          * have been applied.
3469          */
3470         writel(0, &sdr_scc_mgr->update);
3471         return 1;
3472 }
3473
3474 /**
3475  * run_mem_calibrate() - Perform memory calibration
3476  *
3477  * This function triggers the entire memory calibration procedure.
3478  */
3479 static int run_mem_calibrate(void)
3480 {
3481         int pass;
3482
3483         debug("%s:%d\n", __func__, __LINE__);
3484
3485         /* Reset pass/fail status shown on afi_cal_success/fail */
3486         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3487
3488         /* Stop tracking manager. */
3489         clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3490
3491         phy_mgr_initialize();
3492         rw_mgr_mem_initialize();
3493
3494         /* Perform the actual memory calibration. */
3495         pass = mem_calibrate();
3496
3497         mem_precharge_and_activate();
3498         writel(0, &phy_mgr_cmd->fifo_reset);
3499
3500         /* Handoff. */
3501         rw_mgr_mem_handoff();
3502         /*
3503          * In Hard PHY this is a 2-bit control:
3504          * 0: AFI Mux Select
3505          * 1: DDIO Mux Select
3506          */
3507         writel(0x2, &phy_mgr_cfg->mux_sel);
3508
3509         /* Start tracking manager. */
3510         setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3511
3512         return pass;
3513 }
3514
3515 /**
3516  * debug_mem_calibrate() - Report result of memory calibration
3517  * @pass:       Value indicating whether calibration passed or failed
3518  *
3519  * This function reports the results of the memory calibration
3520  * and writes debug information into the register file.
3521  */
3522 static void debug_mem_calibrate(int pass)
3523 {
3524         u32 debug_info;
3525
3526         if (pass) {
3527                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3528
3529                 gbl->fom_in /= 2;
3530                 gbl->fom_out /= 2;
3531
3532                 if (gbl->fom_in > 0xff)
3533                         gbl->fom_in = 0xff;
3534
3535                 if (gbl->fom_out > 0xff)
3536                         gbl->fom_out = 0xff;
3537
3538                 /* Update the FOM in the register file */
3539                 debug_info = gbl->fom_in;
3540                 debug_info |= gbl->fom_out << 8;
3541                 writel(debug_info, &sdr_reg_file->fom);
3542
3543                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3544                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3545         } else {
3546                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3547
3548                 debug_info = gbl->error_stage;
3549                 debug_info |= gbl->error_substage << 8;
3550                 debug_info |= gbl->error_group << 16;
3551
3552                 writel(debug_info, &sdr_reg_file->failing_stage);
3553                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3554                 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3555
3556                 /* Update the failing group/stage in the register file */
3557                 debug_info = gbl->error_stage;
3558                 debug_info |= gbl->error_substage << 8;
3559                 debug_info |= gbl->error_group << 16;
3560                 writel(debug_info, &sdr_reg_file->failing_stage);
3561         }
3562
3563         printf("%s: Calibration complete\n", __FILE__);
3564 }
3565
3566 /**
3567  * hc_initialize_rom_data() - Initialize ROM data
3568  *
3569  * Initialize ROM data.
3570  */
3571 static void hc_initialize_rom_data(void)
3572 {
3573         unsigned int nelem = 0;
3574         const u32 *rom_init;
3575         u32 i, addr;
3576
3577         socfpga_get_seq_inst_init(&rom_init, &nelem);
3578         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3579         for (i = 0; i < nelem; i++)
3580                 writel(rom_init[i], addr + (i << 2));
3581
3582         socfpga_get_seq_ac_init(&rom_init, &nelem);
3583         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3584         for (i = 0; i < nelem; i++)
3585                 writel(rom_init[i], addr + (i << 2));
3586 }
3587
3588 /**
3589  * initialize_reg_file() - Initialize SDR register file
3590  *
3591  * Initialize SDR register file.
3592  */
3593 static void initialize_reg_file(void)
3594 {
3595         /* Initialize the register file with the correct data */
3596         writel(misccfg->reg_file_init_seq_signature, &sdr_reg_file->signature);
3597         writel(0, &sdr_reg_file->debug_data_addr);
3598         writel(0, &sdr_reg_file->cur_stage);
3599         writel(0, &sdr_reg_file->fom);
3600         writel(0, &sdr_reg_file->failing_stage);
3601         writel(0, &sdr_reg_file->debug1);
3602         writel(0, &sdr_reg_file->debug2);
3603 }
3604
3605 /**
3606  * initialize_hps_phy() - Initialize HPS PHY
3607  *
3608  * Initialize HPS PHY.
3609  */
3610 static void initialize_hps_phy(void)
3611 {
3612         u32 reg;
3613         /*
3614          * Tracking also gets configured here because it's in the
3615          * same register.
3616          */
3617         u32 trk_sample_count = 7500;
3618         u32 trk_long_idle_sample_count = (10 << 16) | 100;
3619         /*
3620          * Format is number of outer loops in the 16 MSB, sample
3621          * count in 16 LSB.
3622          */
3623
3624         reg = 0;
3625         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3626         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3627         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3628         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3629         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3630         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3631         /*
3632          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3633          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3634          */
3635         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3636         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3637                 trk_sample_count);
3638         writel(reg, &sdr_ctrl->phy_ctrl0);
3639
3640         reg = 0;
3641         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3642                 trk_sample_count >>
3643                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3644         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3645                 trk_long_idle_sample_count);
3646         writel(reg, &sdr_ctrl->phy_ctrl1);
3647
3648         reg = 0;
3649         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3650                 trk_long_idle_sample_count >>
3651                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3652         writel(reg, &sdr_ctrl->phy_ctrl2);
3653 }
3654
3655 /**
3656  * initialize_tracking() - Initialize tracking
3657  *
3658  * Initialize the register file with usable initial data.
3659  */
3660 static void initialize_tracking(void)
3661 {
3662         /*
3663          * Initialize the register file with the correct data.
3664          * Compute usable version of value in case we skip full
3665          * computation later.
3666          */
3667         writel(DIV_ROUND_UP(iocfg->delay_per_opa_tap,
3668                             iocfg->delay_per_dchain_tap) - 1,
3669                &sdr_reg_file->dtaps_per_ptap);
3670
3671         /* trk_sample_count */
3672         writel(7500, &sdr_reg_file->trk_sample_count);
3673
3674         /* longidle outer loop [15:0] */
3675         writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3676
3677         /*
3678          * longidle sample count [31:24]
3679          * trfc, worst case of 933Mhz 4Gb [23:16]
3680          * trcd, worst case [15:8]
3681          * vfifo wait [7:0]
3682          */
3683         writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3684                &sdr_reg_file->delays);
3685
3686         /* mux delay */
3687         writel((rwcfg->idle << 24) | (rwcfg->activate_1 << 16) |
3688                (rwcfg->sgle_read << 8) | (rwcfg->precharge_all << 0),
3689                &sdr_reg_file->trk_rw_mgr_addr);
3690
3691         writel(rwcfg->mem_if_read_dqs_width,
3692                &sdr_reg_file->trk_read_dqs_width);
3693
3694         /* trefi [7:0] */
3695         writel((rwcfg->refresh_all << 24) | (1000 << 0),
3696                &sdr_reg_file->trk_rfsh);
3697 }
3698
3699 int sdram_calibration_full(void)
3700 {
3701         struct param_type my_param;
3702         struct gbl_type my_gbl;
3703         u32 pass;
3704
3705         memset(&my_param, 0, sizeof(my_param));
3706         memset(&my_gbl, 0, sizeof(my_gbl));
3707
3708         param = &my_param;
3709         gbl = &my_gbl;
3710
3711         rwcfg = socfpga_get_sdram_rwmgr_config();
3712         iocfg = socfpga_get_sdram_io_config();
3713         misccfg = socfpga_get_sdram_misc_config();
3714
3715         /* Set the calibration enabled by default */
3716         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3717         /*
3718          * Only sweep all groups (regardless of fail state) by default
3719          * Set enabled read test by default.
3720          */
3721 #if DISABLE_GUARANTEED_READ
3722         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3723 #endif
3724         /* Initialize the register file */
3725         initialize_reg_file();
3726
3727         /* Initialize any PHY CSR */
3728         initialize_hps_phy();
3729
3730         scc_mgr_initialize();
3731
3732         initialize_tracking();
3733
3734         printf("%s: Preparing to start memory calibration\n", __FILE__);
3735
3736         debug("%s:%d\n", __func__, __LINE__);
3737         debug_cond(DLEVEL == 1,
3738                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3739                    rwcfg->mem_number_of_ranks, rwcfg->mem_number_of_cs_per_dimm,
3740                    rwcfg->mem_dq_per_read_dqs, rwcfg->mem_dq_per_write_dqs,
3741                    rwcfg->mem_virtual_groups_per_read_dqs,
3742                    rwcfg->mem_virtual_groups_per_write_dqs);
3743         debug_cond(DLEVEL == 1,
3744                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3745                    rwcfg->mem_if_read_dqs_width, rwcfg->mem_if_write_dqs_width,
3746                    rwcfg->mem_data_width, rwcfg->mem_data_mask_width,
3747                    iocfg->delay_per_opa_tap, iocfg->delay_per_dchain_tap);
3748         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
3749                    iocfg->delay_per_dqs_en_dchain_tap, iocfg->dll_chain_length);
3750         debug_cond(DLEVEL == 1,
3751                    "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3752                    iocfg->dqs_en_phase_max, iocfg->dqdqs_out_phase_max,
3753                    iocfg->dqs_en_delay_max, iocfg->dqs_in_delay_max);
3754         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3755                    iocfg->io_in_delay_max, iocfg->io_out1_delay_max,
3756                    iocfg->io_out2_delay_max);
3757         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3758                    iocfg->dqs_in_reserve, iocfg->dqs_out_reserve);
3759
3760         hc_initialize_rom_data();
3761
3762         /* update info for sims */
3763         reg_file_set_stage(CAL_STAGE_NIL);
3764         reg_file_set_group(0);
3765
3766         /*
3767          * Load global needed for those actions that require
3768          * some dynamic calibration support.
3769          */
3770         dyn_calib_steps = STATIC_CALIB_STEPS;
3771         /*
3772          * Load global to allow dynamic selection of delay loop settings
3773          * based on calibration mode.
3774          */
3775         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3776                 skip_delay_mask = 0xff;
3777         else
3778                 skip_delay_mask = 0x0;
3779
3780         pass = run_mem_calibrate();
3781         debug_mem_calibrate(pass);
3782         return pass;
3783 }