]> git.kernelconcepts.de Git - karo-tx-uboot.git/blob - drivers/ddr/altera/sequencer.c
ddr: altera: sequencer: Move qts-generated files to board dir
[karo-tx-uboot.git] / drivers / ddr / altera / sequencer.c
1 /*
2  * Copyright Altera Corporation (C) 2012-2015
3  *
4  * SPDX-License-Identifier:    BSD-3-Clause
5  */
6
7 #include <common.h>
8 #include <asm/io.h>
9 #include <asm/arch/sdram.h>
10 #include <errno.h>
11 #include "sequencer.h"
12
13 /*
14  * FIXME: This path is temporary until the SDRAM driver gets
15  *        a proper thorough cleanup.
16  */
17 #include "../../../board/altera/socfpga/qts/sequencer_auto.h"
18 #include "../../../board/altera/socfpga/qts/sequencer_auto_ac_init.h"
19 #include "../../../board/altera/socfpga/qts/sequencer_auto_inst_init.h"
20 #include "../../../board/altera/socfpga/qts/sequencer_defines.h"
21
22 static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
23         (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
24
25 static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
26         (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
27
28 static struct socfpga_sdr_reg_file *sdr_reg_file =
29         (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
30
31 static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
32         (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
33
34 static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
35         (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
36
37 static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
38         (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
39
40 static struct socfpga_data_mgr *data_mgr =
41         (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
42
43 static struct socfpga_sdr_ctrl *sdr_ctrl =
44         (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;
45
46 #define DELTA_D         1
47
48 /*
49  * In order to reduce ROM size, most of the selectable calibration steps are
50  * decided at compile time based on the user's calibration mode selection,
51  * as captured by the STATIC_CALIB_STEPS selection below.
52  *
53  * However, to support simulation-time selection of fast simulation mode, where
54  * we skip everything except the bare minimum, we need a few of the steps to
55  * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
56  * check, which is based on the rtl-supplied value, or we dynamically compute
57  * the value to use based on the dynamically-chosen calibration mode
58  */
59
60 #define DLEVEL 0
61 #define STATIC_IN_RTL_SIM 0
62 #define STATIC_SKIP_DELAY_LOOPS 0
63
64 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
65         STATIC_SKIP_DELAY_LOOPS)
66
67 /* calibration steps requested by the rtl */
68 uint16_t dyn_calib_steps;
69
70 /*
71  * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
72  * instead of static, we use boolean logic to select between
73  * non-skip and skip values
74  *
75  * The mask is set to include all bits when not-skipping, but is
76  * zero when skipping
77  */
78
79 uint16_t skip_delay_mask;       /* mask off bits when skipping/not-skipping */
80
81 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
82         ((non_skip_value) & skip_delay_mask)
83
84 struct gbl_type *gbl;
85 struct param_type *param;
86 uint32_t curr_shadow_reg;
87
88 static void set_failing_group_stage(uint32_t group, uint32_t stage,
89         uint32_t substage)
90 {
91         /*
92          * Only set the global stage if there was not been any other
93          * failing group
94          */
95         if (gbl->error_stage == CAL_STAGE_NIL)  {
96                 gbl->error_substage = substage;
97                 gbl->error_stage = stage;
98                 gbl->error_group = group;
99         }
100 }
101
102 static void reg_file_set_group(u16 set_group)
103 {
104         clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
105 }
106
107 static void reg_file_set_stage(u8 set_stage)
108 {
109         clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
110 }
111
112 static void reg_file_set_sub_stage(u8 set_sub_stage)
113 {
114         set_sub_stage &= 0xff;
115         clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
116 }
117
118 /**
119  * phy_mgr_initialize() - Initialize PHY Manager
120  *
121  * Initialize PHY Manager.
122  */
123 static void phy_mgr_initialize(void)
124 {
125         u32 ratio;
126
127         debug("%s:%d\n", __func__, __LINE__);
128         /* Calibration has control over path to memory */
129         /*
130          * In Hard PHY this is a 2-bit control:
131          * 0: AFI Mux Select
132          * 1: DDIO Mux Select
133          */
134         writel(0x3, &phy_mgr_cfg->mux_sel);
135
136         /* USER memory clock is not stable we begin initialization  */
137         writel(0, &phy_mgr_cfg->reset_mem_stbl);
138
139         /* USER calibration status all set to zero */
140         writel(0, &phy_mgr_cfg->cal_status);
141
142         writel(0, &phy_mgr_cfg->cal_debug_info);
143
144         /* Init params only if we do NOT skip calibration. */
145         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
146                 return;
147
148         ratio = RW_MGR_MEM_DQ_PER_READ_DQS /
149                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
150         param->read_correct_mask_vg = (1 << ratio) - 1;
151         param->write_correct_mask_vg = (1 << ratio) - 1;
152         param->read_correct_mask = (1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
153         param->write_correct_mask = (1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
154         ratio = RW_MGR_MEM_DATA_WIDTH /
155                 RW_MGR_MEM_DATA_MASK_WIDTH;
156         param->dm_correct_mask = (1 << ratio) - 1;
157 }
158
159 /**
160  * set_rank_and_odt_mask() - Set Rank and ODT mask
161  * @rank:       Rank mask
162  * @odt_mode:   ODT mode, OFF or READ_WRITE
163  *
164  * Set Rank and ODT mask (On-Die Termination).
165  */
166 static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode)
167 {
168         u32 odt_mask_0 = 0;
169         u32 odt_mask_1 = 0;
170         u32 cs_and_odt_mask;
171
172         if (odt_mode == RW_MGR_ODT_MODE_OFF) {
173                 odt_mask_0 = 0x0;
174                 odt_mask_1 = 0x0;
175         } else {        /* RW_MGR_ODT_MODE_READ_WRITE */
176                 switch (RW_MGR_MEM_NUMBER_OF_RANKS) {
177                 case 1: /* 1 Rank */
178                         /* Read: ODT = 0 ; Write: ODT = 1 */
179                         odt_mask_0 = 0x0;
180                         odt_mask_1 = 0x1;
181                         break;
182                 case 2: /* 2 Ranks */
183                         if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
184                                 /*
185                                  * - Dual-Slot , Single-Rank (1 CS per DIMM)
186                                  *   OR
187                                  * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
188                                  *
189                                  * Since MEM_NUMBER_OF_RANKS is 2, they
190                                  * are both single rank with 2 CS each
191                                  * (special for RDIMM).
192                                  *
193                                  * Read: Turn on ODT on the opposite rank
194                                  * Write: Turn on ODT on all ranks
195                                  */
196                                 odt_mask_0 = 0x3 & ~(1 << rank);
197                                 odt_mask_1 = 0x3;
198                         } else {
199                                 /*
200                                  * - Single-Slot , Dual-Rank (2 CS per DIMM)
201                                  *
202                                  * Read: Turn on ODT off on all ranks
203                                  * Write: Turn on ODT on active rank
204                                  */
205                                 odt_mask_0 = 0x0;
206                                 odt_mask_1 = 0x3 & (1 << rank);
207                         }
208                         break;
209                 case 4: /* 4 Ranks */
210                         /* Read:
211                          * ----------+-----------------------+
212                          *           |         ODT           |
213                          * Read From +-----------------------+
214                          *   Rank    |  3  |  2  |  1  |  0  |
215                          * ----------+-----+-----+-----+-----+
216                          *     0     |  0  |  1  |  0  |  0  |
217                          *     1     |  1  |  0  |  0  |  0  |
218                          *     2     |  0  |  0  |  0  |  1  |
219                          *     3     |  0  |  0  |  1  |  0  |
220                          * ----------+-----+-----+-----+-----+
221                          *
222                          * Write:
223                          * ----------+-----------------------+
224                          *           |         ODT           |
225                          * Write To  +-----------------------+
226                          *   Rank    |  3  |  2  |  1  |  0  |
227                          * ----------+-----+-----+-----+-----+
228                          *     0     |  0  |  1  |  0  |  1  |
229                          *     1     |  1  |  0  |  1  |  0  |
230                          *     2     |  0  |  1  |  0  |  1  |
231                          *     3     |  1  |  0  |  1  |  0  |
232                          * ----------+-----+-----+-----+-----+
233                          */
234                         switch (rank) {
235                         case 0:
236                                 odt_mask_0 = 0x4;
237                                 odt_mask_1 = 0x5;
238                                 break;
239                         case 1:
240                                 odt_mask_0 = 0x8;
241                                 odt_mask_1 = 0xA;
242                                 break;
243                         case 2:
244                                 odt_mask_0 = 0x1;
245                                 odt_mask_1 = 0x5;
246                                 break;
247                         case 3:
248                                 odt_mask_0 = 0x2;
249                                 odt_mask_1 = 0xA;
250                                 break;
251                         }
252                         break;
253                 }
254         }
255
256         cs_and_odt_mask = (0xFF & ~(1 << rank)) |
257                           ((0xFF & odt_mask_0) << 8) |
258                           ((0xFF & odt_mask_1) << 16);
259         writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
260                                 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
261 }
262
263 /**
264  * scc_mgr_set() - Set SCC Manager register
265  * @off:        Base offset in SCC Manager space
266  * @grp:        Read/Write group
267  * @val:        Value to be set
268  *
269  * This function sets the SCC Manager (Scan Chain Control Manager) register.
270  */
271 static void scc_mgr_set(u32 off, u32 grp, u32 val)
272 {
273         writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
274 }
275
276 /**
277  * scc_mgr_initialize() - Initialize SCC Manager registers
278  *
279  * Initialize SCC Manager registers.
280  */
281 static void scc_mgr_initialize(void)
282 {
283         /*
284          * Clear register file for HPS. 16 (2^4) is the size of the
285          * full register file in the scc mgr:
286          *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
287          *                             MEM_IF_READ_DQS_WIDTH - 1);
288          */
289         int i;
290
291         for (i = 0; i < 16; i++) {
292                 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
293                            __func__, __LINE__, i);
294                 scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
295         }
296 }
297
298 static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
299 {
300         scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
301 }
302
303 static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
304 {
305         scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
306 }
307
308 static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
309 {
310         scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
311 }
312
313 static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
314 {
315         scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
316 }
317
318 static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
319 {
320         scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
321                     delay);
322 }
323
324 static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
325 {
326         scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
327 }
328
329 static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
330 {
331         scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
332 }
333
334 static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
335 {
336         scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
337                     delay);
338 }
339
340 static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
341 {
342         scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
343                     RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
344                     delay);
345 }
346
347 /* load up dqs config settings */
348 static void scc_mgr_load_dqs(uint32_t dqs)
349 {
350         writel(dqs, &sdr_scc_mgr->dqs_ena);
351 }
352
353 /* load up dqs io config settings */
354 static void scc_mgr_load_dqs_io(void)
355 {
356         writel(0, &sdr_scc_mgr->dqs_io_ena);
357 }
358
359 /* load up dq config settings */
360 static void scc_mgr_load_dq(uint32_t dq_in_group)
361 {
362         writel(dq_in_group, &sdr_scc_mgr->dq_ena);
363 }
364
365 /* load up dm config settings */
366 static void scc_mgr_load_dm(uint32_t dm)
367 {
368         writel(dm, &sdr_scc_mgr->dm_ena);
369 }
370
371 /**
372  * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
373  * @off:        Base offset in SCC Manager space
374  * @grp:        Read/Write group
375  * @val:        Value to be set
376  * @update:     If non-zero, trigger SCC Manager update for all ranks
377  *
378  * This function sets the SCC Manager (Scan Chain Control Manager) register
379  * and optionally triggers the SCC update for all ranks.
380  */
381 static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
382                                   const int update)
383 {
384         u32 r;
385
386         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
387              r += NUM_RANKS_PER_SHADOW_REG) {
388                 scc_mgr_set(off, grp, val);
389
390                 if (update || (r == 0)) {
391                         writel(grp, &sdr_scc_mgr->dqs_ena);
392                         writel(0, &sdr_scc_mgr->update);
393                 }
394         }
395 }
396
397 static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
398 {
399         /*
400          * USER although the h/w doesn't support different phases per
401          * shadow register, for simplicity our scc manager modeling
402          * keeps different phase settings per shadow reg, and it's
403          * important for us to keep them in sync to match h/w.
404          * for efficiency, the scan chain update should occur only
405          * once to sr0.
406          */
407         scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
408                               read_group, phase, 0);
409 }
410
411 static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
412                                                      uint32_t phase)
413 {
414         /*
415          * USER although the h/w doesn't support different phases per
416          * shadow register, for simplicity our scc manager modeling
417          * keeps different phase settings per shadow reg, and it's
418          * important for us to keep them in sync to match h/w.
419          * for efficiency, the scan chain update should occur only
420          * once to sr0.
421          */
422         scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
423                               write_group, phase, 0);
424 }
425
426 static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
427                                                uint32_t delay)
428 {
429         /*
430          * In shadow register mode, the T11 settings are stored in
431          * registers in the core, which are updated by the DQS_ENA
432          * signals. Not issuing the SCC_MGR_UPD command allows us to
433          * save lots of rank switching overhead, by calling
434          * select_shadow_regs_for_update with update_scan_chains
435          * set to 0.
436          */
437         scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
438                               read_group, delay, 1);
439         writel(0, &sdr_scc_mgr->update);
440 }
441
442 /**
443  * scc_mgr_set_oct_out1_delay() - Set OCT output delay
444  * @write_group:        Write group
445  * @delay:              Delay value
446  *
447  * This function sets the OCT output delay in SCC manager.
448  */
449 static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
450 {
451         const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
452                           RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
453         const int base = write_group * ratio;
454         int i;
455         /*
456          * Load the setting in the SCC manager
457          * Although OCT affects only write data, the OCT delay is controlled
458          * by the DQS logic block which is instantiated once per read group.
459          * For protocols where a write group consists of multiple read groups,
460          * the setting must be set multiple times.
461          */
462         for (i = 0; i < ratio; i++)
463                 scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
464 }
465
466 /**
467  * scc_mgr_set_hhp_extras() - Set HHP extras.
468  *
469  * Load the fixed setting in the SCC manager HHP extras.
470  */
471 static void scc_mgr_set_hhp_extras(void)
472 {
473         /*
474          * Load the fixed setting in the SCC manager
475          * bits: 0:0 = 1'b1     - DQS bypass
476          * bits: 1:1 = 1'b1     - DQ bypass
477          * bits: 4:2 = 3'b001   - rfifo_mode
478          * bits: 6:5 = 2'b01    - rfifo clock_select
479          * bits: 7:7 = 1'b0     - separate gating from ungating setting
480          * bits: 8:8 = 1'b0     - separate OE from Output delay setting
481          */
482         const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
483                           (1 << 2) | (1 << 1) | (1 << 0);
484         const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
485                          SCC_MGR_HHP_GLOBALS_OFFSET |
486                          SCC_MGR_HHP_EXTRAS_OFFSET;
487
488         debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
489                    __func__, __LINE__);
490         writel(value, addr);
491         debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
492                    __func__, __LINE__);
493 }
494
495 /**
496  * scc_mgr_zero_all() - Zero all DQS config
497  *
498  * Zero all DQS config.
499  */
500 static void scc_mgr_zero_all(void)
501 {
502         int i, r;
503
504         /*
505          * USER Zero all DQS config settings, across all groups and all
506          * shadow registers
507          */
508         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
509              r += NUM_RANKS_PER_SHADOW_REG) {
510                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
511                         /*
512                          * The phases actually don't exist on a per-rank basis,
513                          * but there's no harm updating them several times, so
514                          * let's keep the code simple.
515                          */
516                         scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
517                         scc_mgr_set_dqs_en_phase(i, 0);
518                         scc_mgr_set_dqs_en_delay(i, 0);
519                 }
520
521                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
522                         scc_mgr_set_dqdqs_output_phase(i, 0);
523                         /* Arria V/Cyclone V don't have out2. */
524                         scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
525                 }
526         }
527
528         /* Multicast to all DQS group enables. */
529         writel(0xff, &sdr_scc_mgr->dqs_ena);
530         writel(0, &sdr_scc_mgr->update);
531 }
532
533 /**
534  * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
535  * @write_group:        Write group
536  *
537  * Set bypass mode and trigger SCC update.
538  */
539 static void scc_set_bypass_mode(const u32 write_group)
540 {
541         /* Multicast to all DQ enables. */
542         writel(0xff, &sdr_scc_mgr->dq_ena);
543         writel(0xff, &sdr_scc_mgr->dm_ena);
544
545         /* Update current DQS IO enable. */
546         writel(0, &sdr_scc_mgr->dqs_io_ena);
547
548         /* Update the DQS logic. */
549         writel(write_group, &sdr_scc_mgr->dqs_ena);
550
551         /* Hit update. */
552         writel(0, &sdr_scc_mgr->update);
553 }
554
555 /**
556  * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
557  * @write_group:        Write group
558  *
559  * Load DQS settings for Write Group, do not trigger SCC update.
560  */
561 static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
562 {
563         const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
564                           RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
565         const int base = write_group * ratio;
566         int i;
567         /*
568          * Load the setting in the SCC manager
569          * Although OCT affects only write data, the OCT delay is controlled
570          * by the DQS logic block which is instantiated once per read group.
571          * For protocols where a write group consists of multiple read groups,
572          * the setting must be set multiple times.
573          */
574         for (i = 0; i < ratio; i++)
575                 writel(base + i, &sdr_scc_mgr->dqs_ena);
576 }
577
578 /**
579  * scc_mgr_zero_group() - Zero all configs for a group
580  *
581  * Zero DQ, DM, DQS and OCT configs for a group.
582  */
583 static void scc_mgr_zero_group(const u32 write_group, const int out_only)
584 {
585         int i, r;
586
587         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
588              r += NUM_RANKS_PER_SHADOW_REG) {
589                 /* Zero all DQ config settings. */
590                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
591                         scc_mgr_set_dq_out1_delay(i, 0);
592                         if (!out_only)
593                                 scc_mgr_set_dq_in_delay(i, 0);
594                 }
595
596                 /* Multicast to all DQ enables. */
597                 writel(0xff, &sdr_scc_mgr->dq_ena);
598
599                 /* Zero all DM config settings. */
600                 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
601                         scc_mgr_set_dm_out1_delay(i, 0);
602
603                 /* Multicast to all DM enables. */
604                 writel(0xff, &sdr_scc_mgr->dm_ena);
605
606                 /* Zero all DQS IO settings. */
607                 if (!out_only)
608                         scc_mgr_set_dqs_io_in_delay(0);
609
610                 /* Arria V/Cyclone V don't have out2. */
611                 scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
612                 scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
613                 scc_mgr_load_dqs_for_write_group(write_group);
614
615                 /* Multicast to all DQS IO enables (only 1 in total). */
616                 writel(0, &sdr_scc_mgr->dqs_io_ena);
617
618                 /* Hit update to zero everything. */
619                 writel(0, &sdr_scc_mgr->update);
620         }
621 }
622
623 /*
624  * apply and load a particular input delay for the DQ pins in a group
625  * group_bgn is the index of the first dq pin (in the write group)
626  */
627 static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
628 {
629         uint32_t i, p;
630
631         for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
632                 scc_mgr_set_dq_in_delay(p, delay);
633                 scc_mgr_load_dq(p);
634         }
635 }
636
637 /**
638  * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
639  * @delay:              Delay value
640  *
641  * Apply and load a particular output delay for the DQ pins in a group.
642  */
643 static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
644 {
645         int i;
646
647         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
648                 scc_mgr_set_dq_out1_delay(i, delay);
649                 scc_mgr_load_dq(i);
650         }
651 }
652
653 /* apply and load a particular output delay for the DM pins in a group */
654 static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
655 {
656         uint32_t i;
657
658         for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
659                 scc_mgr_set_dm_out1_delay(i, delay1);
660                 scc_mgr_load_dm(i);
661         }
662 }
663
664
665 /* apply and load delay on both DQS and OCT out1 */
666 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
667                                                     uint32_t delay)
668 {
669         scc_mgr_set_dqs_out1_delay(delay);
670         scc_mgr_load_dqs_io();
671
672         scc_mgr_set_oct_out1_delay(write_group, delay);
673         scc_mgr_load_dqs_for_write_group(write_group);
674 }
675
676 /**
677  * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
678  * @write_group:        Write group
679  * @delay:              Delay value
680  *
681  * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
682  */
683 static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
684                                                   const u32 delay)
685 {
686         u32 i, new_delay;
687
688         /* DQ shift */
689         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
690                 scc_mgr_load_dq(i);
691
692         /* DM shift */
693         for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
694                 scc_mgr_load_dm(i);
695
696         /* DQS shift */
697         new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
698         if (new_delay > IO_IO_OUT2_DELAY_MAX) {
699                 debug_cond(DLEVEL == 1,
700                            "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
701                            __func__, __LINE__, write_group, delay, new_delay,
702                            IO_IO_OUT2_DELAY_MAX,
703                            new_delay - IO_IO_OUT2_DELAY_MAX);
704                 new_delay -= IO_IO_OUT2_DELAY_MAX;
705                 scc_mgr_set_dqs_out1_delay(new_delay);
706         }
707
708         scc_mgr_load_dqs_io();
709
710         /* OCT shift */
711         new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
712         if (new_delay > IO_IO_OUT2_DELAY_MAX) {
713                 debug_cond(DLEVEL == 1,
714                            "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
715                            __func__, __LINE__, write_group, delay,
716                            new_delay, IO_IO_OUT2_DELAY_MAX,
717                            new_delay - IO_IO_OUT2_DELAY_MAX);
718                 new_delay -= IO_IO_OUT2_DELAY_MAX;
719                 scc_mgr_set_oct_out1_delay(write_group, new_delay);
720         }
721
722         scc_mgr_load_dqs_for_write_group(write_group);
723 }
724
725 /**
726  * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side to all ranks
727  * @write_group:        Write group
728  * @delay:              Delay value
729  *
730  * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
731  */
732 static void
733 scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
734                                                 const u32 delay)
735 {
736         int r;
737
738         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
739              r += NUM_RANKS_PER_SHADOW_REG) {
740                 scc_mgr_apply_group_all_out_delay_add(write_group, delay);
741                 writel(0, &sdr_scc_mgr->update);
742         }
743 }
744
745 /**
746  * set_jump_as_return() - Return instruction optimization
747  *
748  * Optimization used to recover some slots in ddr3 inst_rom could be
749  * applied to other protocols if we wanted to
750  */
751 static void set_jump_as_return(void)
752 {
753         /*
754          * To save space, we replace return with jump to special shared
755          * RETURN instruction so we set the counter to large value so that
756          * we always jump.
757          */
758         writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
759         writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
760 }
761
762 /**
763  * delay_for_n_mem_clocks() - Delay for N memory clocks
764  * @clocks:     Length of the delay
765  *
766  * Delay for N memory clocks.
767  */
768 static void delay_for_n_mem_clocks(const u32 clocks)
769 {
770         u32 afi_clocks;
771         u16 c_loop;
772         u8 inner;
773         u8 outer;
774
775         debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);
776
777         /* Scale (rounding up) to get afi clocks. */
778         afi_clocks = DIV_ROUND_UP(clocks, AFI_RATE_RATIO);
779         if (afi_clocks) /* Temporary underflow protection */
780                 afi_clocks--;
781
782         /*
783          * Note, we don't bother accounting for being off a little
784          * bit because of a few extra instructions in outer loops.
785          * Note, the loops have a test at the end, and do the test
786          * before the decrement, and so always perform the loop
787          * 1 time more than the counter value
788          */
789         c_loop = afi_clocks >> 16;
790         outer = c_loop ? 0xff : (afi_clocks >> 8);
791         inner = outer ? 0xff : afi_clocks;
792
793         /*
794          * rom instructions are structured as follows:
795          *
796          *    IDLE_LOOP2: jnz cntr0, TARGET_A
797          *    IDLE_LOOP1: jnz cntr1, TARGET_B
798          *                return
799          *
800          * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
801          * TARGET_B is set to IDLE_LOOP2 as well
802          *
803          * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
804          * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
805          *
806          * a little confusing, but it helps save precious space in the inst_rom
807          * and sequencer rom and keeps the delays more accurate and reduces
808          * overhead
809          */
810         if (afi_clocks < 0x100) {
811                 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
812                         &sdr_rw_load_mgr_regs->load_cntr1);
813
814                 writel(RW_MGR_IDLE_LOOP1,
815                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
816
817                 writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
818                                           RW_MGR_RUN_SINGLE_GROUP_OFFSET);
819         } else {
820                 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
821                         &sdr_rw_load_mgr_regs->load_cntr0);
822
823                 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
824                         &sdr_rw_load_mgr_regs->load_cntr1);
825
826                 writel(RW_MGR_IDLE_LOOP2,
827                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
828
829                 writel(RW_MGR_IDLE_LOOP2,
830                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
831
832                 do {
833                         writel(RW_MGR_IDLE_LOOP2,
834                                 SDR_PHYGRP_RWMGRGRP_ADDRESS |
835                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
836                 } while (c_loop-- != 0);
837         }
838         debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
839 }
840
841 /**
842  * rw_mgr_mem_init_load_regs() - Load instruction registers
843  * @cntr0:      Counter 0 value
844  * @cntr1:      Counter 1 value
845  * @cntr2:      Counter 2 value
846  * @jump:       Jump instruction value
847  *
848  * Load instruction registers.
849  */
850 static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
851 {
852         uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
853                            RW_MGR_RUN_SINGLE_GROUP_OFFSET;
854
855         /* Load counters */
856         writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
857                &sdr_rw_load_mgr_regs->load_cntr0);
858         writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
859                &sdr_rw_load_mgr_regs->load_cntr1);
860         writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
861                &sdr_rw_load_mgr_regs->load_cntr2);
862
863         /* Load jump address */
864         writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
865         writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
866         writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
867
868         /* Execute count instruction */
869         writel(jump, grpaddr);
870 }
871
872 /**
873  * rw_mgr_mem_load_user() - Load user calibration values
874  * @fin1:       Final instruction 1
875  * @fin2:       Final instruction 2
876  * @precharge:  If 1, precharge the banks at the end
877  *
878  * Load user calibration values and optionally precharge the banks.
879  */
880 static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
881                                  const int precharge)
882 {
883         u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
884                       RW_MGR_RUN_SINGLE_GROUP_OFFSET;
885         u32 r;
886
887         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
888                 if (param->skip_ranks[r]) {
889                         /* request to skip the rank */
890                         continue;
891                 }
892
893                 /* set rank */
894                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
895
896                 /* precharge all banks ... */
897                 if (precharge)
898                         writel(RW_MGR_PRECHARGE_ALL, grpaddr);
899
900                 /*
901                  * USER Use Mirror-ed commands for odd ranks if address
902                  * mirrorring is on
903                  */
904                 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
905                         set_jump_as_return();
906                         writel(RW_MGR_MRS2_MIRR, grpaddr);
907                         delay_for_n_mem_clocks(4);
908                         set_jump_as_return();
909                         writel(RW_MGR_MRS3_MIRR, grpaddr);
910                         delay_for_n_mem_clocks(4);
911                         set_jump_as_return();
912                         writel(RW_MGR_MRS1_MIRR, grpaddr);
913                         delay_for_n_mem_clocks(4);
914                         set_jump_as_return();
915                         writel(fin1, grpaddr);
916                 } else {
917                         set_jump_as_return();
918                         writel(RW_MGR_MRS2, grpaddr);
919                         delay_for_n_mem_clocks(4);
920                         set_jump_as_return();
921                         writel(RW_MGR_MRS3, grpaddr);
922                         delay_for_n_mem_clocks(4);
923                         set_jump_as_return();
924                         writel(RW_MGR_MRS1, grpaddr);
925                         set_jump_as_return();
926                         writel(fin2, grpaddr);
927                 }
928
929                 if (precharge)
930                         continue;
931
932                 set_jump_as_return();
933                 writel(RW_MGR_ZQCL, grpaddr);
934
935                 /* tZQinit = tDLLK = 512 ck cycles */
936                 delay_for_n_mem_clocks(512);
937         }
938 }
939
940 /**
941  * rw_mgr_mem_initialize() - Initialize RW Manager
942  *
943  * Initialize RW Manager.
944  */
945 static void rw_mgr_mem_initialize(void)
946 {
947         debug("%s:%d\n", __func__, __LINE__);
948
949         /* The reset / cke part of initialization is broadcasted to all ranks */
950         writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
951                                 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
952
953         /*
954          * Here's how you load register for a loop
955          * Counters are located @ 0x800
956          * Jump address are located @ 0xC00
957          * For both, registers 0 to 3 are selected using bits 3 and 2, like
958          * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
959          * I know this ain't pretty, but Avalon bus throws away the 2 least
960          * significant bits
961          */
962
963         /* Start with memory RESET activated */
964
965         /* tINIT = 200us */
966
967         /*
968          * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
969          * If a and b are the number of iteration in 2 nested loops
970          * it takes the following number of cycles to complete the operation:
971          * number_of_cycles = ((2 + n) * a + 2) * b
972          * where n is the number of instruction in the inner loop
973          * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
974          * b = 6A
975          */
976         rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL,
977                                   SEQ_TINIT_CNTR2_VAL,
978                                   RW_MGR_INIT_RESET_0_CKE_0);
979
980         /* Indicate that memory is stable. */
981         writel(1, &phy_mgr_cfg->reset_mem_stbl);
982
983         /*
984          * transition the RESET to high
985          * Wait for 500us
986          */
987
988         /*
989          * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
990          * If a and b are the number of iteration in 2 nested loops
991          * it takes the following number of cycles to complete the operation
992          * number_of_cycles = ((2 + n) * a + 2) * b
993          * where n is the number of instruction in the inner loop
994          * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
995          * b = FF
996          */
997         rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL,
998                                   SEQ_TRESET_CNTR2_VAL,
999                                   RW_MGR_INIT_RESET_1_CKE_0);
1000
1001         /* Bring up clock enable. */
1002
1003         /* tXRP < 250 ck cycles */
1004         delay_for_n_mem_clocks(250);
1005
1006         rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET,
1007                              0);
1008 }
1009
1010 /**
1011  * rw_mgr_mem_handoff() - Hand off the memory to user
1012  *
1013  * At the end of calibration we have to program the user settings in
1014  * and hand off the memory to the user.
1015  */
1016 static void rw_mgr_mem_handoff(void)
1017 {
1018         rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1);
1019         /*
1020          * Need to wait tMOD (12CK or 15ns) time before issuing other
1021          * commands, but we will have plenty of NIOS cycles before actual
1022          * handoff so its okay.
1023          */
1024 }
1025
1026 /**
1027  * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
1028  * @group:      Write Group
1029  * @use_dm:     Use DM
1030  *
1031  * Issue write test command. Two variants are provided, one that just tests
1032  * a write pattern and another that tests datamask functionality.
1033  */
1034 static void rw_mgr_mem_calibrate_write_test_issue(u32 group,
1035                                                   u32 test_dm)
1036 {
1037         const u32 quick_write_mode =
1038                 (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
1039                 ENABLE_SUPER_QUICK_CALIBRATION;
1040         u32 mcc_instruction;
1041         u32 rw_wl_nop_cycles;
1042
1043         /*
1044          * Set counter and jump addresses for the right
1045          * number of NOP cycles.
1046          * The number of supported NOP cycles can range from -1 to infinity
1047          * Three different cases are handled:
1048          *
1049          * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
1050          *    mechanism will be used to insert the right number of NOPs
1051          *
1052          * 2. For a number of NOP cycles equals to 0, the micro-instruction
1053          *    issuing the write command will jump straight to the
1054          *    micro-instruction that turns on DQS (for DDRx), or outputs write
1055          *    data (for RLD), skipping
1056          *    the NOP micro-instruction all together
1057          *
1058          * 3. A number of NOP cycles equal to -1 indicates that DQS must be
1059          *    turned on in the same micro-instruction that issues the write
1060          *    command. Then we need
1061          *    to directly jump to the micro-instruction that sends out the data
1062          *
1063          * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
1064          *       (2 and 3). One jump-counter (0) is used to perform multiple
1065          *       write-read operations.
1066          *       one counter left to issue this command in "multiple-group" mode
1067          */
1068
1069         rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
1070
1071         if (rw_wl_nop_cycles == -1) {
1072                 /*
1073                  * CNTR 2 - We want to execute the special write operation that
1074                  * turns on DQS right away and then skip directly to the
1075                  * instruction that sends out the data. We set the counter to a
1076                  * large number so that the jump is always taken.
1077                  */
1078                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
1079
1080                 /* CNTR 3 - Not used */
1081                 if (test_dm) {
1082                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
1083                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
1084                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1085                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
1086                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1087                 } else {
1088                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
1089                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
1090                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1091                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
1092                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1093                 }
1094         } else if (rw_wl_nop_cycles == 0) {
1095                 /*
1096                  * CNTR 2 - We want to skip the NOP operation and go straight
1097                  * to the DQS enable instruction. We set the counter to a large
1098                  * number so that the jump is always taken.
1099                  */
1100                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
1101
1102                 /* CNTR 3 - Not used */
1103                 if (test_dm) {
1104                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
1105                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
1106                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1107                 } else {
1108                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
1109                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
1110                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1111                 }
1112         } else {
1113                 /*
1114                  * CNTR 2 - In this case we want to execute the next instruction
1115                  * and NOT take the jump. So we set the counter to 0. The jump
1116                  * address doesn't count.
1117                  */
1118                 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
1119                 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1120
1121                 /*
1122                  * CNTR 3 - Set the nop counter to the number of cycles we
1123                  * need to loop for, minus 1.
1124                  */
1125                 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
1126                 if (test_dm) {
1127                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
1128                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
1129                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1130                 } else {
1131                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
1132                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
1133                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1134                 }
1135         }
1136
1137         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1138                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1139
1140         if (quick_write_mode)
1141                 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
1142         else
1143                 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
1144
1145         writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1146
1147         /*
1148          * CNTR 1 - This is used to ensure enough time elapses
1149          * for read data to come back.
1150          */
1151         writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
1152
1153         if (test_dm) {
1154                 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
1155                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1156         } else {
1157                 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
1158                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1159         }
1160
1161         writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
1162                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
1163                                 (group << 2));
1164 }
1165
1166 /**
1167  * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass
1168  * @rank_bgn:           Rank number
1169  * @write_group:        Write Group
1170  * @use_dm:             Use DM
1171  * @all_correct:        All bits must be correct in the mask
1172  * @bit_chk:            Resulting bit mask after the test
1173  * @all_ranks:          Test all ranks
1174  *
1175  * Test writes, can check for a single bit pass or multiple bit pass.
1176  */
1177 static int
1178 rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group,
1179                                 const u32 use_dm, const u32 all_correct,
1180                                 u32 *bit_chk, const u32 all_ranks)
1181 {
1182         const u32 rank_end = all_ranks ?
1183                                 RW_MGR_MEM_NUMBER_OF_RANKS :
1184                                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1185         const u32 shift_ratio = RW_MGR_MEM_DQ_PER_WRITE_DQS /
1186                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS;
1187         const u32 correct_mask_vg = param->write_correct_mask_vg;
1188
1189         u32 tmp_bit_chk, base_rw_mgr;
1190         int vg, r;
1191
1192         *bit_chk = param->write_correct_mask;
1193
1194         for (r = rank_bgn; r < rank_end; r++) {
1195                 /* Request to skip the rank */
1196                 if (param->skip_ranks[r])
1197                         continue;
1198
1199                 /* Set rank */
1200                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1201
1202                 tmp_bit_chk = 0;
1203                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1;
1204                      vg >= 0; vg--) {
1205                         /* Reset the FIFOs to get pointers to known state. */
1206                         writel(0, &phy_mgr_cmd->fifo_reset);
1207
1208                         rw_mgr_mem_calibrate_write_test_issue(
1209                                 write_group *
1210                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + vg,
1211                                 use_dm);
1212
1213                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1214                         tmp_bit_chk <<= shift_ratio;
1215                         tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
1216                 }
1217
1218                 *bit_chk &= tmp_bit_chk;
1219         }
1220
1221         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1222         if (all_correct) {
1223                 debug_cond(DLEVEL == 2,
1224                            "write_test(%u,%u,ALL) : %u == %u => %i\n",
1225                            write_group, use_dm, *bit_chk,
1226                            param->write_correct_mask,
1227                            *bit_chk == param->write_correct_mask);
1228                 return *bit_chk == param->write_correct_mask;
1229         } else {
1230                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1231                 debug_cond(DLEVEL == 2,
1232                            "write_test(%u,%u,ONE) : %u != %i => %i\n",
1233                            write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
1234                 return *bit_chk != 0x00;
1235         }
1236 }
1237
1238 /**
1239  * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
1240  * @rank_bgn:   Rank number
1241  * @group:      Read/Write Group
1242  * @all_ranks:  Test all ranks
1243  *
1244  * Performs a guaranteed read on the patterns we are going to use during a
1245  * read test to ensure memory works.
1246  */
1247 static int
1248 rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group,
1249                                         const u32 all_ranks)
1250 {
1251         const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1252                          RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1253         const u32 addr_offset =
1254                          (group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS) << 2;
1255         const u32 rank_end = all_ranks ?
1256                                 RW_MGR_MEM_NUMBER_OF_RANKS :
1257                                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1258         const u32 shift_ratio = RW_MGR_MEM_DQ_PER_READ_DQS /
1259                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
1260         const u32 correct_mask_vg = param->read_correct_mask_vg;
1261
1262         u32 tmp_bit_chk, base_rw_mgr, bit_chk;
1263         int vg, r;
1264         int ret = 0;
1265
1266         bit_chk = param->read_correct_mask;
1267
1268         for (r = rank_bgn; r < rank_end; r++) {
1269                 /* Request to skip the rank */
1270                 if (param->skip_ranks[r])
1271                         continue;
1272
1273                 /* Set rank */
1274                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1275
1276                 /* Load up a constant bursts of read commands */
1277                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1278                 writel(RW_MGR_GUARANTEED_READ,
1279                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1280
1281                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1282                 writel(RW_MGR_GUARANTEED_READ_CONT,
1283                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1284
1285                 tmp_bit_chk = 0;
1286                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1;
1287                      vg >= 0; vg--) {
1288                         /* Reset the FIFOs to get pointers to known state. */
1289                         writel(0, &phy_mgr_cmd->fifo_reset);
1290                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1291                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1292                         writel(RW_MGR_GUARANTEED_READ,
1293                                addr + addr_offset + (vg << 2));
1294
1295                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1296                         tmp_bit_chk <<= shift_ratio;
1297                         tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
1298                 }
1299
1300                 bit_chk &= tmp_bit_chk;
1301         }
1302
1303         writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));
1304
1305         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1306
1307         if (bit_chk != param->read_correct_mask)
1308                 ret = -EIO;
1309
1310         debug_cond(DLEVEL == 1,
1311                    "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
1312                    __func__, __LINE__, group, bit_chk,
1313                    param->read_correct_mask, ret);
1314
1315         return ret;
1316 }
1317
1318 /**
1319  * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test
1320  * @rank_bgn:   Rank number
1321  * @all_ranks:  Test all ranks
1322  *
1323  * Load up the patterns we are going to use during a read test.
1324  */
1325 static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn,
1326                                                     const int all_ranks)
1327 {
1328         const u32 rank_end = all_ranks ?
1329                         RW_MGR_MEM_NUMBER_OF_RANKS :
1330                         (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1331         u32 r;
1332
1333         debug("%s:%d\n", __func__, __LINE__);
1334
1335         for (r = rank_bgn; r < rank_end; r++) {
1336                 if (param->skip_ranks[r])
1337                         /* request to skip the rank */
1338                         continue;
1339
1340                 /* set rank */
1341                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1342
1343                 /* Load up a constant bursts */
1344                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1345
1346                 writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
1347                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1348
1349                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1350
1351                 writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
1352                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1353
1354                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);
1355
1356                 writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
1357                         &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1358
1359                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);
1360
1361                 writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
1362                         &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1363
1364                 writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1365                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1366         }
1367
1368         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1369 }
1370
1371 /**
1372  * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
1373  * @rank_bgn:           Rank number
1374  * @group:              Read/Write group
1375  * @num_tries:          Number of retries of the test
1376  * @all_correct:        All bits must be correct in the mask
1377  * @bit_chk:            Resulting bit mask after the test
1378  * @all_groups:         Test all R/W groups
1379  * @all_ranks:          Test all ranks
1380  *
1381  * Try a read and see if it returns correct data back. Test has dummy reads
1382  * inserted into the mix used to align DQS enable. Test has more thorough
1383  * checks than the regular read test.
1384  */
1385 static int
1386 rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group,
1387                                const u32 num_tries, const u32 all_correct,
1388                                u32 *bit_chk,
1389                                const u32 all_groups, const u32 all_ranks)
1390 {
1391         const u32 rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
1392                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1393         const u32 quick_read_mode =
1394                 ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
1395                  ENABLE_SUPER_QUICK_CALIBRATION);
1396         u32 correct_mask_vg = param->read_correct_mask_vg;
1397         u32 tmp_bit_chk;
1398         u32 base_rw_mgr;
1399         u32 addr;
1400
1401         int r, vg, ret;
1402
1403         *bit_chk = param->read_correct_mask;
1404
1405         for (r = rank_bgn; r < rank_end; r++) {
1406                 if (param->skip_ranks[r])
1407                         /* request to skip the rank */
1408                         continue;
1409
1410                 /* set rank */
1411                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1412
1413                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1414
1415                 writel(RW_MGR_READ_B2B_WAIT1,
1416                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1417
1418                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1419                 writel(RW_MGR_READ_B2B_WAIT2,
1420                         &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1421
1422                 if (quick_read_mode)
1423                         writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1424                         /* need at least two (1+1) reads to capture failures */
1425                 else if (all_groups)
1426                         writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1427                 else
1428                         writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
1429
1430                 writel(RW_MGR_READ_B2B,
1431                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1432                 if (all_groups)
1433                         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
1434                                RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
1435                                &sdr_rw_load_mgr_regs->load_cntr3);
1436                 else
1437                         writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1438
1439                 writel(RW_MGR_READ_B2B,
1440                         &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1441
1442                 tmp_bit_chk = 0;
1443                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; vg >= 0;
1444                      vg--) {
1445                         /* Reset the FIFOs to get pointers to known state. */
1446                         writel(0, &phy_mgr_cmd->fifo_reset);
1447                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1448                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1449
1450                         if (all_groups) {
1451                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1452                                        RW_MGR_RUN_ALL_GROUPS_OFFSET;
1453                         } else {
1454                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1455                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1456                         }
1457
1458                         writel(RW_MGR_READ_B2B, addr +
1459                                ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
1460                                vg) << 2));
1461
1462                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1463                         tmp_bit_chk <<= RW_MGR_MEM_DQ_PER_READ_DQS /
1464                                         RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
1465                         tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
1466                 }
1467
1468                 *bit_chk &= tmp_bit_chk;
1469         }
1470
1471         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1472         writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));
1473
1474         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1475
1476         if (all_correct) {
1477                 ret = (*bit_chk == param->read_correct_mask);
1478                 debug_cond(DLEVEL == 2,
1479                            "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
1480                            __func__, __LINE__, group, all_groups, *bit_chk,
1481                            param->read_correct_mask, ret);
1482         } else  {
1483                 ret = (*bit_chk != 0x00);
1484                 debug_cond(DLEVEL == 2,
1485                            "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
1486                            __func__, __LINE__, group, all_groups, *bit_chk,
1487                            0, ret);
1488         }
1489
1490         return ret;
1491 }
1492
1493 /**
1494  * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
1495  * @grp:                Read/Write group
1496  * @num_tries:          Number of retries of the test
1497  * @all_correct:        All bits must be correct in the mask
1498  * @all_groups:         Test all R/W groups
1499  *
1500  * Perform a READ test across all memory ranks.
1501  */
1502 static int
1503 rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries,
1504                                          const u32 all_correct,
1505                                          const u32 all_groups)
1506 {
1507         u32 bit_chk;
1508         return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct,
1509                                               &bit_chk, all_groups, 1);
1510 }
1511
1512 /**
1513  * rw_mgr_incr_vfifo() - Increase VFIFO value
1514  * @grp:        Read/Write group
1515  *
1516  * Increase VFIFO value.
1517  */
1518 static void rw_mgr_incr_vfifo(const u32 grp)
1519 {
1520         writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
1521 }
1522
1523 /**
1524  * rw_mgr_decr_vfifo() - Decrease VFIFO value
1525  * @grp:        Read/Write group
1526  *
1527  * Decrease VFIFO value.
1528  */
1529 static void rw_mgr_decr_vfifo(const u32 grp)
1530 {
1531         u32 i;
1532
1533         for (i = 0; i < VFIFO_SIZE - 1; i++)
1534                 rw_mgr_incr_vfifo(grp);
1535 }
1536
1537 /**
1538  * find_vfifo_failing_read() - Push VFIFO to get a failing read
1539  * @grp:        Read/Write group
1540  *
1541  * Push VFIFO until a failing read happens.
1542  */
1543 static int find_vfifo_failing_read(const u32 grp)
1544 {
1545         u32 v, ret, fail_cnt = 0;
1546
1547         for (v = 0; v < VFIFO_SIZE; v++) {
1548                 debug_cond(DLEVEL == 2, "%s:%d: vfifo %u\n",
1549                            __func__, __LINE__, v);
1550                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1551                                                 PASS_ONE_BIT, 0);
1552                 if (!ret) {
1553                         fail_cnt++;
1554
1555                         if (fail_cnt == 2)
1556                                 return v;
1557                 }
1558
1559                 /* Fiddle with FIFO. */
1560                 rw_mgr_incr_vfifo(grp);
1561         }
1562
1563         /* No failing read found! Something must have gone wrong. */
1564         debug_cond(DLEVEL == 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
1565         return 0;
1566 }
1567
1568 /**
1569  * sdr_find_phase_delay() - Find DQS enable phase or delay
1570  * @working:    If 1, look for working phase/delay, if 0, look for non-working
1571  * @delay:      If 1, look for delay, if 0, look for phase
1572  * @grp:        Read/Write group
1573  * @work:       Working window position
1574  * @work_inc:   Working window increment
1575  * @pd:         DQS Phase/Delay Iterator
1576  *
1577  * Find working or non-working DQS enable phase setting.
1578  */
1579 static int sdr_find_phase_delay(int working, int delay, const u32 grp,
1580                                 u32 *work, const u32 work_inc, u32 *pd)
1581 {
1582         const u32 max = delay ? IO_DQS_EN_DELAY_MAX : IO_DQS_EN_PHASE_MAX;
1583         u32 ret;
1584
1585         for (; *pd <= max; (*pd)++) {
1586                 if (delay)
1587                         scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd);
1588                 else
1589                         scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd);
1590
1591                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1592                                         PASS_ONE_BIT, 0);
1593                 if (!working)
1594                         ret = !ret;
1595
1596                 if (ret)
1597                         return 0;
1598
1599                 if (work)
1600                         *work += work_inc;
1601         }
1602
1603         return -EINVAL;
1604 }
1605 /**
1606  * sdr_find_phase() - Find DQS enable phase
1607  * @working:    If 1, look for working phase, if 0, look for non-working phase
1608  * @grp:        Read/Write group
1609  * @work:       Working window position
1610  * @i:          Iterator
1611  * @p:          DQS Phase Iterator
1612  *
1613  * Find working or non-working DQS enable phase setting.
1614  */
1615 static int sdr_find_phase(int working, const u32 grp, u32 *work,
1616                           u32 *i, u32 *p)
1617 {
1618         const u32 end = VFIFO_SIZE + (working ? 0 : 1);
1619         int ret;
1620
1621         for (; *i < end; (*i)++) {
1622                 if (working)
1623                         *p = 0;
1624
1625                 ret = sdr_find_phase_delay(working, 0, grp, work,
1626                                            IO_DELAY_PER_OPA_TAP, p);
1627                 if (!ret)
1628                         return 0;
1629
1630                 if (*p > IO_DQS_EN_PHASE_MAX) {
1631                         /* Fiddle with FIFO. */
1632                         rw_mgr_incr_vfifo(grp);
1633                         if (!working)
1634                                 *p = 0;
1635                 }
1636         }
1637
1638         return -EINVAL;
1639 }
1640
1641 /**
1642  * sdr_working_phase() - Find working DQS enable phase
1643  * @grp:        Read/Write group
1644  * @work_bgn:   Working window start position
1645  * @d:          dtaps output value
1646  * @p:          DQS Phase Iterator
1647  * @i:          Iterator
1648  *
1649  * Find working DQS enable phase setting.
1650  */
1651 static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d,
1652                              u32 *p, u32 *i)
1653 {
1654         const u32 dtaps_per_ptap = IO_DELAY_PER_OPA_TAP /
1655                                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1656         int ret;
1657
1658         *work_bgn = 0;
1659
1660         for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
1661                 *i = 0;
1662                 scc_mgr_set_dqs_en_delay_all_ranks(grp, *d);
1663                 ret = sdr_find_phase(1, grp, work_bgn, i, p);
1664                 if (!ret)
1665                         return 0;
1666                 *work_bgn += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1667         }
1668
1669         /* Cannot find working solution */
1670         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1671                    __func__, __LINE__);
1672         return -EINVAL;
1673 }
1674
1675 /**
1676  * sdr_backup_phase() - Find DQS enable backup phase
1677  * @grp:        Read/Write group
1678  * @work_bgn:   Working window start position
1679  * @p:          DQS Phase Iterator
1680  *
1681  * Find DQS enable backup phase setting.
1682  */
1683 static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p)
1684 {
1685         u32 tmp_delay, d;
1686         int ret;
1687
1688         /* Special case code for backing up a phase */
1689         if (*p == 0) {
1690                 *p = IO_DQS_EN_PHASE_MAX;
1691                 rw_mgr_decr_vfifo(grp);
1692         } else {
1693                 (*p)--;
1694         }
1695         tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1696         scc_mgr_set_dqs_en_phase_all_ranks(grp, *p);
1697
1698         for (d = 0; d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; d++) {
1699                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1700
1701                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1702                                         PASS_ONE_BIT, 0);
1703                 if (ret) {
1704                         *work_bgn = tmp_delay;
1705                         break;
1706                 }
1707
1708                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1709         }
1710
1711         /* Restore VFIFO to old state before we decremented it (if needed). */
1712         (*p)++;
1713         if (*p > IO_DQS_EN_PHASE_MAX) {
1714                 *p = 0;
1715                 rw_mgr_incr_vfifo(grp);
1716         }
1717
1718         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1719 }
1720
1721 /**
1722  * sdr_nonworking_phase() - Find non-working DQS enable phase
1723  * @grp:        Read/Write group
1724  * @work_end:   Working window end position
1725  * @p:          DQS Phase Iterator
1726  * @i:          Iterator
1727  *
1728  * Find non-working DQS enable phase setting.
1729  */
1730 static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i)
1731 {
1732         int ret;
1733
1734         (*p)++;
1735         *work_end += IO_DELAY_PER_OPA_TAP;
1736         if (*p > IO_DQS_EN_PHASE_MAX) {
1737                 /* Fiddle with FIFO. */
1738                 *p = 0;
1739                 rw_mgr_incr_vfifo(grp);
1740         }
1741
1742         ret = sdr_find_phase(0, grp, work_end, i, p);
1743         if (ret) {
1744                 /* Cannot see edge of failing read. */
1745                 debug_cond(DLEVEL == 2, "%s:%d: end: failed\n",
1746                            __func__, __LINE__);
1747         }
1748
1749         return ret;
1750 }
1751
1752 /**
1753  * sdr_find_window_center() - Find center of the working DQS window.
1754  * @grp:        Read/Write group
1755  * @work_bgn:   First working settings
1756  * @work_end:   Last working settings
1757  *
1758  * Find center of the working DQS enable window.
1759  */
1760 static int sdr_find_window_center(const u32 grp, const u32 work_bgn,
1761                                   const u32 work_end)
1762 {
1763         u32 work_mid;
1764         int tmp_delay = 0;
1765         int i, p, d;
1766
1767         work_mid = (work_bgn + work_end) / 2;
1768
1769         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1770                    work_bgn, work_end, work_mid);
1771         /* Get the middle delay to be less than a VFIFO delay */
1772         tmp_delay = (IO_DQS_EN_PHASE_MAX + 1) * IO_DELAY_PER_OPA_TAP;
1773
1774         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1775         work_mid %= tmp_delay;
1776         debug_cond(DLEVEL == 2, "new work_mid %d\n", work_mid);
1777
1778         tmp_delay = rounddown(work_mid, IO_DELAY_PER_OPA_TAP);
1779         if (tmp_delay > IO_DQS_EN_PHASE_MAX * IO_DELAY_PER_OPA_TAP)
1780                 tmp_delay = IO_DQS_EN_PHASE_MAX * IO_DELAY_PER_OPA_TAP;
1781         p = tmp_delay / IO_DELAY_PER_OPA_TAP;
1782
1783         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
1784
1785         d = DIV_ROUND_UP(work_mid - tmp_delay, IO_DELAY_PER_DQS_EN_DCHAIN_TAP);
1786         if (d > IO_DQS_EN_DELAY_MAX)
1787                 d = IO_DQS_EN_DELAY_MAX;
1788         tmp_delay += d * IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1789
1790         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
1791
1792         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1793         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1794
1795         /*
1796          * push vfifo until we can successfully calibrate. We can do this
1797          * because the largest possible margin in 1 VFIFO cycle.
1798          */
1799         for (i = 0; i < VFIFO_SIZE; i++) {
1800                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center\n");
1801                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1802                                                              PASS_ONE_BIT,
1803                                                              0)) {
1804                         debug_cond(DLEVEL == 2,
1805                                    "%s:%d center: found: ptap=%u dtap=%u\n",
1806                                    __func__, __LINE__, p, d);
1807                         return 0;
1808                 }
1809
1810                 /* Fiddle with FIFO. */
1811                 rw_mgr_incr_vfifo(grp);
1812         }
1813
1814         debug_cond(DLEVEL == 2, "%s:%d center: failed.\n",
1815                    __func__, __LINE__);
1816         return -EINVAL;
1817 }
1818
1819 /**
1820  * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use
1821  * @grp:        Read/Write Group
1822  *
1823  * Find a good DQS enable to use.
1824  */
1825 static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp)
1826 {
1827         u32 d, p, i;
1828         u32 dtaps_per_ptap;
1829         u32 work_bgn, work_end;
1830         u32 found_passing_read, found_failing_read, initial_failing_dtap;
1831         int ret;
1832
1833         debug("%s:%d %u\n", __func__, __LINE__, grp);
1834
1835         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1836
1837         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1838         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1839
1840         /* Step 0: Determine number of delay taps for each phase tap. */
1841         dtaps_per_ptap = IO_DELAY_PER_OPA_TAP / IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1842
1843         /* Step 1: First push vfifo until we get a failing read. */
1844         find_vfifo_failing_read(grp);
1845
1846         /* Step 2: Find first working phase, increment in ptaps. */
1847         work_bgn = 0;
1848         ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i);
1849         if (ret)
1850                 return ret;
1851
1852         work_end = work_bgn;
1853
1854         /*
1855          * If d is 0 then the working window covers a phase tap and we can
1856          * follow the old procedure. Otherwise, we've found the beginning
1857          * and we need to increment the dtaps until we find the end.
1858          */
1859         if (d == 0) {
1860                 /*
1861                  * Step 3a: If we have room, back off by one and
1862                  *          increment in dtaps.
1863                  */
1864                 sdr_backup_phase(grp, &work_bgn, &p);
1865
1866                 /*
1867                  * Step 4a: go forward from working phase to non working
1868                  * phase, increment in ptaps.
1869                  */
1870                 ret = sdr_nonworking_phase(grp, &work_end, &p, &i);
1871                 if (ret)
1872                         return ret;
1873
1874                 /* Step 5a: Back off one from last, increment in dtaps. */
1875
1876                 /* Special case code for backing up a phase */
1877                 if (p == 0) {
1878                         p = IO_DQS_EN_PHASE_MAX;
1879                         rw_mgr_decr_vfifo(grp);
1880                 } else {
1881                         p = p - 1;
1882                 }
1883
1884                 work_end -= IO_DELAY_PER_OPA_TAP;
1885                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1886
1887                 d = 0;
1888
1889                 debug_cond(DLEVEL == 2, "%s:%d p: ptap=%u\n",
1890                            __func__, __LINE__, p);
1891         }
1892
1893         /* The dtap increment to find the failing edge is done here. */
1894         sdr_find_phase_delay(0, 1, grp, &work_end,
1895                              IO_DELAY_PER_DQS_EN_DCHAIN_TAP, &d);
1896
1897         /* Go back to working dtap */
1898         if (d != 0)
1899                 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1900
1901         debug_cond(DLEVEL == 2,
1902                    "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
1903                    __func__, __LINE__, p, d - 1, work_end);
1904
1905         if (work_end < work_bgn) {
1906                 /* nil range */
1907                 debug_cond(DLEVEL == 2, "%s:%d end-2: failed\n",
1908                            __func__, __LINE__);
1909                 return -EINVAL;
1910         }
1911
1912         debug_cond(DLEVEL == 2, "%s:%d found range [%u,%u]\n",
1913                    __func__, __LINE__, work_bgn, work_end);
1914
1915         /*
1916          * We need to calculate the number of dtaps that equal a ptap.
1917          * To do that we'll back up a ptap and re-find the edge of the
1918          * window using dtaps
1919          */
1920         debug_cond(DLEVEL == 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
1921                    __func__, __LINE__);
1922
1923         /* Special case code for backing up a phase */
1924         if (p == 0) {
1925                 p = IO_DQS_EN_PHASE_MAX;
1926                 rw_mgr_decr_vfifo(grp);
1927                 debug_cond(DLEVEL == 2, "%s:%d backedup cycle/phase: p=%u\n",
1928                            __func__, __LINE__, p);
1929         } else {
1930                 p = p - 1;
1931                 debug_cond(DLEVEL == 2, "%s:%d backedup phase only: p=%u",
1932                            __func__, __LINE__, p);
1933         }
1934
1935         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1936
1937         /*
1938          * Increase dtap until we first see a passing read (in case the
1939          * window is smaller than a ptap), and then a failing read to
1940          * mark the edge of the window again.
1941          */
1942
1943         /* Find a passing read. */
1944         debug_cond(DLEVEL == 2, "%s:%d find passing read\n",
1945                    __func__, __LINE__);
1946
1947         initial_failing_dtap = d;
1948
1949         found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d);
1950         if (found_passing_read) {
1951                 /* Find a failing read. */
1952                 debug_cond(DLEVEL == 2, "%s:%d find failing read\n",
1953                            __func__, __LINE__);
1954                 d++;
1955                 found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0,
1956                                                            &d);
1957         } else {
1958                 debug_cond(DLEVEL == 1,
1959                            "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
1960                            __func__, __LINE__);
1961         }
1962
1963         /*
1964          * The dynamically calculated dtaps_per_ptap is only valid if we
1965          * found a passing/failing read. If we didn't, it means d hit the max
1966          * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its
1967          * statically calculated value.
1968          */
1969         if (found_passing_read && found_failing_read)
1970                 dtaps_per_ptap = d - initial_failing_dtap;
1971
1972         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1973         debug_cond(DLEVEL == 2, "%s:%d dtaps_per_ptap=%u - %u = %u",
1974                    __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);
1975
1976         /* Step 6: Find the centre of the window. */
1977         ret = sdr_find_window_center(grp, work_bgn, work_end);
1978
1979         return ret;
1980 }
1981
1982 /**
1983  * search_stop_check() - Check if the detected edge is valid
1984  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
1985  * @d:                  DQS delay
1986  * @rank_bgn:           Rank number
1987  * @write_group:        Write Group
1988  * @read_group:         Read Group
1989  * @bit_chk:            Resulting bit mask after the test
1990  * @sticky_bit_chk:     Resulting sticky bit mask after the test
1991  * @use_read_test:      Perform read test
1992  *
1993  * Test if the found edge is valid.
1994  */
1995 static u32 search_stop_check(const int write, const int d, const int rank_bgn,
1996                              const u32 write_group, const u32 read_group,
1997                              u32 *bit_chk, u32 *sticky_bit_chk,
1998                              const u32 use_read_test)
1999 {
2000         const u32 ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
2001                           RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
2002         const u32 correct_mask = write ? param->write_correct_mask :
2003                                          param->read_correct_mask;
2004         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2005                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2006         u32 ret;
2007         /*
2008          * Stop searching when the read test doesn't pass AND when
2009          * we've seen a passing read on every bit.
2010          */
2011         if (write) {                    /* WRITE-ONLY */
2012                 ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2013                                                          0, PASS_ONE_BIT,
2014                                                          bit_chk, 0);
2015         } else if (use_read_test) {     /* READ-ONLY */
2016                 ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group,
2017                                                         NUM_READ_PB_TESTS,
2018                                                         PASS_ONE_BIT, bit_chk,
2019                                                         0, 0);
2020         } else {                        /* READ-ONLY */
2021                 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0,
2022                                                 PASS_ONE_BIT, bit_chk, 0);
2023                 *bit_chk = *bit_chk >> (per_dqs *
2024                         (read_group - (write_group * ratio)));
2025                 ret = (*bit_chk == 0);
2026         }
2027         *sticky_bit_chk = *sticky_bit_chk | *bit_chk;
2028         ret = ret && (*sticky_bit_chk == correct_mask);
2029         debug_cond(DLEVEL == 2,
2030                    "%s:%d center(left): dtap=%u => %u == %u && %u",
2031                    __func__, __LINE__, d,
2032                    *sticky_bit_chk, correct_mask, ret);
2033         return ret;
2034 }
2035
2036 /**
2037  * search_left_edge() - Find left edge of DQ/DQS working phase
2038  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2039  * @rank_bgn:           Rank number
2040  * @write_group:        Write Group
2041  * @read_group:         Read Group
2042  * @test_bgn:           Rank number to begin the test
2043  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2044  * @left_edge:          Left edge of the DQ/DQS phase
2045  * @right_edge:         Right edge of the DQ/DQS phase
2046  * @use_read_test:      Perform read test
2047  *
2048  * Find left edge of DQ/DQS working phase.
2049  */
2050 static void search_left_edge(const int write, const int rank_bgn,
2051         const u32 write_group, const u32 read_group, const u32 test_bgn,
2052         u32 *sticky_bit_chk,
2053         int *left_edge, int *right_edge, const u32 use_read_test)
2054 {
2055         const u32 delay_max = write ? IO_IO_OUT1_DELAY_MAX : IO_IO_IN_DELAY_MAX;
2056         const u32 dqs_max = write ? IO_IO_OUT1_DELAY_MAX : IO_DQS_IN_DELAY_MAX;
2057         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2058                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2059         u32 stop, bit_chk;
2060         int i, d;
2061
2062         for (d = 0; d <= dqs_max; d++) {
2063                 if (write)
2064                         scc_mgr_apply_group_dq_out1_delay(d);
2065                 else
2066                         scc_mgr_apply_group_dq_in_delay(test_bgn, d);
2067
2068                 writel(0, &sdr_scc_mgr->update);
2069
2070                 stop = search_stop_check(write, d, rank_bgn, write_group,
2071                                          read_group, &bit_chk, sticky_bit_chk,
2072                                          use_read_test);
2073                 if (stop == 1)
2074                         break;
2075
2076                 /* stop != 1 */
2077                 for (i = 0; i < per_dqs; i++) {
2078                         if (bit_chk & 1) {
2079                                 /*
2080                                  * Remember a passing test as
2081                                  * the left_edge.
2082                                  */
2083                                 left_edge[i] = d;
2084                         } else {
2085                                 /*
2086                                  * If a left edge has not been seen
2087                                  * yet, then a future passing test
2088                                  * will mark this edge as the right
2089                                  * edge.
2090                                  */
2091                                 if (left_edge[i] == delay_max + 1)
2092                                         right_edge[i] = -(d + 1);
2093                         }
2094                         bit_chk >>= 1;
2095                 }
2096         }
2097
2098         /* Reset DQ delay chains to 0 */
2099         if (write)
2100                 scc_mgr_apply_group_dq_out1_delay(0);
2101         else
2102                 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2103
2104         *sticky_bit_chk = 0;
2105         for (i = per_dqs - 1; i >= 0; i--) {
2106                 debug_cond(DLEVEL == 2,
2107                            "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2108                            __func__, __LINE__, i, left_edge[i],
2109                            i, right_edge[i]);
2110
2111                 /*
2112                  * Check for cases where we haven't found the left edge,
2113                  * which makes our assignment of the the right edge invalid.
2114                  * Reset it to the illegal value.
2115                  */
2116                 if ((left_edge[i] == delay_max + 1) &&
2117                     (right_edge[i] != delay_max + 1)) {
2118                         right_edge[i] = delay_max + 1;
2119                         debug_cond(DLEVEL == 2,
2120                                    "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
2121                                    __func__, __LINE__, i, right_edge[i]);
2122                 }
2123
2124                 /*
2125                  * Reset sticky bit
2126                  * READ: except for bits where we have seen both
2127                  *       the left and right edge.
2128                  * WRITE: except for bits where we have seen the
2129                  *        left edge.
2130                  */
2131                 *sticky_bit_chk <<= 1;
2132                 if (write) {
2133                         if (left_edge[i] != delay_max + 1)
2134                                 *sticky_bit_chk |= 1;
2135                 } else {
2136                         if ((left_edge[i] != delay_max + 1) &&
2137                             (right_edge[i] != delay_max + 1))
2138                                 *sticky_bit_chk |= 1;
2139                 }
2140         }
2141
2142
2143 }
2144
2145 /**
2146  * search_right_edge() - Find right edge of DQ/DQS working phase
2147  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2148  * @rank_bgn:           Rank number
2149  * @write_group:        Write Group
2150  * @read_group:         Read Group
2151  * @start_dqs:          DQS start phase
2152  * @start_dqs_en:       DQS enable start phase
2153  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2154  * @left_edge:          Left edge of the DQ/DQS phase
2155  * @right_edge:         Right edge of the DQ/DQS phase
2156  * @use_read_test:      Perform read test
2157  *
2158  * Find right edge of DQ/DQS working phase.
2159  */
2160 static int search_right_edge(const int write, const int rank_bgn,
2161         const u32 write_group, const u32 read_group,
2162         const int start_dqs, const int start_dqs_en,
2163         u32 *sticky_bit_chk,
2164         int *left_edge, int *right_edge, const u32 use_read_test)
2165 {
2166         const u32 delay_max = write ? IO_IO_OUT1_DELAY_MAX : IO_IO_IN_DELAY_MAX;
2167         const u32 dqs_max = write ? IO_IO_OUT1_DELAY_MAX : IO_DQS_IN_DELAY_MAX;
2168         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2169                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2170         u32 stop, bit_chk;
2171         int i, d;
2172
2173         for (d = 0; d <= dqs_max - start_dqs; d++) {
2174                 if (write) {    /* WRITE-ONLY */
2175                         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2176                                                                 d + start_dqs);
2177                 } else {        /* READ-ONLY */
2178                         scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
2179                         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2180                                 uint32_t delay = d + start_dqs_en;
2181                                 if (delay > IO_DQS_EN_DELAY_MAX)
2182                                         delay = IO_DQS_EN_DELAY_MAX;
2183                                 scc_mgr_set_dqs_en_delay(read_group, delay);
2184                         }
2185                         scc_mgr_load_dqs(read_group);
2186                 }
2187
2188                 writel(0, &sdr_scc_mgr->update);
2189
2190                 stop = search_stop_check(write, d, rank_bgn, write_group,
2191                                          read_group, &bit_chk, sticky_bit_chk,
2192                                          use_read_test);
2193                 if (stop == 1) {
2194                         if (write && (d == 0)) {        /* WRITE-ONLY */
2195                                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2196                                         /*
2197                                          * d = 0 failed, but it passed when
2198                                          * testing the left edge, so it must be
2199                                          * marginal, set it to -1
2200                                          */
2201                                         if (right_edge[i] == delay_max + 1 &&
2202                                             left_edge[i] != delay_max + 1)
2203                                                 right_edge[i] = -1;
2204                                 }
2205                         }
2206                         break;
2207                 }
2208
2209                 /* stop != 1 */
2210                 for (i = 0; i < per_dqs; i++) {
2211                         if (bit_chk & 1) {
2212                                 /*
2213                                  * Remember a passing test as
2214                                  * the right_edge.
2215                                  */
2216                                 right_edge[i] = d;
2217                         } else {
2218                                 if (d != 0) {
2219                                         /*
2220                                          * If a right edge has not
2221                                          * been seen yet, then a future
2222                                          * passing test will mark this
2223                                          * edge as the left edge.
2224                                          */
2225                                         if (right_edge[i] == delay_max + 1)
2226                                                 left_edge[i] = -(d + 1);
2227                                 } else {
2228                                         /*
2229                                          * d = 0 failed, but it passed
2230                                          * when testing the left edge,
2231                                          * so it must be marginal, set
2232                                          * it to -1
2233                                          */
2234                                         if (right_edge[i] == delay_max + 1 &&
2235                                             left_edge[i] != delay_max + 1)
2236                                                 right_edge[i] = -1;
2237                                         /*
2238                                          * If a right edge has not been
2239                                          * seen yet, then a future
2240                                          * passing test will mark this
2241                                          * edge as the left edge.
2242                                          */
2243                                         else if (right_edge[i] == delay_max + 1)
2244                                                 left_edge[i] = -(d + 1);
2245                                 }
2246                         }
2247
2248                         debug_cond(DLEVEL == 2, "%s:%d center[r,d=%u]: ",
2249                                    __func__, __LINE__, d);
2250                         debug_cond(DLEVEL == 2,
2251                                    "bit_chk_test=%i left_edge[%u]: %d ",
2252                                    bit_chk & 1, i, left_edge[i]);
2253                         debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2254                                    right_edge[i]);
2255                         bit_chk >>= 1;
2256                 }
2257         }
2258
2259         /* Check that all bits have a window */
2260         for (i = 0; i < per_dqs; i++) {
2261                 debug_cond(DLEVEL == 2,
2262                            "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d",
2263                            __func__, __LINE__, i, left_edge[i],
2264                            i, right_edge[i]);
2265                 if ((left_edge[i] == dqs_max + 1) ||
2266                     (right_edge[i] == dqs_max + 1))
2267                         return i + 1;   /* FIXME: If we fail, retval > 0 */
2268         }
2269
2270         return 0;
2271 }
2272
2273 /**
2274  * get_window_mid_index() - Find the best middle setting of DQ/DQS phase
2275  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2276  * @left_edge:          Left edge of the DQ/DQS phase
2277  * @right_edge:         Right edge of the DQ/DQS phase
2278  * @mid_min:            Best DQ/DQS phase middle setting
2279  *
2280  * Find index and value of the middle of the DQ/DQS working phase.
2281  */
2282 static int get_window_mid_index(const int write, int *left_edge,
2283                                 int *right_edge, int *mid_min)
2284 {
2285         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2286                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2287         int i, mid, min_index;
2288
2289         /* Find middle of window for each DQ bit */
2290         *mid_min = left_edge[0] - right_edge[0];
2291         min_index = 0;
2292         for (i = 1; i < per_dqs; i++) {
2293                 mid = left_edge[i] - right_edge[i];
2294                 if (mid < *mid_min) {
2295                         *mid_min = mid;
2296                         min_index = i;
2297                 }
2298         }
2299
2300         /*
2301          * -mid_min/2 represents the amount that we need to move DQS.
2302          * If mid_min is odd and positive we'll need to add one to make
2303          * sure the rounding in further calculations is correct (always
2304          * bias to the right), so just add 1 for all positive values.
2305          */
2306         if (*mid_min > 0)
2307                 (*mid_min)++;
2308         *mid_min = *mid_min / 2;
2309
2310         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n",
2311                    __func__, __LINE__, *mid_min, min_index);
2312         return min_index;
2313 }
2314
2315 /**
2316  * center_dq_windows() - Center the DQ/DQS windows
2317  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2318  * @left_edge:          Left edge of the DQ/DQS phase
2319  * @right_edge:         Right edge of the DQ/DQS phase
2320  * @mid_min:            Adjusted DQ/DQS phase middle setting
2321  * @orig_mid_min:       Original DQ/DQS phase middle setting
2322  * @min_index:          DQ/DQS phase middle setting index
2323  * @test_bgn:           Rank number to begin the test
2324  * @dq_margin:          Amount of shift for the DQ
2325  * @dqs_margin:         Amount of shift for the DQS
2326  *
2327  * Align the DQ/DQS windows in each group.
2328  */
2329 static void center_dq_windows(const int write, int *left_edge, int *right_edge,
2330                               const int mid_min, const int orig_mid_min,
2331                               const int min_index, const int test_bgn,
2332                               int *dq_margin, int *dqs_margin)
2333 {
2334         const u32 delay_max = write ? IO_IO_OUT1_DELAY_MAX : IO_IO_IN_DELAY_MAX;
2335         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2336                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2337         const u32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET :
2338                                       SCC_MGR_IO_IN_DELAY_OFFSET;
2339         const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off;
2340
2341         u32 temp_dq_io_delay1, temp_dq_io_delay2;
2342         int shift_dq, i, p;
2343
2344         /* Initialize data for export structures */
2345         *dqs_margin = delay_max + 1;
2346         *dq_margin  = delay_max + 1;
2347
2348         /* add delay to bring centre of all DQ windows to the same "level" */
2349         for (i = 0, p = test_bgn; i < per_dqs; i++, p++) {
2350                 /* Use values before divide by 2 to reduce round off error */
2351                 shift_dq = (left_edge[i] - right_edge[i] -
2352                         (left_edge[min_index] - right_edge[min_index]))/2  +
2353                         (orig_mid_min - mid_min);
2354
2355                 debug_cond(DLEVEL == 2,
2356                            "vfifo_center: before: shift_dq[%u]=%d\n",
2357                            i, shift_dq);
2358
2359                 temp_dq_io_delay1 = readl(addr + (p << 2));
2360                 temp_dq_io_delay2 = readl(addr + (i << 2));
2361
2362                 if (shift_dq + temp_dq_io_delay1 > delay_max)
2363                         shift_dq = delay_max - temp_dq_io_delay2;
2364                 else if (shift_dq + temp_dq_io_delay1 < 0)
2365                         shift_dq = -temp_dq_io_delay1;
2366
2367                 debug_cond(DLEVEL == 2,
2368                            "vfifo_center: after: shift_dq[%u]=%d\n",
2369                            i, shift_dq);
2370
2371                 if (write)
2372                         scc_mgr_set_dq_out1_delay(i, temp_dq_io_delay1 + shift_dq);
2373                 else
2374                         scc_mgr_set_dq_in_delay(p, temp_dq_io_delay1 + shift_dq);
2375
2376                 scc_mgr_load_dq(p);
2377
2378                 debug_cond(DLEVEL == 2,
2379                            "vfifo_center: margin[%u]=[%d,%d]\n", i,
2380                            left_edge[i] - shift_dq + (-mid_min),
2381                            right_edge[i] + shift_dq - (-mid_min));
2382
2383                 /* To determine values for export structures */
2384                 if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin)
2385                         *dq_margin = left_edge[i] - shift_dq + (-mid_min);
2386
2387                 if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin)
2388                         *dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2389         }
2390
2391 }
2392
2393 /**
2394  * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering
2395  * @rank_bgn:           Rank number
2396  * @rw_group:           Read/Write Group
2397  * @test_bgn:           Rank at which the test begins
2398  * @use_read_test:      Perform a read test
2399  * @update_fom:         Update FOM
2400  *
2401  * Per-bit deskew DQ and centering.
2402  */
2403 static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn,
2404                         const u32 rw_group, const u32 test_bgn,
2405                         const int use_read_test, const int update_fom)
2406 {
2407         const u32 addr =
2408                 SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET +
2409                 (rw_group << 2);
2410         /*
2411          * Store these as signed since there are comparisons with
2412          * signed numbers.
2413          */
2414         uint32_t sticky_bit_chk;
2415         int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
2416         int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
2417         int32_t orig_mid_min, mid_min;
2418         int32_t new_dqs, start_dqs, start_dqs_en, final_dqs_en;
2419         int32_t dq_margin, dqs_margin;
2420         int i, min_index;
2421         int ret;
2422
2423         debug("%s:%d: %u %u", __func__, __LINE__, rw_group, test_bgn);
2424
2425         start_dqs = readl(addr);
2426         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2427                 start_dqs_en = readl(addr - IO_DQS_EN_DELAY_OFFSET);
2428
2429         /* set the left and right edge of each bit to an illegal value */
2430         /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
2431         sticky_bit_chk = 0;
2432         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2433                 left_edge[i]  = IO_IO_IN_DELAY_MAX + 1;
2434                 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
2435         }
2436
2437         /* Search for the left edge of the window for each bit */
2438         search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn,
2439                          &sticky_bit_chk,
2440                          left_edge, right_edge, use_read_test);
2441
2442
2443         /* Search for the right edge of the window for each bit */
2444         ret = search_right_edge(0, rank_bgn, rw_group, rw_group,
2445                                 start_dqs, start_dqs_en,
2446                                 &sticky_bit_chk,
2447                                 left_edge, right_edge, use_read_test);
2448         if (ret) {
2449                 /*
2450                  * Restore delay chain settings before letting the loop
2451                  * in rw_mgr_mem_calibrate_vfifo to retry different
2452                  * dqs/ck relationships.
2453                  */
2454                 scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs);
2455                 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2456                         scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en);
2457
2458                 scc_mgr_load_dqs(rw_group);
2459                 writel(0, &sdr_scc_mgr->update);
2460
2461                 debug_cond(DLEVEL == 1,
2462                            "%s:%d vfifo_center: failed to find edge [%u]: %d %d",
2463                            __func__, __LINE__, i, left_edge[i], right_edge[i]);
2464                 if (use_read_test) {
2465                         set_failing_group_stage(rw_group *
2466                                 RW_MGR_MEM_DQ_PER_READ_DQS + i,
2467                                 CAL_STAGE_VFIFO,
2468                                 CAL_SUBSTAGE_VFIFO_CENTER);
2469                 } else {
2470                         set_failing_group_stage(rw_group *
2471                                 RW_MGR_MEM_DQ_PER_READ_DQS + i,
2472                                 CAL_STAGE_VFIFO_AFTER_WRITES,
2473                                 CAL_SUBSTAGE_VFIFO_CENTER);
2474                 }
2475                 return -EIO;
2476         }
2477
2478         min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min);
2479
2480         /* Determine the amount we can change DQS (which is -mid_min) */
2481         orig_mid_min = mid_min;
2482         new_dqs = start_dqs - mid_min;
2483         if (new_dqs > IO_DQS_IN_DELAY_MAX)
2484                 new_dqs = IO_DQS_IN_DELAY_MAX;
2485         else if (new_dqs < 0)
2486                 new_dqs = 0;
2487
2488         mid_min = start_dqs - new_dqs;
2489         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2490                    mid_min, new_dqs);
2491
2492         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2493                 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2494                         mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2495                 else if (start_dqs_en - mid_min < 0)
2496                         mid_min += start_dqs_en - mid_min;
2497         }
2498         new_dqs = start_dqs - mid_min;
2499
2500         debug_cond(DLEVEL == 1,
2501                    "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2502                    start_dqs,
2503                    IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2504                    new_dqs, mid_min);
2505
2506         /* Add delay to bring centre of all DQ windows to the same "level". */
2507         center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min,
2508                           min_index, test_bgn, &dq_margin, &dqs_margin);
2509
2510         /* Move DQS-en */
2511         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2512                 final_dqs_en = start_dqs_en - mid_min;
2513                 scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
2514                 scc_mgr_load_dqs(rw_group);
2515         }
2516
2517         /* Move DQS */
2518         scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
2519         scc_mgr_load_dqs(rw_group);
2520         debug_cond(DLEVEL == 2,
2521                    "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d",
2522                    __func__, __LINE__, dq_margin, dqs_margin);
2523
2524         /*
2525          * Do not remove this line as it makes sure all of our decisions
2526          * have been applied. Apply the update bit.
2527          */
2528         writel(0, &sdr_scc_mgr->update);
2529
2530         if ((dq_margin < 0) || (dqs_margin < 0))
2531                 return -EINVAL;
2532
2533         return 0;
2534 }
2535
2536 /**
2537  * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
2538  * @rw_group:   Read/Write Group
2539  * @phase:      DQ/DQS phase
2540  *
2541  * Because initially no communication ca be reliably performed with the memory
2542  * device, the sequencer uses a guaranteed write mechanism to write data into
2543  * the memory device.
2544  */
2545 static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
2546                                                  const u32 phase)
2547 {
2548         int ret;
2549
2550         /* Set a particular DQ/DQS phase. */
2551         scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);
2552
2553         debug_cond(DLEVEL == 1, "%s:%d guaranteed write: g=%u p=%u\n",
2554                    __func__, __LINE__, rw_group, phase);
2555
2556         /*
2557          * Altera EMI_RM 2015.05.04 :: Figure 1-25
2558          * Load up the patterns used by read calibration using the
2559          * current DQDQS phase.
2560          */
2561         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2562
2563         if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
2564                 return 0;
2565
2566         /*
2567          * Altera EMI_RM 2015.05.04 :: Figure 1-26
2568          * Back-to-Back reads of the patterns used for calibration.
2569          */
2570         ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
2571         if (ret)
2572                 debug_cond(DLEVEL == 1,
2573                            "%s:%d Guaranteed read test failed: g=%u p=%u\n",
2574                            __func__, __LINE__, rw_group, phase);
2575         return ret;
2576 }
2577
2578 /**
2579  * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
2580  * @rw_group:   Read/Write Group
2581  * @test_bgn:   Rank at which the test begins
2582  *
2583  * DQS enable calibration ensures reliable capture of the DQ signal without
2584  * glitches on the DQS line.
2585  */
2586 static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
2587                                                        const u32 test_bgn)
2588 {
2589         /*
2590          * Altera EMI_RM 2015.05.04 :: Figure 1-27
2591          * DQS and DQS Eanble Signal Relationships.
2592          */
2593
2594         /* We start at zero, so have one less dq to devide among */
2595         const u32 delay_step = IO_IO_IN_DELAY_MAX /
2596                                (RW_MGR_MEM_DQ_PER_READ_DQS - 1);
2597         int ret;
2598         u32 i, p, d, r;
2599
2600         debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
2601
2602         /* Try different dq_in_delays since the DQ path is shorter than DQS. */
2603         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
2604              r += NUM_RANKS_PER_SHADOW_REG) {
2605                 for (i = 0, p = test_bgn, d = 0;
2606                      i < RW_MGR_MEM_DQ_PER_READ_DQS;
2607                      i++, p++, d += delay_step) {
2608                         debug_cond(DLEVEL == 1,
2609                                    "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
2610                                    __func__, __LINE__, rw_group, r, i, p, d);
2611
2612                         scc_mgr_set_dq_in_delay(p, d);
2613                         scc_mgr_load_dq(p);
2614                 }
2615
2616                 writel(0, &sdr_scc_mgr->update);
2617         }
2618
2619         /*
2620          * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
2621          * dq_in_delay values
2622          */
2623         ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group);
2624
2625         debug_cond(DLEVEL == 1,
2626                    "%s:%d: g=%u found=%u; Reseting delay chain to zero\n",
2627                    __func__, __LINE__, rw_group, !ret);
2628
2629         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
2630              r += NUM_RANKS_PER_SHADOW_REG) {
2631                 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2632                 writel(0, &sdr_scc_mgr->update);
2633         }
2634
2635         return ret;
2636 }
2637
2638 /**
2639  * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
2640  * @rw_group:           Read/Write Group
2641  * @test_bgn:           Rank at which the test begins
2642  * @use_read_test:      Perform a read test
2643  * @update_fom:         Update FOM
2644  *
2645  * The centerin DQ/DQS stage attempts to align DQ and DQS signals on reads
2646  * within a group.
2647  */
2648 static int
2649 rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
2650                                       const int use_read_test,
2651                                       const int update_fom)
2652
2653 {
2654         int ret, grp_calibrated;
2655         u32 rank_bgn, sr;
2656
2657         /*
2658          * Altera EMI_RM 2015.05.04 :: Figure 1-28
2659          * Read per-bit deskew can be done on a per shadow register basis.
2660          */
2661         grp_calibrated = 1;
2662         for (rank_bgn = 0, sr = 0;
2663              rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2664              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
2665                 /* Check if this set of ranks should be skipped entirely. */
2666                 if (param->skip_shadow_regs[sr])
2667                         continue;
2668
2669                 ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
2670                                                         test_bgn,
2671                                                         use_read_test,
2672                                                         update_fom);
2673                 if (!ret)
2674                         continue;
2675
2676                 grp_calibrated = 0;
2677         }
2678
2679         if (!grp_calibrated)
2680                 return -EIO;
2681
2682         return 0;
2683 }
2684
2685 /**
2686  * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
2687  * @rw_group:           Read/Write Group
2688  * @test_bgn:           Rank at which the test begins
2689  *
2690  * Stage 1: Calibrate the read valid prediction FIFO.
2691  *
2692  * This function implements UniPHY calibration Stage 1, as explained in
2693  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2694  *
2695  * - read valid prediction will consist of finding:
2696  *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
2697  *   - DQS input phase  and DQS input delay (DQ/DQS Centering)
2698  *  - we also do a per-bit deskew on the DQ lines.
2699  */
2700 static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
2701 {
2702         uint32_t p, d;
2703         uint32_t dtaps_per_ptap;
2704         uint32_t failed_substage;
2705
2706         int ret;
2707
2708         debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2709
2710         /* Update info for sims */
2711         reg_file_set_group(rw_group);
2712         reg_file_set_stage(CAL_STAGE_VFIFO);
2713         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2714
2715         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2716
2717         /* USER Determine number of delay taps for each phase tap. */
2718         dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
2719                                       IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
2720
2721         for (d = 0; d <= dtaps_per_ptap; d += 2) {
2722                 /*
2723                  * In RLDRAMX we may be messing the delay of pins in
2724                  * the same write rw_group but outside of the current read
2725                  * the rw_group, but that's ok because we haven't calibrated
2726                  * output side yet.
2727                  */
2728                 if (d > 0) {
2729                         scc_mgr_apply_group_all_out_delay_add_all_ranks(
2730                                                                 rw_group, d);
2731                 }
2732
2733                 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX; p++) {
2734                         /* 1) Guaranteed Write */
2735                         ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p);
2736                         if (ret)
2737                                 break;
2738
2739                         /* 2) DQS Enable Calibration */
2740                         ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group,
2741                                                                           test_bgn);
2742                         if (ret) {
2743                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2744                                 continue;
2745                         }
2746
2747                         /* 3) Centering DQ/DQS */
2748                         /*
2749                          * If doing read after write calibration, do not update
2750                          * FOM now. Do it then.
2751                          */
2752                         ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group,
2753                                                                 test_bgn, 1, 0);
2754                         if (ret) {
2755                                 failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
2756                                 continue;
2757                         }
2758
2759                         /* All done. */
2760                         goto cal_done_ok;
2761                 }
2762         }
2763
2764         /* Calibration Stage 1 failed. */
2765         set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage);
2766         return 0;
2767
2768         /* Calibration Stage 1 completed OK. */
2769 cal_done_ok:
2770         /*
2771          * Reset the delay chains back to zero if they have moved > 1
2772          * (check for > 1 because loop will increase d even when pass in
2773          * first case).
2774          */
2775         if (d > 2)
2776                 scc_mgr_zero_group(rw_group, 1);
2777
2778         return 1;
2779 }
2780
2781 /**
2782  * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
2783  * @rw_group:           Read/Write Group
2784  * @test_bgn:           Rank at which the test begins
2785  *
2786  * Stage 3: DQ/DQS Centering.
2787  *
2788  * This function implements UniPHY calibration Stage 3, as explained in
2789  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2790  */
2791 static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group,
2792                                           const u32 test_bgn)
2793 {
2794         int ret;
2795
2796         debug("%s:%d %u %u", __func__, __LINE__, rw_group, test_bgn);
2797
2798         /* Update info for sims. */
2799         reg_file_set_group(rw_group);
2800         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2801         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2802
2803         ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1);
2804         if (ret)
2805                 set_failing_group_stage(rw_group,
2806                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2807                                         CAL_SUBSTAGE_VFIFO_CENTER);
2808         return ret;
2809 }
2810
2811 /**
2812  * rw_mgr_mem_calibrate_lfifo() - Minimize latency
2813  *
2814  * Stage 4: Minimize latency.
2815  *
2816  * This function implements UniPHY calibration Stage 4, as explained in
2817  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2818  * Calibrate LFIFO to find smallest read latency.
2819  */
2820 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2821 {
2822         int found_one = 0;
2823
2824         debug("%s:%d\n", __func__, __LINE__);
2825
2826         /* Update info for sims. */
2827         reg_file_set_stage(CAL_STAGE_LFIFO);
2828         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2829
2830         /* Load up the patterns used by read calibration for all ranks */
2831         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2832
2833         do {
2834                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2835                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
2836                            __func__, __LINE__, gbl->curr_read_lat);
2837
2838                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS,
2839                                                               PASS_ALL_BITS, 1))
2840                         break;
2841
2842                 found_one = 1;
2843                 /*
2844                  * Reduce read latency and see if things are
2845                  * working correctly.
2846                  */
2847                 gbl->curr_read_lat--;
2848         } while (gbl->curr_read_lat > 0);
2849
2850         /* Reset the fifos to get pointers to known state. */
2851         writel(0, &phy_mgr_cmd->fifo_reset);
2852
2853         if (found_one) {
2854                 /* Add a fudge factor to the read latency that was determined */
2855                 gbl->curr_read_lat += 2;
2856                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2857                 debug_cond(DLEVEL == 2,
2858                            "%s:%d lfifo: success: using read_lat=%u\n",
2859                            __func__, __LINE__, gbl->curr_read_lat);
2860         } else {
2861                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2862                                         CAL_SUBSTAGE_READ_LATENCY);
2863
2864                 debug_cond(DLEVEL == 2,
2865                            "%s:%d lfifo: failed at initial read_lat=%u\n",
2866                            __func__, __LINE__, gbl->curr_read_lat);
2867         }
2868
2869         return found_one;
2870 }
2871
2872 /**
2873  * search_window() - Search for the/part of the window with DM/DQS shift
2874  * @search_dm:          If 1, search for the DM shift, if 0, search for DQS shift
2875  * @rank_bgn:           Rank number
2876  * @write_group:        Write Group
2877  * @bgn_curr:           Current window begin
2878  * @end_curr:           Current window end
2879  * @bgn_best:           Current best window begin
2880  * @end_best:           Current best window end
2881  * @win_best:           Size of the best window
2882  * @new_dqs:            New DQS value (only applicable if search_dm = 0).
2883  *
2884  * Search for the/part of the window with DM/DQS shift.
2885  */
2886 static void search_window(const int search_dm,
2887                           const u32 rank_bgn, const u32 write_group,
2888                           int *bgn_curr, int *end_curr, int *bgn_best,
2889                           int *end_best, int *win_best, int new_dqs)
2890 {
2891         u32 bit_chk;
2892         const int max = IO_IO_OUT1_DELAY_MAX - new_dqs;
2893         int d, di;
2894
2895         /* Search for the/part of the window with DM/DQS shift. */
2896         for (di = max; di >= 0; di -= DELTA_D) {
2897                 if (search_dm) {
2898                         d = di;
2899                         scc_mgr_apply_group_dm_out1_delay(d);
2900                 } else {
2901                         /* For DQS, we go from 0...max */
2902                         d = max - di;
2903                         /*
2904                          * Note: This only shifts DQS, so are we limiting ourselve to
2905                          * width of DQ unnecessarily.
2906                          */
2907                         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2908                                                                 d + new_dqs);
2909                 }
2910
2911                 writel(0, &sdr_scc_mgr->update);
2912
2913                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2914                                                     PASS_ALL_BITS, &bit_chk,
2915                                                     0)) {
2916                         /* Set current end of the window. */
2917                         *end_curr = search_dm ? -d : d;
2918
2919                         /*
2920                          * If a starting edge of our window has not been seen
2921                          * this is our current start of the DM window.
2922                          */
2923                         if (*bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2924                                 *bgn_curr = search_dm ? -d : d;
2925
2926                         /*
2927                          * If current window is bigger than best seen.
2928                          * Set best seen to be current window.
2929                          */
2930                         if ((*end_curr - *bgn_curr + 1) > *win_best) {
2931                                 *win_best = *end_curr - *bgn_curr + 1;
2932                                 *bgn_best = *bgn_curr;
2933                                 *end_best = *end_curr;
2934                         }
2935                 } else {
2936                         /* We just saw a failing test. Reset temp edge. */
2937                         *bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2938                         *end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2939
2940                         /* Early exit is only applicable to DQS. */
2941                         if (search_dm)
2942                                 continue;
2943
2944                         /*
2945                          * Early exit optimization: if the remaining delay
2946                          * chain space is less than already seen largest
2947                          * window we can exit.
2948                          */
2949                         if (*win_best - 1 > IO_IO_OUT1_DELAY_MAX - new_dqs - d)
2950                                 break;
2951                 }
2952         }
2953 }
2954
2955 /*
2956  * rw_mgr_mem_calibrate_writes_center() - Center all windows
2957  * @rank_bgn:           Rank number
2958  * @write_group:        Write group
2959  * @test_bgn:           Rank at which the test begins
2960  *
2961  * Center all windows. Do per-bit-deskew to possibly increase size of
2962  * certain windows.
2963  */
2964 static int
2965 rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group,
2966                                    const u32 test_bgn)
2967 {
2968         int i;
2969         u32 sticky_bit_chk;
2970         u32 min_index;
2971         int left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2972         int right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2973         int mid;
2974         int mid_min, orig_mid_min;
2975         int new_dqs, start_dqs;
2976         int dq_margin, dqs_margin, dm_margin;
2977         int bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2978         int end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2979         int bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2980         int end_best = IO_IO_OUT1_DELAY_MAX + 1;
2981         int win_best = 0;
2982
2983         int ret;
2984
2985         debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);
2986
2987         dm_margin = 0;
2988
2989         start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
2990                           SCC_MGR_IO_OUT1_DELAY_OFFSET) +
2991                           (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2992
2993         /* Per-bit deskew. */
2994
2995         /*
2996          * Set the left and right edge of each bit to an illegal value.
2997          * Use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
2998          */
2999         sticky_bit_chk = 0;
3000         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
3001                 left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
3002                 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
3003         }
3004
3005         /* Search for the left edge of the window for each bit. */
3006         search_left_edge(1, rank_bgn, write_group, 0, test_bgn,
3007                          &sticky_bit_chk,
3008                          left_edge, right_edge, 0);
3009
3010         /* Search for the right edge of the window for each bit. */
3011         ret = search_right_edge(1, rank_bgn, write_group, 0,
3012                                 start_dqs, 0,
3013                                 &sticky_bit_chk,
3014                                 left_edge, right_edge, 0);
3015         if (ret) {
3016                 set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES,
3017                                         CAL_SUBSTAGE_WRITES_CENTER);
3018                 return -EINVAL;
3019         }
3020
3021         min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min);
3022
3023         /* Determine the amount we can change DQS (which is -mid_min). */
3024         orig_mid_min = mid_min;
3025         new_dqs = start_dqs;
3026         mid_min = 0;
3027         debug_cond(DLEVEL == 1,
3028                    "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
3029                    __func__, __LINE__, start_dqs, new_dqs, mid_min);
3030
3031         /* Add delay to bring centre of all DQ windows to the same "level". */
3032         center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min,
3033                           min_index, 0, &dq_margin, &dqs_margin);
3034
3035         /* Move DQS */
3036         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3037         writel(0, &sdr_scc_mgr->update);
3038
3039         /* Centre DM */
3040         debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
3041
3042         /*
3043          * Set the left and right edge of each bit to an illegal value.
3044          * Use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
3045          */
3046         left_edge[0]  = IO_IO_OUT1_DELAY_MAX + 1;
3047         right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
3048
3049         /* Search for the/part of the window with DM shift. */
3050         search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr,
3051                       &bgn_best, &end_best, &win_best, 0);
3052
3053         /* Reset DM delay chains to 0. */
3054         scc_mgr_apply_group_dm_out1_delay(0);
3055
3056         /*
3057          * Check to see if the current window nudges up aganist 0 delay.
3058          * If so we need to continue the search by shifting DQS otherwise DQS
3059          * search begins as a new search.
3060          */
3061         if (end_curr != 0) {
3062                 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3063                 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3064         }
3065
3066         /* Search for the/part of the window with DQS shifts. */
3067         search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr,
3068                       &bgn_best, &end_best, &win_best, new_dqs);
3069
3070         /* Assign left and right edge for cal and reporting. */
3071         left_edge[0] = -1 * bgn_best;
3072         right_edge[0] = end_best;
3073
3074         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n",
3075                    __func__, __LINE__, left_edge[0], right_edge[0]);
3076
3077         /* Move DQS (back to orig). */
3078         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3079
3080         /* Move DM */
3081
3082         /* Find middle of window for the DM bit. */
3083         mid = (left_edge[0] - right_edge[0]) / 2;
3084
3085         /* Only move right, since we are not moving DQS/DQ. */
3086         if (mid < 0)
3087                 mid = 0;
3088
3089         /* dm_marign should fail if we never find a window. */
3090         if (win_best == 0)
3091                 dm_margin = -1;
3092         else
3093                 dm_margin = left_edge[0] - mid;
3094
3095         scc_mgr_apply_group_dm_out1_delay(mid);
3096         writel(0, &sdr_scc_mgr->update);
3097
3098         debug_cond(DLEVEL == 2,
3099                    "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
3100                    __func__, __LINE__, left_edge[0], right_edge[0],
3101                    mid, dm_margin);
3102         /* Export values. */
3103         gbl->fom_out += dq_margin + dqs_margin;
3104
3105         debug_cond(DLEVEL == 2,
3106                    "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
3107                    __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
3108
3109         /*
3110          * Do not remove this line as it makes sure all of our
3111          * decisions have been applied.
3112          */
3113         writel(0, &sdr_scc_mgr->update);
3114
3115         if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
3116                 return -EINVAL;
3117
3118         return 0;
3119 }
3120
3121 /**
3122  * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
3123  * @rank_bgn:           Rank number
3124  * @group:              Read/Write Group
3125  * @test_bgn:           Rank at which the test begins
3126  *
3127  * Stage 2: Write Calibration Part One.
3128  *
3129  * This function implements UniPHY calibration Stage 2, as explained in
3130  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
3131  */
3132 static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group,
3133                                        const u32 test_bgn)
3134 {
3135         int ret;
3136
3137         /* Update info for sims */
3138         debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
3139
3140         reg_file_set_group(group);
3141         reg_file_set_stage(CAL_STAGE_WRITES);
3142         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3143
3144         ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn);
3145         if (ret)
3146                 set_failing_group_stage(group, CAL_STAGE_WRITES,
3147                                         CAL_SUBSTAGE_WRITES_CENTER);
3148
3149         return ret;
3150 }
3151
3152 /**
3153  * mem_precharge_and_activate() - Precharge all banks and activate
3154  *
3155  * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
3156  */
3157 static void mem_precharge_and_activate(void)
3158 {
3159         int r;
3160
3161         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3162                 /* Test if the rank should be skipped. */
3163                 if (param->skip_ranks[r])
3164                         continue;
3165
3166                 /* Set rank. */
3167                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3168
3169                 /* Precharge all banks. */
3170                 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3171                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3172
3173                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3174                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3175                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3176
3177                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3178                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3179                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3180
3181                 /* Activate rows. */
3182                 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3183                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3184         }
3185 }
3186
3187 /**
3188  * mem_init_latency() - Configure memory RLAT and WLAT settings
3189  *
3190  * Configure memory RLAT and WLAT parameters.
3191  */
3192 static void mem_init_latency(void)
3193 {
3194         /*
3195          * For AV/CV, LFIFO is hardened and always runs at full rate
3196          * so max latency in AFI clocks, used here, is correspondingly
3197          * smaller.
3198          */
3199         const u32 max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1;
3200         u32 rlat, wlat;
3201
3202         debug("%s:%d\n", __func__, __LINE__);
3203
3204         /*
3205          * Read in write latency.
3206          * WL for Hard PHY does not include additive latency.
3207          */
3208         wlat = readl(&data_mgr->t_wl_add);
3209         wlat += readl(&data_mgr->mem_t_add);
3210
3211         gbl->rw_wl_nop_cycles = wlat - 1;
3212
3213         /* Read in readl latency. */
3214         rlat = readl(&data_mgr->t_rl_add);
3215
3216         /* Set a pretty high read latency initially. */
3217         gbl->curr_read_lat = rlat + 16;
3218         if (gbl->curr_read_lat > max_latency)
3219                 gbl->curr_read_lat = max_latency;
3220
3221         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3222
3223         /* Advertise write latency. */
3224         writel(wlat, &phy_mgr_cfg->afi_wlat);
3225 }
3226
3227 /**
3228  * @mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
3229  *
3230  * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
3231  */
3232 static void mem_skip_calibrate(void)
3233 {
3234         uint32_t vfifo_offset;
3235         uint32_t i, j, r;
3236
3237         debug("%s:%d\n", __func__, __LINE__);
3238         /* Need to update every shadow register set used by the interface */
3239         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3240              r += NUM_RANKS_PER_SHADOW_REG) {
3241                 /*
3242                  * Set output phase alignment settings appropriate for
3243                  * skip calibration.
3244                  */
3245                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3246                         scc_mgr_set_dqs_en_phase(i, 0);
3247 #if IO_DLL_CHAIN_LENGTH == 6
3248                         scc_mgr_set_dqdqs_output_phase(i, 6);
3249 #else
3250                         scc_mgr_set_dqdqs_output_phase(i, 7);
3251 #endif
3252                         /*
3253                          * Case:33398
3254                          *
3255                          * Write data arrives to the I/O two cycles before write
3256                          * latency is reached (720 deg).
3257                          *   -> due to bit-slip in a/c bus
3258                          *   -> to allow board skew where dqs is longer than ck
3259                          *      -> how often can this happen!?
3260                          *      -> can claim back some ptaps for high freq
3261                          *       support if we can relax this, but i digress...
3262                          *
3263                          * The write_clk leads mem_ck by 90 deg
3264                          * The minimum ptap of the OPA is 180 deg
3265                          * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay
3266                          * The write_clk is always delayed by 2 ptaps
3267                          *
3268                          * Hence, to make DQS aligned to CK, we need to delay
3269                          * DQS by:
3270                          *    (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3271                          *
3272                          * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
3273                          * gives us the number of ptaps, which simplies to:
3274                          *
3275                          *    (1.25 * IO_DLL_CHAIN_LENGTH - 2)
3276                          */
3277                         scc_mgr_set_dqdqs_output_phase(i,
3278                                         1.25 * IO_DLL_CHAIN_LENGTH - 2);
3279                 }
3280                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3281                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3282
3283                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3284                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3285                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3286                 }
3287                 writel(0xff, &sdr_scc_mgr->dq_ena);
3288                 writel(0xff, &sdr_scc_mgr->dm_ena);
3289                 writel(0, &sdr_scc_mgr->update);
3290         }
3291
3292         /* Compensate for simulation model behaviour */
3293         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3294                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3295                 scc_mgr_load_dqs(i);
3296         }
3297         writel(0, &sdr_scc_mgr->update);
3298
3299         /*
3300          * ArriaV has hard FIFOs that can only be initialized by incrementing
3301          * in sequencer.
3302          */
3303         vfifo_offset = CALIB_VFIFO_OFFSET;
3304         for (j = 0; j < vfifo_offset; j++)
3305                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3306         writel(0, &phy_mgr_cmd->fifo_reset);
3307
3308         /*
3309          * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
3310          * setting from generation-time constant.
3311          */
3312         gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3313         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3314 }
3315
3316 /**
3317  * mem_calibrate() - Memory calibration entry point.
3318  *
3319  * Perform memory calibration.
3320  */
3321 static uint32_t mem_calibrate(void)
3322 {
3323         uint32_t i;
3324         uint32_t rank_bgn, sr;
3325         uint32_t write_group, write_test_bgn;
3326         uint32_t read_group, read_test_bgn;
3327         uint32_t run_groups, current_run;
3328         uint32_t failing_groups = 0;
3329         uint32_t group_failed = 0;
3330
3331         const u32 rwdqs_ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
3332                                 RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
3333
3334         debug("%s:%d\n", __func__, __LINE__);
3335
3336         /* Initialize the data settings */
3337         gbl->error_substage = CAL_SUBSTAGE_NIL;
3338         gbl->error_stage = CAL_STAGE_NIL;
3339         gbl->error_group = 0xff;
3340         gbl->fom_in = 0;
3341         gbl->fom_out = 0;
3342
3343         /* Initialize WLAT and RLAT. */
3344         mem_init_latency();
3345
3346         /* Initialize bit slips. */
3347         mem_precharge_and_activate();
3348
3349         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3350                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3351                           SCC_MGR_GROUP_COUNTER_OFFSET);
3352                 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3353                 if (i == 0)
3354                         scc_mgr_set_hhp_extras();
3355
3356                 scc_set_bypass_mode(i);
3357         }
3358
3359         /* Calibration is skipped. */
3360         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3361                 /*
3362                  * Set VFIFO and LFIFO to instant-on settings in skip
3363                  * calibration mode.
3364                  */
3365                 mem_skip_calibrate();
3366
3367                 /*
3368                  * Do not remove this line as it makes sure all of our
3369                  * decisions have been applied.
3370                  */
3371                 writel(0, &sdr_scc_mgr->update);
3372                 return 1;
3373         }
3374
3375         /* Calibration is not skipped. */
3376         for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3377                 /*
3378                  * Zero all delay chain/phase settings for all
3379                  * groups and all shadow register sets.
3380                  */
3381                 scc_mgr_zero_all();
3382
3383                 run_groups = ~param->skip_groups;
3384
3385                 for (write_group = 0, write_test_bgn = 0; write_group
3386                         < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++,
3387                         write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
3388
3389                         /* Initialize the group failure */
3390                         group_failed = 0;
3391
3392                         current_run = run_groups & ((1 <<
3393                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3394                         run_groups = run_groups >>
3395                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3396
3397                         if (current_run == 0)
3398                                 continue;
3399
3400                         writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3401                                             SCC_MGR_GROUP_COUNTER_OFFSET);
3402                         scc_mgr_zero_group(write_group, 0);
3403
3404                         for (read_group = write_group * rwdqs_ratio,
3405                              read_test_bgn = 0;
3406                              read_group < (write_group + 1) * rwdqs_ratio;
3407                              read_group++,
3408                              read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) {
3409                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO)
3410                                         continue;
3411
3412                                 /* Calibrate the VFIFO */
3413                                 if (rw_mgr_mem_calibrate_vfifo(read_group,
3414                                                                read_test_bgn))
3415                                         continue;
3416
3417                                 if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_SWEEP_ALL_GROUPS))
3418                                         return 0;
3419
3420                                 /* The group failed, we're done. */
3421                                 goto grp_failed;
3422                         }
3423
3424                         /* Calibrate the output side */
3425                         for (rank_bgn = 0, sr = 0;
3426                              rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
3427                              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
3428                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3429                                         continue;
3430
3431                                 /* Not needed in quick mode! */
3432                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS)
3433                                         continue;
3434
3435                                 /*
3436                                  * Determine if this set of ranks
3437                                  * should be skipped entirely.
3438                                  */
3439                                 if (param->skip_shadow_regs[sr])
3440                                         continue;
3441
3442                                 /* Calibrate WRITEs */
3443                                 if (!rw_mgr_mem_calibrate_writes(rank_bgn,
3444                                                 write_group, write_test_bgn))
3445                                         continue;
3446
3447                                 group_failed = 1;
3448                                 if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_SWEEP_ALL_GROUPS))
3449                                         return 0;
3450                         }
3451
3452                         /* Some group failed, we're done. */
3453                         if (group_failed)
3454                                 goto grp_failed;
3455
3456                         for (read_group = write_group * rwdqs_ratio,
3457                              read_test_bgn = 0;
3458                              read_group < (write_group + 1) * rwdqs_ratio;
3459                              read_group++,
3460                              read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) {
3461                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3462                                         continue;
3463
3464                                 if (!rw_mgr_mem_calibrate_vfifo_end(read_group,
3465                                                                 read_test_bgn))
3466                                         continue;
3467
3468                                 if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_SWEEP_ALL_GROUPS))
3469                                         return 0;
3470
3471                                 /* The group failed, we're done. */
3472                                 goto grp_failed;
3473                         }
3474
3475                         /* No group failed, continue as usual. */
3476                         continue;
3477
3478 grp_failed:             /* A group failed, increment the counter. */
3479                         failing_groups++;
3480                 }
3481
3482                 /*
3483                  * USER If there are any failing groups then report
3484                  * the failure.
3485                  */
3486                 if (failing_groups != 0)
3487                         return 0;
3488
3489                 if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO)
3490                         continue;
3491
3492                 /*
3493                  * If we're skipping groups as part of debug,
3494                  * don't calibrate LFIFO.
3495                  */
3496                 if (param->skip_groups != 0)
3497                         continue;
3498
3499                 /* Calibrate the LFIFO */
3500                 if (!rw_mgr_mem_calibrate_lfifo())
3501                         return 0;
3502         }
3503
3504         /*
3505          * Do not remove this line as it makes sure all of our decisions
3506          * have been applied.
3507          */
3508         writel(0, &sdr_scc_mgr->update);
3509         return 1;
3510 }
3511
3512 /**
3513  * run_mem_calibrate() - Perform memory calibration
3514  *
3515  * This function triggers the entire memory calibration procedure.
3516  */
3517 static int run_mem_calibrate(void)
3518 {
3519         int pass;
3520
3521         debug("%s:%d\n", __func__, __LINE__);
3522
3523         /* Reset pass/fail status shown on afi_cal_success/fail */
3524         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3525
3526         /* Stop tracking manager. */
3527         clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3528
3529         phy_mgr_initialize();
3530         rw_mgr_mem_initialize();
3531
3532         /* Perform the actual memory calibration. */
3533         pass = mem_calibrate();
3534
3535         mem_precharge_and_activate();
3536         writel(0, &phy_mgr_cmd->fifo_reset);
3537
3538         /* Handoff. */
3539         rw_mgr_mem_handoff();
3540         /*
3541          * In Hard PHY this is a 2-bit control:
3542          * 0: AFI Mux Select
3543          * 1: DDIO Mux Select
3544          */
3545         writel(0x2, &phy_mgr_cfg->mux_sel);
3546
3547         /* Start tracking manager. */
3548         setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3549
3550         return pass;
3551 }
3552
3553 /**
3554  * debug_mem_calibrate() - Report result of memory calibration
3555  * @pass:       Value indicating whether calibration passed or failed
3556  *
3557  * This function reports the results of the memory calibration
3558  * and writes debug information into the register file.
3559  */
3560 static void debug_mem_calibrate(int pass)
3561 {
3562         uint32_t debug_info;
3563
3564         if (pass) {
3565                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3566
3567                 gbl->fom_in /= 2;
3568                 gbl->fom_out /= 2;
3569
3570                 if (gbl->fom_in > 0xff)
3571                         gbl->fom_in = 0xff;
3572
3573                 if (gbl->fom_out > 0xff)
3574                         gbl->fom_out = 0xff;
3575
3576                 /* Update the FOM in the register file */
3577                 debug_info = gbl->fom_in;
3578                 debug_info |= gbl->fom_out << 8;
3579                 writel(debug_info, &sdr_reg_file->fom);
3580
3581                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3582                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3583         } else {
3584                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3585
3586                 debug_info = gbl->error_stage;
3587                 debug_info |= gbl->error_substage << 8;
3588                 debug_info |= gbl->error_group << 16;
3589
3590                 writel(debug_info, &sdr_reg_file->failing_stage);
3591                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3592                 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3593
3594                 /* Update the failing group/stage in the register file */
3595                 debug_info = gbl->error_stage;
3596                 debug_info |= gbl->error_substage << 8;
3597                 debug_info |= gbl->error_group << 16;
3598                 writel(debug_info, &sdr_reg_file->failing_stage);
3599         }
3600
3601         printf("%s: Calibration complete\n", __FILE__);
3602 }
3603
3604 /**
3605  * hc_initialize_rom_data() - Initialize ROM data
3606  *
3607  * Initialize ROM data.
3608  */
3609 static void hc_initialize_rom_data(void)
3610 {
3611         u32 i, addr;
3612
3613         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3614         for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++)
3615                 writel(inst_rom_init[i], addr + (i << 2));
3616
3617         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3618         for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++)
3619                 writel(ac_rom_init[i], addr + (i << 2));
3620 }
3621
3622 /**
3623  * initialize_reg_file() - Initialize SDR register file
3624  *
3625  * Initialize SDR register file.
3626  */
3627 static void initialize_reg_file(void)
3628 {
3629         /* Initialize the register file with the correct data */
3630         writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3631         writel(0, &sdr_reg_file->debug_data_addr);
3632         writel(0, &sdr_reg_file->cur_stage);
3633         writel(0, &sdr_reg_file->fom);
3634         writel(0, &sdr_reg_file->failing_stage);
3635         writel(0, &sdr_reg_file->debug1);
3636         writel(0, &sdr_reg_file->debug2);
3637 }
3638
3639 /**
3640  * initialize_hps_phy() - Initialize HPS PHY
3641  *
3642  * Initialize HPS PHY.
3643  */
3644 static void initialize_hps_phy(void)
3645 {
3646         uint32_t reg;
3647         /*
3648          * Tracking also gets configured here because it's in the
3649          * same register.
3650          */
3651         uint32_t trk_sample_count = 7500;
3652         uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
3653         /*
3654          * Format is number of outer loops in the 16 MSB, sample
3655          * count in 16 LSB.
3656          */
3657
3658         reg = 0;
3659         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3660         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3661         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3662         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3663         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3664         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3665         /*
3666          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3667          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3668          */
3669         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3670         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3671                 trk_sample_count);
3672         writel(reg, &sdr_ctrl->phy_ctrl0);
3673
3674         reg = 0;
3675         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3676                 trk_sample_count >>
3677                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3678         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3679                 trk_long_idle_sample_count);
3680         writel(reg, &sdr_ctrl->phy_ctrl1);
3681
3682         reg = 0;
3683         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3684                 trk_long_idle_sample_count >>
3685                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3686         writel(reg, &sdr_ctrl->phy_ctrl2);
3687 }
3688
3689 /**
3690  * initialize_tracking() - Initialize tracking
3691  *
3692  * Initialize the register file with usable initial data.
3693  */
3694 static void initialize_tracking(void)
3695 {
3696         /*
3697          * Initialize the register file with the correct data.
3698          * Compute usable version of value in case we skip full
3699          * computation later.
3700          */
3701         writel(DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP) - 1,
3702                &sdr_reg_file->dtaps_per_ptap);
3703
3704         /* trk_sample_count */
3705         writel(7500, &sdr_reg_file->trk_sample_count);
3706
3707         /* longidle outer loop [15:0] */
3708         writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3709
3710         /*
3711          * longidle sample count [31:24]
3712          * trfc, worst case of 933Mhz 4Gb [23:16]
3713          * trcd, worst case [15:8]
3714          * vfifo wait [7:0]
3715          */
3716         writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3717                &sdr_reg_file->delays);
3718
3719         /* mux delay */
3720         writel((RW_MGR_IDLE << 24) | (RW_MGR_ACTIVATE_1 << 16) |
3721                (RW_MGR_SGLE_READ << 8) | (RW_MGR_PRECHARGE_ALL << 0),
3722                &sdr_reg_file->trk_rw_mgr_addr);
3723
3724         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH,
3725                &sdr_reg_file->trk_read_dqs_width);
3726
3727         /* trefi [7:0] */
3728         writel((RW_MGR_REFRESH_ALL << 24) | (1000 << 0),
3729                &sdr_reg_file->trk_rfsh);
3730 }
3731
3732 int sdram_calibration_full(void)
3733 {
3734         struct param_type my_param;
3735         struct gbl_type my_gbl;
3736         uint32_t pass;
3737
3738         memset(&my_param, 0, sizeof(my_param));
3739         memset(&my_gbl, 0, sizeof(my_gbl));
3740
3741         param = &my_param;
3742         gbl = &my_gbl;
3743
3744         /* Set the calibration enabled by default */
3745         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3746         /*
3747          * Only sweep all groups (regardless of fail state) by default
3748          * Set enabled read test by default.
3749          */
3750 #if DISABLE_GUARANTEED_READ
3751         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3752 #endif
3753         /* Initialize the register file */
3754         initialize_reg_file();
3755
3756         /* Initialize any PHY CSR */
3757         initialize_hps_phy();
3758
3759         scc_mgr_initialize();
3760
3761         initialize_tracking();
3762
3763         printf("%s: Preparing to start memory calibration\n", __FILE__);
3764
3765         debug("%s:%d\n", __func__, __LINE__);
3766         debug_cond(DLEVEL == 1,
3767                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3768                    RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3769                    RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3770                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3771                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3772         debug_cond(DLEVEL == 1,
3773                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3774                    RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3775                    RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3776                    IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3777         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
3778                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3779         debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3780                    IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3781                    IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3782         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3783                    IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3784                    IO_IO_OUT2_DELAY_MAX);
3785         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3786                    IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3787
3788         hc_initialize_rom_data();
3789
3790         /* update info for sims */
3791         reg_file_set_stage(CAL_STAGE_NIL);
3792         reg_file_set_group(0);
3793
3794         /*
3795          * Load global needed for those actions that require
3796          * some dynamic calibration support.
3797          */
3798         dyn_calib_steps = STATIC_CALIB_STEPS;
3799         /*
3800          * Load global to allow dynamic selection of delay loop settings
3801          * based on calibration mode.
3802          */
3803         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3804                 skip_delay_mask = 0xff;
3805         else
3806                 skip_delay_mask = 0x0;
3807
3808         pass = run_mem_calibrate();
3809         debug_mem_calibrate(pass);
3810         return pass;
3811 }