/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include <errno.h>
#include "sequencer.h"

/*
 * FIXME: This path is temporary until the SDRAM driver gets
 *        a proper thorough cleanup.
 */
#include "../../../board/altera/socfpga/qts/sequencer_auto.h"
#include "../../../board/altera/socfpga/qts/sequencer_defines.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
        (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
        (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
        (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
        (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
        (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
        (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
        (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
        (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D         1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
        STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

uint16_t skip_delay_mask;       /* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
        ((non_skip_value) & skip_delay_mask)
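
/*
 * Worked example (illustrative, not part of the original driver):
 * SKIP_DELAY_LOOP_VALUE_OR_ZERO() selects the value purely via the mask:
 *
 *   skip_delay_mask = 0xffff;                     // not skipping
 *   SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x7f) == 0x7f;  // value passes through
 *
 *   skip_delay_mask = 0;                          // skipping delay loops
 *   SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x7f) == 0;     // counter collapses to 0
 */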
struct gbl_type *gbl;
struct param_type *param;

static void set_failing_group_stage(uint32_t group, uint32_t stage,
        uint32_t substage)
{
        /*
         * Only set the global stage if there has not been any other
         * failing group
         */
        if (gbl->error_stage == CAL_STAGE_NIL) {
                gbl->error_substage = substage;
                gbl->error_stage = stage;
                gbl->error_group = group;
        }
}

static void reg_file_set_group(u16 set_group)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
        set_sub_stage &= 0xff;
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

/**
 * phy_mgr_initialize() - Initialize PHY Manager
 *
 * Initialize PHY Manager.
 */
static void phy_mgr_initialize(void)
{
        u32 ratio;

        debug("%s:%d\n", __func__, __LINE__);
        /* Calibration has control over path to memory */
        /*
         * In Hard PHY this is a 2-bit control:
         * 0: AFI Mux Select
         * 1: DDIO Mux Select
         */
        writel(0x3, &phy_mgr_cfg->mux_sel);

        /* USER memory clock is not yet stable, we begin initialization */
        writel(0, &phy_mgr_cfg->reset_mem_stbl);

        /* USER calibration status all set to zero */
        writel(0, &phy_mgr_cfg->cal_status);

        writel(0, &phy_mgr_cfg->cal_debug_info);

        /* Init params only if we do NOT skip calibration. */
        if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
                return;

        ratio = RW_MGR_MEM_DQ_PER_READ_DQS /
                RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
        param->read_correct_mask_vg = (1 << ratio) - 1;
        param->write_correct_mask_vg = (1 << ratio) - 1;
        param->read_correct_mask = (1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
        param->write_correct_mask = (1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
}
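
/*
 * Worked example (illustrative values, not from the original driver):
 * assuming RW_MGR_MEM_DQ_PER_READ_DQS == 8 and
 * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS == 1, the code above computes
 * ratio == 8, so:
 *
 *   param->read_correct_mask_vg == (1 << 8) - 1 == 0xff
 *   param->read_correct_mask    == (1 << 8) - 1 == 0xff
 *
 * i.e. one pass/fail bit per DQ pin covered by the test.
 */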

/**
 * set_rank_and_odt_mask() - Set Rank and ODT mask
 * @rank:       Rank mask
 * @odt_mode:   ODT mode, OFF or READ_WRITE
 *
 * Set Rank and ODT mask (On-Die Termination).
 */
static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode)
{
        u32 odt_mask_0 = 0;
        u32 odt_mask_1 = 0;
        u32 cs_and_odt_mask;

        if (odt_mode == RW_MGR_ODT_MODE_OFF) {
                odt_mask_0 = 0x0;
                odt_mask_1 = 0x0;
        } else {        /* RW_MGR_ODT_MODE_READ_WRITE */
                switch (RW_MGR_MEM_NUMBER_OF_RANKS) {
                case 1: /* 1 Rank */
                        /* Read: ODT = 0 ; Write: ODT = 1 */
                        odt_mask_0 = 0x0;
                        odt_mask_1 = 0x1;
                        break;
                case 2: /* 2 Ranks */
                        if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
                                /*
                                 * - Dual-Slot , Single-Rank (1 CS per DIMM)
                                 *   OR
                                 * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
                                 *
                                 * Since MEM_NUMBER_OF_RANKS is 2, they
                                 * are both single rank with 2 CS each
                                 * (special for RDIMM).
                                 *
                                 * Read: Turn on ODT on the opposite rank
                                 * Write: Turn on ODT on all ranks
                                 */
                                odt_mask_0 = 0x3 & ~(1 << rank);
                                odt_mask_1 = 0x3;
                        } else {
                                /*
                                 * - Single-Slot , Dual-Rank (2 CS per DIMM)
                                 *
                                 * Read: Turn off ODT on all ranks
                                 * Write: Turn on ODT on active rank
                                 */
                                odt_mask_0 = 0x0;
                                odt_mask_1 = 0x3 & (1 << rank);
                        }
                        break;
                case 4: /* 4 Ranks */
                        /* Read:
                         * ----------+-----------------------+
                         *           |         ODT           |
                         * Read From +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  0  |
                         *     1     |  1  |  0  |  0  |  0  |
                         *     2     |  0  |  0  |  0  |  1  |
                         *     3     |  0  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *
                         * Write:
                         * ----------+-----------------------+
                         *           |         ODT           |
                         * Write To  +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  1  |
                         *     1     |  1  |  0  |  1  |  0  |
                         *     2     |  0  |  1  |  0  |  1  |
                         *     3     |  1  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         */
                        switch (rank) {
                        case 0:
                                odt_mask_0 = 0x4;
                                odt_mask_1 = 0x5;
                                break;
                        case 1:
                                odt_mask_0 = 0x8;
                                odt_mask_1 = 0xA;
                                break;
                        case 2:
                                odt_mask_0 = 0x1;
                                odt_mask_1 = 0x5;
                                break;
                        case 3:
                                odt_mask_0 = 0x2;
                                odt_mask_1 = 0xA;
                                break;
                        }
                        break;
                }
        }

        cs_and_odt_mask = (0xFF & ~(1 << rank)) |
                          ((0xFF & odt_mask_0) << 8) |
                          ((0xFF & odt_mask_1) << 16);
        writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}
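
/*
 * Worked example (not from the original driver): with the 4-rank table
 * above, a READ_WRITE access targeting rank 1 yields odt_mask_0 = 0x8 and
 * odt_mask_1 = 0xA, so:
 *
 *   cs_and_odt_mask = (0xFF & ~(1 << 1)) | (0x8 << 8) | (0xA << 16)
 *                   = 0xFD | 0x800 | 0xA0000
 *                   = 0x000A08FD
 *
 * The CS field in bits [7:0] has only the selected rank's bit cleared,
 * the read ODT mask sits in bits [15:8] and the write ODT mask in
 * bits [23:16].
 */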

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
        writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}
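
/*
 * Illustrative use (not from the original driver): the target address is
 * composed of the SCC group base, the register offset and the group index
 * scaled by 4 (one 32-bit word per group), e.g.:
 *
 *   scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, 3, 5);
 *   // writes 5 to SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET
 *   //                                       | (3 << 2)
 */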

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
        /*
         * Clear register file for HPS. 16 (2^4) is the size of the
         * full register file in the scc mgr:
         *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
         *                             MEM_IF_READ_DQS_WIDTH - 1);
         */
        int i;

        for (i = 0; i < 16; i++) {
                debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
                           __func__, __LINE__, i);
                scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
        }
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
                    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(uint32_t dqs)
{
        writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
        writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
        writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(uint32_t dm)
{
        writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 * @update:     If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
                                  const int update)
{
        u32 r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set(off, grp, val);

                if (update || (r == 0)) {
                        writel(grp, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
        /*
         * USER although the h/w doesn't support different phases per
         * shadow register, for simplicity our scc manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * for efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
                              read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
                                                     uint32_t phase)
{
        /*
         * USER although the h/w doesn't support different phases per
         * shadow register, for simplicity our scc manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * for efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
                              write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
                                               uint32_t delay)
{
        /*
         * In shadow register mode, the T11 settings are stored in
         * registers in the core, which are updated by the DQS_ENA
         * signals. Not issuing the SCC_MGR_UPD command allows us to
         * save lots of rank switching overhead, by calling
         * select_shadow_regs_for_update with update_scan_chains
         * set to 0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
                              read_group, delay, 1);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * This function sets the OCT output delay in SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
{
        const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
                          RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}
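
/*
 * Worked example (illustrative widths, not from the original driver): if
 * RW_MGR_MEM_IF_READ_DQS_WIDTH == 8 and RW_MGR_MEM_IF_WRITE_DQS_WIDTH == 4,
 * then ratio == 2 and write group 3 spans read groups 6 and 7, so the loop
 * above performs:
 *
 *   scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, 6, delay);
 *   scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, 7, delay);
 */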

/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
        /*
         * Load the fixed setting in the SCC manager
         * bits: 0:0 = 1'b1     - DQS bypass
         * bits: 1:1 = 1'b1     - DQ bypass
         * bits: 4:2 = 3'b001   - rfifo_mode
         * bits: 6:5 = 2'b01    - rfifo clock_select
         * bits: 7:7 = 1'b0     - separate gating from ungating setting
         * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
        const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
                          (1 << 2) | (1 << 1) | (1 << 0);
        const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
                         SCC_MGR_HHP_GLOBALS_OFFSET |
                         SCC_MGR_HHP_EXTRAS_OFFSET;

        debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
                   __func__, __LINE__);
        writel(value, addr);
        debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
                   __func__, __LINE__);
}
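
/*
 * Worked arithmetic (not from the original driver): the bitfield above
 * evaluates to
 *
 *   value = (1 << 5) | (1 << 2) | (1 << 1) | (1 << 0)
 *         = 0x20 | 0x04 | 0x02 | 0x01
 *         = 0x27
 *
 * which matches the per-bit documentation: DQS/DQ bypass on,
 * rfifo_mode = 3'b001, rfifo clock_select = 2'b01, gating and OE
 * bits clear.
 */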

/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(void)
{
        int i, r;

        /*
         * USER Zero all DQS config settings, across all groups and all
         * shadow registers
         */
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
                         * but there's no harm updating them several times, so
                         * let's keep the code simple.
                         */
                        scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
                        scc_mgr_set_dqs_en_phase(i, 0);
                        scc_mgr_set_dqs_en_delay(i, 0);
                }

                for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
                        /* Arria V/Cyclone V don't have out2. */
                        scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
                }
        }

        /* Multicast to all DQS group enables. */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:        Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
        /* Multicast to all DQ enables. */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);

        /* Update current DQS IO enable. */
        writel(0, &sdr_scc_mgr->dqs_io_ena);

        /* Update the DQS logic. */
        writel(write_group, &sdr_scc_mgr->dqs_ena);

        /* Hit update. */
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:        Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
        const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
                          RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(const u32 write_group, const int out_only)
{
        int i, r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                /* Zero all DQ config settings. */
                for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }

                /* Multicast to all DQ enables. */
                writel(0xff, &sdr_scc_mgr->dq_ena);

                /* Zero all DM config settings. */
                for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                        scc_mgr_set_dm_out1_delay(i, 0);

                /* Multicast to all DM enables. */
                writel(0xff, &sdr_scc_mgr->dm_ena);

                /* Zero all DQS IO settings. */
                if (!out_only)
                        scc_mgr_set_dqs_io_in_delay(0);

                /* Arria V/Cyclone V don't have out2. */
                scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
                scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_load_dqs_for_write_group(write_group);

                /* Multicast to all DQS IO enables (only 1 in total). */
                writel(0, &sdr_scc_mgr->dqs_io_ena);

                /* Hit update to zero everything. */
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
{
        uint32_t i, p;

        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
                scc_mgr_set_dq_in_delay(p, delay);
                scc_mgr_load_dq(p);
        }
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
 * @delay:              Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
{
        int i;

        for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                scc_mgr_set_dq_out1_delay(i, delay);
                scc_mgr_load_dq(i);
        }
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
{
        uint32_t i;

        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                scc_mgr_set_dm_out1_delay(i, delay1);
                scc_mgr_load_dm(i);
        }
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
                                                    uint32_t delay)
{
        scc_mgr_set_dqs_out1_delay(delay);
        scc_mgr_load_dqs_io();

        scc_mgr_set_oct_out1_delay(write_group, delay);
        scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
 */
static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
                                                  const u32 delay)
{
        u32 i, new_delay;

        /* DQ shift */
        for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
                scc_mgr_load_dq(i);

        /* DM shift */
        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                scc_mgr_load_dm(i);

        /* DQS shift */
        new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
                           __func__, __LINE__, write_group, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                new_delay -= IO_IO_OUT2_DELAY_MAX;
                scc_mgr_set_dqs_out1_delay(new_delay);
        }

        scc_mgr_load_dqs_io();

        /* OCT shift */
        new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
                           __func__, __LINE__, write_group, delay,
                           new_delay, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                new_delay -= IO_IO_OUT2_DELAY_MAX;
                scc_mgr_set_oct_out1_delay(write_group, new_delay);
        }

        scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
 */
static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
                                                const u32 delay)
{
        int r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_apply_group_all_out_delay_add(write_group, delay);
                writel(0, &sdr_scc_mgr->update);
        }
}

/**
 * set_jump_as_return() - Return instruction optimization
 *
 * Optimization used to recover some slots in the DDR3 inst_rom; it could
 * be applied to other protocols if we wanted to.
 */
static void set_jump_as_return(void)
{
        /*
         * To save space, we replace return with a jump to a special shared
         * RETURN instruction, and set the counter to a large value so that
         * we always take the jump.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/**
 * delay_for_n_mem_clocks() - Delay for N memory clocks
 * @clocks:     Length of the delay
 *
 * Delay for N memory clocks.
 */
static void delay_for_n_mem_clocks(const u32 clocks)
{
        u32 afi_clocks;
        u16 c_loop;
        u8 inner;
        u8 outer;

        debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

        /* Scale (rounding up) to get afi clocks. */
        afi_clocks = DIV_ROUND_UP(clocks, AFI_RATE_RATIO);
        if (afi_clocks) /* Temporary underflow protection */
                afi_clocks--;

        /*
         * Note, we don't bother accounting for being off a little
         * bit because of a few extra instructions in outer loops.
         * Note, the loops have a test at the end, and do the test
         * before the decrement, and so always perform the loop
         * 1 time more than the counter value
         */
        c_loop = afi_clocks >> 16;
        outer = c_loop ? 0xff : (afi_clocks >> 8);
        inner = outer ? 0xff : afi_clocks;

        /*
         * rom instructions are structured as follows:
         *
         *    IDLE_LOOP2: jnz cntr0, TARGET_A
         *    IDLE_LOOP1: jnz cntr1, TARGET_B
         *                return
         *
         * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
         * TARGET_B is set to IDLE_LOOP2 as well
         *
         * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
         * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
         *
         * a little confusing, but it helps save precious space in the inst_rom
         * and sequencer rom and keeps the delays more accurate and reduces
         * overhead
         */
        if (afi_clocks < 0x100) {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                          RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        } else {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr0);

                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                do {
                        writel(RW_MGR_IDLE_LOOP2,
                                SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                } while (c_loop-- != 0);
        }
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}
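
/*
 * Worked example (assumes AFI_RATE_RATIO == 2; not from the original
 * driver): a request of clocks = 512 gives
 * afi_clocks = DIV_ROUND_UP(512, 2) - 1 = 255, hence c_loop = 0,
 * outer = 0, inner = 255, and the single-counter IDLE_LOOP1 path runs.
 * A request of clocks = 200000 gives afi_clocks = 99999, hence c_loop = 1
 * and inner = outer = 0xff, so the nested IDLE_LOOP2 path is taken and
 * the do/while issues the run instruction c_loop + 1 = 2 times.
 */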

/**
 * rw_mgr_mem_init_load_regs() - Load instruction registers
 * @cntr0:      Counter 0 value
 * @cntr1:      Counter 1 value
 * @cntr2:      Counter 2 value
 * @jump:       Jump instruction value
 *
 * Load instruction registers.
 */
static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
{
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        /* Execute count instruction */
        writel(jump, grpaddr);
}

/**
 * rw_mgr_mem_load_user() - Load user calibration values
 * @fin1:       Final instruction 1
 * @fin2:       Final instruction 2
 * @precharge:  If 1, precharge the banks at the end
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
                                 const int precharge)
{
        u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        u32 r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /* precharge all banks ... */
                if (precharge)
                        writel(RW_MGR_PRECHARGE_ALL, grpaddr);

                /*
                 * USER Use mirrored commands for odd ranks if address
                 * mirroring is on
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(fin1, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        set_jump_as_return();
                        writel(fin2, grpaddr);
                }

                if (precharge)
                        continue;

                set_jump_as_return();
                writel(RW_MGR_ZQCL, grpaddr);

                /* tZQinit = tDLLK = 512 ck cycles */
                delay_for_n_mem_clocks(512);
        }
}

/**
 * rw_mgr_mem_initialize() - Initialize RW Manager
 *
 * Initialize RW Manager.
 */
static void rw_mgr_mem_initialize(void)
{
        debug("%s:%d\n", __func__, __LINE__);

        /* The reset / cke part of initialization is broadcasted to all ranks */
        writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

        /*
         * Here's how you load registers for a loop
         * Counters are located @ 0x800
         * Jump addresses are located @ 0xC00
         * For both, registers 0 to 3 are selected using bits 3 and 2, like
         * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
         * I know this ain't pretty, but the Avalon bus throws away the 2
         * least significant bits
         */

        /* Start with memory RESET activated */

        /* tINIT = 200us */

        /*
         * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
         * If a and b are the numbers of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
         * b = 6A
         */
        rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL,
                                  SEQ_TINIT_CNTR2_VAL,
                                  RW_MGR_INIT_RESET_0_CKE_0);

        /* Indicate that memory is stable. */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);

        /*
         * transition the RESET to high
         * Wait for 500us
         */

        /*
         * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
         * If a and b are the numbers of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
         * b = FF
         */
        rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL,
                                  SEQ_TRESET_CNTR2_VAL,
                                  RW_MGR_INIT_RESET_1_CKE_0);

        /* Bring up clock enable. */

        /* tXRP < 250 ck cycles */
        delay_for_n_mem_clocks(250);

        rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET,
                             0);
}
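
/*
 * Worked arithmetic (not from the original driver): plugging the suggested
 * solutions into number_of_cycles = ((2 + n) * a + 2) * b gives:
 *
 *   tINIT:  n = 0, a = 256, b = 106 -> ((2 + 0) * 256 + 2) * 106 = 54484
 *           cycles, just above the ~54000 needed for 200us @ 266MHz.
 *   tRESET: n = 2, a = 131, b = 256 -> ((2 + 2) * 131 + 2) * 256 = 134656
 *           cycles, just above the ~134000 needed for 500us.
 */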

/**
 * rw_mgr_mem_handoff() - Hand off the memory to user
 *
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
        rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1);
        /*
         * Need to wait tMOD (12CK or 15ns) time before issuing other
         * commands, but we will have plenty of NIOS cycles before actual
         * handoff so it's okay.
         */
}

/**
 * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
 * @group:      Write Group
 * @test_dm:    Use DM
 *
 * Issue write test command. Two variants are provided, one that just tests
 * a write pattern and another that tests datamask functionality.
 */
static void rw_mgr_mem_calibrate_write_test_issue(u32 group,
                                                  u32 test_dm)
{
        const u32 quick_write_mode =
                (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
                ENABLE_SUPER_QUICK_CALIBRATION;
        u32 mcc_instruction;
        u32 rw_wl_nop_cycles;

        /*
         * Set counter and jump addresses for the right
         * number of NOP cycles.
         * The number of supported NOP cycles can range from -1 to infinity
         * Three different cases are handled:
         *
         * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
         *    mechanism will be used to insert the right number of NOPs
         *
         * 2. For a number of NOP cycles equal to 0, the micro-instruction
         *    issuing the write command will jump straight to the
         *    micro-instruction that turns on DQS (for DDRx), or outputs write
         *    data (for RLD), skipping the NOP micro-instruction altogether
         *
         * 3. A number of NOP cycles equal to -1 indicates that DQS must be
         *    turned on in the same micro-instruction that issues the write
         *    command. Then we need to directly jump to the micro-instruction
         *    that sends out the data
         *
         * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
         *       (2 and 3). One jump-counter (0) is used to perform multiple
         *       write-read operations.
         *       one counter left to issue this command in "multiple-group" mode
         */

        rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;

        if (rw_wl_nop_cycles == -1) {
                /*
                 * CNTR 2 - We want to execute the special write operation that
                 * turns on DQS right away and then skip directly to the
                 * instruction that sends out the data. We set the counter to a
                 * large number so that the jump is always taken.
                 */
                writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

                /* CNTR 3 - Not used */
                if (test_dm) {
                        mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
                        writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                        writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                } else {
                        mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
                        writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                        writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                }
        } else if (rw_wl_nop_cycles == 0) {
                /*
                 * CNTR 2 - We want to skip the NOP operation and go straight
                 * to the DQS enable instruction. We set the counter to a large
                 * number so that the jump is always taken.
                 */
                writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

                /* CNTR 3 - Not used */
                if (test_dm) {
                        mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
                        writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                } else {
                        mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
                        writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                }
        } else {
                /*
                 * CNTR 2 - In this case we want to execute the next instruction
                 * and NOT take the jump. So we set the counter to 0. The jump
                 * address doesn't count.
                 */
                writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
                writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                /*
                 * CNTR 3 - Set the nop counter to the number of cycles we
                 * need to loop for, minus 1.
                 */
                writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
                if (test_dm) {
                        mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
                        writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                } else {
                        mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
                        writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                }
        }

        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

        if (quick_write_mode)
                writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
        else
                writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);

        writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);

        /*
         * CNTR 1 - This is used to ensure enough time elapses
         * for read data to come back.
         */
        writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);

        if (test_dm) {
                writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        } else {
                writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        }

        writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
                                (group << 2));
}
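
/*
 * Summary of the three NOP cases above (illustrative, not from the
 * original driver), assuming the DM variant:
 *
 *   rw_wl_nop_cycles == -1: CNTR2 = 0xFF, jump2 -> ..._DATA (DQS is on in
 *                           the write instruction, skip straight to data)
 *   rw_wl_nop_cycles ==  0: CNTR2 = 0xFF, jump2 -> ..._DQS (skip the NOP)
 *   rw_wl_nop_cycles ==  3: CNTR2 = 0, CNTR3 = 2, jump3 -> ..._NOP (the
 *                           NOP micro-instruction loops 2 + 1 = 3 times)
 */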

/**
 * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass
 * @rank_bgn:           Rank number
 * @write_group:        Write Group
 * @use_dm:             Use DM
 * @all_correct:        All bits must be correct in the mask
 * @bit_chk:            Resulting bit mask after the test
 * @all_ranks:          Test all ranks
 *
 * Test writes, can check for a single bit pass or multiple bit pass.
 */
static int
rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group,
                                const u32 use_dm, const u32 all_correct,
                                u32 *bit_chk, const u32 all_ranks)
{
        const u32 rank_end = all_ranks ?
                                RW_MGR_MEM_NUMBER_OF_RANKS :
                                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        const u32 shift_ratio = RW_MGR_MEM_DQ_PER_WRITE_DQS /
                                RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS;
        const u32 correct_mask_vg = param->write_correct_mask_vg;

        u32 tmp_bit_chk, base_rw_mgr;
        int vg, r;

        *bit_chk = param->write_correct_mask;

        for (r = rank_bgn; r < rank_end; r++) {
                /* Set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1;
                     vg >= 0; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);

                        rw_mgr_mem_calibrate_write_test_issue(
                                write_group *
                                RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + vg,
                                use_dm);

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk <<= shift_ratio;
                        tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
                }

                *bit_chk &= tmp_bit_chk;
        }

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
        if (all_correct) {
                debug_cond(DLEVEL == 2,
                           "write_test(%u,%u,ALL) : %u == %u => %i\n",
                           write_group, use_dm, *bit_chk,
                           param->write_correct_mask,
                           *bit_chk == param->write_correct_mask);
                return *bit_chk == param->write_correct_mask;
        } else {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "write_test(%u,%u,ONE) : %u != %i => %i\n",
                           write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
                return *bit_chk != 0x00;
        }
}
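
/*
 * Worked example (illustrative values, not from the original driver): the
 * accumulation above packs one pass/fail bit per DQ pin. Assuming 8 DQ per
 * write DQS and 2 virtual groups (shift_ratio == 4), the vg loop builds:
 *
 *   tmp_bit_chk = (result_vg1 << 4) | result_vg0;
 *
 * and *bit_chk keeps only the bits that passed on every rank tested.
 */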

/**
 * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
 * @rank_bgn:   Rank number
 * @group:      Read/Write Group
 * @all_ranks:  Test all ranks
 *
 * Performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works.
 */
static int
rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group,
                                        const u32 all_ranks)
{
        const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                         RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        const u32 addr_offset =
                         (group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS) << 2;
        const u32 rank_end = all_ranks ?
                                RW_MGR_MEM_NUMBER_OF_RANKS :
                                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        const u32 shift_ratio = RW_MGR_MEM_DQ_PER_READ_DQS /
                                RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
        const u32 correct_mask_vg = param->read_correct_mask_vg;

        u32 tmp_bit_chk, base_rw_mgr, bit_chk;
        int vg, r;
        int ret = 0;

        bit_chk = param->read_correct_mask;

        for (r = rank_bgn; r < rank_end; r++) {
                /* Set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst of read commands */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
                writel(RW_MGR_GUARANTEED_READ,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
                writel(RW_MGR_GUARANTEED_READ_CONT,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1;
                     vg >= 0; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);
                        writel(RW_MGR_GUARANTEED_READ,
                               addr + addr_offset + (vg << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk <<= shift_ratio;
                        tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
                }

                bit_chk &= tmp_bit_chk;
        }

        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);

        if (bit_chk != param->read_correct_mask)
                ret = -EIO;

        debug_cond(DLEVEL == 1,
                   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
                   __func__, __LINE__, group, bit_chk,
                   param->read_correct_mask, ret);

        return ret;
}

/**
 * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test
 * @rank_bgn:   Rank number
 * @all_ranks:  Test all ranks
 *
 * Load up the patterns we are going to use during a read test.
 */
static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn,
                                                    const int all_ranks)
{
        const u32 rank_end = all_ranks ?
                        RW_MGR_MEM_NUMBER_OF_RANKS :
                        (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        u32 r;

        debug("%s:%d\n", __func__, __LINE__);

        for (r = rank_bgn; r < rank_end; r++) {
                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst */
1321                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1322
1323                 writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
1324                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1325
1326                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1327
1328                 writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
1329                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1330
1331                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);
1332
1333                 writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
1334                         &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1335
1336                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);
1337
1338                 writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
1339                         &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1340
1341                 writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1342                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1343         }
1344
1345         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1346 }
1347
1348 /**
1349  * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
1350  * @rank_bgn:           Rank number
1351  * @group:              Read/Write group
1352  * @num_tries:          Number of retries of the test
1353  * @all_correct:        All bits must be correct in the mask
1354  * @bit_chk:            Resulting bit mask after the test
1355  * @all_groups:         Test all R/W groups
1356  * @all_ranks:          Test all ranks
1357  *
1358  * Try a read and see if it returns correct data back. Test has dummy reads
1359  * inserted into the mix used to align DQS enable. Test has more thorough
1360  * checks than the regular read test.
1361  */
1362 static int
1363 rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group,
1364                                const u32 num_tries, const u32 all_correct,
1365                                u32 *bit_chk,
1366                                const u32 all_groups, const u32 all_ranks)
1367 {
1368         const u32 rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
1369                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1370         const u32 quick_read_mode =
1371                 ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
1372                  ENABLE_SUPER_QUICK_CALIBRATION);
1373         u32 correct_mask_vg = param->read_correct_mask_vg;
1374         u32 tmp_bit_chk;
1375         u32 base_rw_mgr;
1376         u32 addr;
1377
1378         int r, vg, ret;
1379
1380         *bit_chk = param->read_correct_mask;
1381
1382         for (r = rank_bgn; r < rank_end; r++) {
1383                 /* set rank */
1384                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
1385
1386                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1387
1388                 writel(RW_MGR_READ_B2B_WAIT1,
1389                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1390
1391                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1392                 writel(RW_MGR_READ_B2B_WAIT2,
1393                         &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1394
1395                 if (quick_read_mode)
1396                         /* need at least two (1+1) reads to capture failures */
1397                         writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1398                 else if (all_groups)
1399                         writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1400                 else
1401                         writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
1402
1403                 writel(RW_MGR_READ_B2B,
1404                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1405                 if (all_groups)
1406                         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
1407                                RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
1408                                &sdr_rw_load_mgr_regs->load_cntr3);
1409                 else
1410                         writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1411
1412                 writel(RW_MGR_READ_B2B,
1413                         &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1414
1415                 tmp_bit_chk = 0;
1416                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; vg >= 0;
1417                      vg--) {
1418                         /* Reset the FIFOs to get pointers to known state. */
1419                         writel(0, &phy_mgr_cmd->fifo_reset);
1420                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1421                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1422
1423                         if (all_groups) {
1424                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1425                                        RW_MGR_RUN_ALL_GROUPS_OFFSET;
1426                         } else {
1427                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1428                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1429                         }
1430
1431                         writel(RW_MGR_READ_B2B, addr +
1432                                ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
1433                                vg) << 2));
1434
1435                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1436                         tmp_bit_chk <<= RW_MGR_MEM_DQ_PER_READ_DQS /
1437                                         RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS;
1438                         tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
1439                 }
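
                /*
                 * Worked example of the per-VG packing above (values are
                 * illustrative, not from any particular configuration):
                 * with RW_MGR_MEM_DQ_PER_READ_DQS = 8 and
                 * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS = 2, each virtual
                 * group contributes 8 / 2 = 4 pass bits. The loop shifts
                 * tmp_bit_chk left by 4 and ORs in the next 4 bits, so two
                 * iterations pack all 8 DQ pass bits for this rank, e.g.
                 * (0b1111 << 4) | 0b1011 = 0b11111011.
                 */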
1440
1441                 *bit_chk &= tmp_bit_chk;
1442         }
1443
1444         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1445         writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));
1446
1447         set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
1448
1449         if (all_correct) {
1450                 ret = (*bit_chk == param->read_correct_mask);
1451                 debug_cond(DLEVEL == 2,
1452                            "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
1453                            __func__, __LINE__, group, all_groups, *bit_chk,
1454                            param->read_correct_mask, ret);
1455         } else {
1456                 ret = (*bit_chk != 0x00);
1457                 debug_cond(DLEVEL == 2,
1458                            "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
1459                            __func__, __LINE__, group, all_groups, *bit_chk,
1460                            0, ret);
1461         }
1462
1463         return ret;
1464 }
1465
1466 /**
1467  * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
1468  * @grp:                Read/Write group
1469  * @num_tries:          Number of retries of the test
1470  * @all_correct:        All bits must be correct in the mask
1471  * @all_groups:         Test all R/W groups
1472  *
1473  * Perform a READ test across all memory ranks.
1474  */
1475 static int
1476 rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries,
1477                                          const u32 all_correct,
1478                                          const u32 all_groups)
1479 {
1480         u32 bit_chk;
1481         return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct,
1482                                               &bit_chk, all_groups, 1);
1483 }
1484
1485 /**
1486  * rw_mgr_incr_vfifo() - Increase VFIFO value
1487  * @grp:        Read/Write group
1488  *
1489  * Increase VFIFO value.
1490  */
1491 static void rw_mgr_incr_vfifo(const u32 grp)
1492 {
1493         writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
1494 }
1495
1496 /**
1497  * rw_mgr_decr_vfifo() - Decrease VFIFO value
1498  * @grp:        Read/Write group
1499  *
1500  * Decrease VFIFO value.
1501  */
1502 static void rw_mgr_decr_vfifo(const u32 grp)
1503 {
1504         u32 i;
1505
1506         for (i = 0; i < VFIFO_SIZE - 1; i++)
1507                 rw_mgr_incr_vfifo(grp);
1508 }
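
/*
 * Note on the decrement above: only an increment command is used here, so
 * stepping the VFIFO back by one position is implemented by wrapping all
 * the way around. For example, with a hypothetical VFIFO_SIZE of 8,
 * issuing 7 increments from position 3 lands on (3 + 7) % 8 = 2, i.e. one
 * position earlier.
 */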
1509
1510 /**
1511  * find_vfifo_failing_read() - Push VFIFO to get a failing read
1512  * @grp:        Read/Write group
1513  *
1514  * Push VFIFO until a failing read happens.
1515  */
1516 static int find_vfifo_failing_read(const u32 grp)
1517 {
1518         u32 v, ret, fail_cnt = 0;
1519
1520         for (v = 0; v < VFIFO_SIZE; v++) {
1521                 debug_cond(DLEVEL == 2, "%s:%d: vfifo %u\n",
1522                            __func__, __LINE__, v);
1523                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1524                                                 PASS_ONE_BIT, 0);
1525                 if (!ret) {
1526                         fail_cnt++;
1527
1528                         if (fail_cnt == 2)
1529                                 return v;
1530                 }
1531
1532                 /* Fiddle with FIFO. */
1533                 rw_mgr_incr_vfifo(grp);
1534         }
1535
1536         /* No failing read found! Something must have gone wrong. */
1537         debug_cond(DLEVEL == 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
1538         return 0;
1539 }
1540
1541 /**
1542  * sdr_find_phase_delay() - Find DQS enable phase or delay
1543  * @working:    If 1, look for working phase/delay, if 0, look for non-working
1544  * @delay:      If 1, look for delay, if 0, look for phase
1545  * @grp:        Read/Write group
1546  * @work:       Working window position
1547  * @work_inc:   Working window increment
1548  * @pd:         DQS Phase/Delay Iterator
1549  *
1550  * Find working or non-working DQS enable phase setting.
1551  */
1552 static int sdr_find_phase_delay(int working, int delay, const u32 grp,
1553                                 u32 *work, const u32 work_inc, u32 *pd)
1554 {
1555         const u32 max = delay ? IO_DQS_EN_DELAY_MAX : IO_DQS_EN_PHASE_MAX;
1556         u32 ret;
1557
1558         for (; *pd <= max; (*pd)++) {
1559                 if (delay)
1560                         scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd);
1561                 else
1562                         scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd);
1563
1564                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1565                                         PASS_ONE_BIT, 0);
1566                 if (!working)
1567                         ret = !ret;
1568
1569                 if (ret)
1570                         return 0;
1571
1572                 if (work)
1573                         *work += work_inc;
1574         }
1575
1576         return -EINVAL;
1577 }
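
/*
 * Example uses of sdr_find_phase_delay(), paraphrased from the call sites
 * in this file:
 *
 *   // Sweep phase taps *p, crediting IO_DELAY_PER_OPA_TAP per step:
 *   ret = sdr_find_phase_delay(1, 0, grp, work, IO_DELAY_PER_OPA_TAP, p);
 *
 *   // Sweep delay taps *d looking for the first failing setting:
 *   sdr_find_phase_delay(0, 1, grp, &work_end,
 *                        IO_DELAY_PER_DQS_EN_DCHAIN_TAP, &d);
 *
 * A return value of 0 means the requested (non-)working setting was found
 * and *pd points at it; -EINVAL means the sweep ran past the maximum.
 */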
1578 /**
1579  * sdr_find_phase() - Find DQS enable phase
1580  * @working:    If 1, look for working phase, if 0, look for non-working phase
1581  * @grp:        Read/Write group
1582  * @work:       Working window position
1583  * @i:          Iterator
1584  * @p:          DQS Phase Iterator
1585  *
1586  * Find working or non-working DQS enable phase setting.
1587  */
1588 static int sdr_find_phase(int working, const u32 grp, u32 *work,
1589                           u32 *i, u32 *p)
1590 {
1591         const u32 end = VFIFO_SIZE + (working ? 0 : 1);
1592         int ret;
1593
1594         for (; *i < end; (*i)++) {
1595                 if (working)
1596                         *p = 0;
1597
1598                 ret = sdr_find_phase_delay(working, 0, grp, work,
1599                                            IO_DELAY_PER_OPA_TAP, p);
1600                 if (!ret)
1601                         return 0;
1602
1603                 if (*p > IO_DQS_EN_PHASE_MAX) {
1604                         /* Fiddle with FIFO. */
1605                         rw_mgr_incr_vfifo(grp);
1606                         if (!working)
1607                                 *p = 0;
1608                 }
1609         }
1610
1611         return -EINVAL;
1612 }
1613
1614 /**
1615  * sdr_working_phase() - Find working DQS enable phase
1616  * @grp:        Read/Write group
1617  * @work_bgn:   Working window start position
1618  * @d:          dtaps output value
1619  * @p:          DQS Phase Iterator
1620  * @i:          Iterator
1621  *
1622  * Find working DQS enable phase setting.
1623  */
1624 static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d,
1625                              u32 *p, u32 *i)
1626 {
1627         const u32 dtaps_per_ptap = IO_DELAY_PER_OPA_TAP /
1628                                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1629         int ret;
1630
1631         *work_bgn = 0;
1632
1633         for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
1634                 *i = 0;
1635                 scc_mgr_set_dqs_en_delay_all_ranks(grp, *d);
1636                 ret = sdr_find_phase(1, grp, work_bgn, i, p);
1637                 if (!ret)
1638                         return 0;
1639                 *work_bgn += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1640         }
1641
1642         /* Cannot find working solution */
1643         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1644                    __func__, __LINE__);
1645         return -EINVAL;
1646 }
1647
1648 /**
1649  * sdr_backup_phase() - Find DQS enable backup phase
1650  * @grp:        Read/Write group
1651  * @work_bgn:   Working window start position
1652  * @p:          DQS Phase Iterator
1653  *
1654  * Find DQS enable backup phase setting.
1655  */
1656 static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p)
1657 {
1658         u32 tmp_delay, d;
1659         int ret;
1660
1661         /* Special case code for backing up a phase */
1662         if (*p == 0) {
1663                 *p = IO_DQS_EN_PHASE_MAX;
1664                 rw_mgr_decr_vfifo(grp);
1665         } else {
1666                 (*p)--;
1667         }
1668         tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1669         scc_mgr_set_dqs_en_phase_all_ranks(grp, *p);
1670
1671         for (d = 0; d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; d++) {
1672                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1673
1674                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1675                                         PASS_ONE_BIT, 0);
1676                 if (ret) {
1677                         *work_bgn = tmp_delay;
1678                         break;
1679                 }
1680
1681                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1682         }
1683
1684         /* Restore VFIFO to old state before we decremented it (if needed). */
1685         (*p)++;
1686         if (*p > IO_DQS_EN_PHASE_MAX) {
1687                 *p = 0;
1688                 rw_mgr_incr_vfifo(grp);
1689         }
1690
1691         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1692 }
1693
1694 /**
1695  * sdr_nonworking_phase() - Find non-working DQS enable phase
1696  * @grp:        Read/Write group
1697  * @work_end:   Working window end position
1698  * @p:          DQS Phase Iterator
1699  * @i:          Iterator
1700  *
1701  * Find non-working DQS enable phase setting.
1702  */
1703 static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i)
1704 {
1705         int ret;
1706
1707         (*p)++;
1708         *work_end += IO_DELAY_PER_OPA_TAP;
1709         if (*p > IO_DQS_EN_PHASE_MAX) {
1710                 /* Fiddle with FIFO. */
1711                 *p = 0;
1712                 rw_mgr_incr_vfifo(grp);
1713         }
1714
1715         ret = sdr_find_phase(0, grp, work_end, i, p);
1716         if (ret) {
1717                 /* Cannot see edge of failing read. */
1718                 debug_cond(DLEVEL == 2, "%s:%d: end: failed\n",
1719                            __func__, __LINE__);
1720         }
1721
1722         return ret;
1723 }
1724
1725 /**
1726  * sdr_find_window_center() - Find center of the working DQS window.
1727  * @grp:        Read/Write group
1728  * @work_bgn:   First working settings
1729  * @work_end:   Last working settings
1730  *
1731  * Find center of the working DQS enable window.
1732  */
1733 static int sdr_find_window_center(const u32 grp, const u32 work_bgn,
1734                                   const u32 work_end)
1735 {
1736         u32 work_mid;
1737         int tmp_delay = 0;
1738         int i, p, d;
1739
1740         work_mid = (work_bgn + work_end) / 2;
1741
1742         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1743                    work_bgn, work_end, work_mid);
1744         /* Get the middle delay to be less than a VFIFO delay */
1745         tmp_delay = (IO_DQS_EN_PHASE_MAX + 1) * IO_DELAY_PER_OPA_TAP;
1746
1747         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1748         work_mid %= tmp_delay;
1749         debug_cond(DLEVEL == 2, "new work_mid %d\n", work_mid);
1750
1751         tmp_delay = rounddown(work_mid, IO_DELAY_PER_OPA_TAP);
1752         if (tmp_delay > IO_DQS_EN_PHASE_MAX * IO_DELAY_PER_OPA_TAP)
1753                 tmp_delay = IO_DQS_EN_PHASE_MAX * IO_DELAY_PER_OPA_TAP;
1754         p = tmp_delay / IO_DELAY_PER_OPA_TAP;
1755
1756         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
1757
1758         d = DIV_ROUND_UP(work_mid - tmp_delay, IO_DELAY_PER_DQS_EN_DCHAIN_TAP);
1759         if (d > IO_DQS_EN_DELAY_MAX)
1760                 d = IO_DQS_EN_DELAY_MAX;
1761         tmp_delay += d * IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1762
1763         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
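
        /*
         * Worked example with purely illustrative numbers (the real values
         * come from sequencer_defines.h): assume IO_DELAY_PER_OPA_TAP = 100,
         * IO_DQS_EN_PHASE_MAX = 7, IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 10,
         * work_bgn = 150 and work_end = 510. Then:
         *
         *   work_mid   = (150 + 510) / 2 = 330
         *   vfifo span = (7 + 1) * 100 = 800, so work_mid % 800 = 330
         *   p          = rounddown(330, 100) / 100 = 3 ptaps (300)
         *   d          = DIV_ROUND_UP(330 - 300, 10) = 3 dtaps
         *
         * i.e. the window centre is approximated as 3 ptaps + 3 dtaps = 330.
         */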
1764
1765         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1766         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1767
1768         /*
1769          * Push VFIFO until we can successfully calibrate. We can do this
1770          * because the largest possible margin is 1 VFIFO cycle.
1771          */
1772         for (i = 0; i < VFIFO_SIZE; i++) {
1773                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center\n");
1774                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1775                                                              PASS_ONE_BIT,
1776                                                              0)) {
1777                         debug_cond(DLEVEL == 2,
1778                                    "%s:%d center: found: ptap=%u dtap=%u\n",
1779                                    __func__, __LINE__, p, d);
1780                         return 0;
1781                 }
1782
1783                 /* Fiddle with FIFO. */
1784                 rw_mgr_incr_vfifo(grp);
1785         }
1786
1787         debug_cond(DLEVEL == 2, "%s:%d center: failed.\n",
1788                    __func__, __LINE__);
1789         return -EINVAL;
1790 }
1791
1792 /**
1793  * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use
1794  * @grp:        Read/Write Group
1795  *
1796  * Find a good DQS enable to use.
1797  */
1798 static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp)
1799 {
1800         u32 d, p, i;
1801         u32 dtaps_per_ptap;
1802         u32 work_bgn, work_end;
1803         u32 found_passing_read, found_failing_read, initial_failing_dtap;
1804         int ret;
1805
1806         debug("%s:%d %u\n", __func__, __LINE__, grp);
1807
1808         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1809
1810         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1811         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1812
1813         /* Step 0: Determine number of delay taps for each phase tap. */
1814         dtaps_per_ptap = IO_DELAY_PER_OPA_TAP / IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1815
1816         /* Step 1: First push vfifo until we get a failing read. */
1817         find_vfifo_failing_read(grp);
1818
1819         /* Step 2: Find first working phase, increment in ptaps. */
1820         work_bgn = 0;
1821         ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i);
1822         if (ret)
1823                 return ret;
1824
1825         work_end = work_bgn;
1826
1827         /*
1828          * If d is 0 then the working window covers a phase tap and we can
1829          * follow the old procedure. Otherwise, we've found the beginning
1830          * and we need to increment the dtaps until we find the end.
1831          */
1832         if (d == 0) {
1833                 /*
1834                  * Step 3a: If we have room, back off by one and
1835                  *          increment in dtaps.
1836                  */
1837                 sdr_backup_phase(grp, &work_bgn, &p);
1838
1839                 /*
1840                  * Step 4a: go forward from working phase to non working
1841                  * phase, increment in ptaps.
1842                  */
1843                 ret = sdr_nonworking_phase(grp, &work_end, &p, &i);
1844                 if (ret)
1845                         return ret;
1846
1847                 /* Step 5a: Back off one from last, increment in dtaps. */
1848
1849                 /* Special case code for backing up a phase */
1850                 if (p == 0) {
1851                         p = IO_DQS_EN_PHASE_MAX;
1852                         rw_mgr_decr_vfifo(grp);
1853                 } else {
1854                         p = p - 1;
1855                 }
1856
1857                 work_end -= IO_DELAY_PER_OPA_TAP;
1858                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1859
1860                 d = 0;
1861
1862                 debug_cond(DLEVEL == 2, "%s:%d p: ptap=%u\n",
1863                            __func__, __LINE__, p);
1864         }
1865
1866         /* The dtap increment to find the failing edge is done here. */
1867         sdr_find_phase_delay(0, 1, grp, &work_end,
1868                              IO_DELAY_PER_DQS_EN_DCHAIN_TAP, &d);
1869
1870         /* Go back to working dtap */
1871         if (d != 0)
1872                 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1873
1874         debug_cond(DLEVEL == 2,
1875                    "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
1876                    __func__, __LINE__, p, d - 1, work_end);
1877
1878         if (work_end < work_bgn) {
1879                 /* nil range */
1880                 debug_cond(DLEVEL == 2, "%s:%d end-2: failed\n",
1881                            __func__, __LINE__);
1882                 return -EINVAL;
1883         }
1884
1885         debug_cond(DLEVEL == 2, "%s:%d found range [%u,%u]\n",
1886                    __func__, __LINE__, work_bgn, work_end);
1887
1888         /*
1889          * We need to calculate the number of dtaps that equal a ptap.
1890          * To do that we'll back up a ptap and re-find the edge of the
1891          * window using dtaps
1892          */
1893         debug_cond(DLEVEL == 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
1894                    __func__, __LINE__);
1895
1896         /* Special case code for backing up a phase */
1897         if (p == 0) {
1898                 p = IO_DQS_EN_PHASE_MAX;
1899                 rw_mgr_decr_vfifo(grp);
1900                 debug_cond(DLEVEL == 2, "%s:%d backed up cycle/phase: p=%u\n",
1901                            __func__, __LINE__, p);
1902         } else {
1903                 p = p - 1;
1904                 debug_cond(DLEVEL == 2, "%s:%d backed up phase only: p=%u\n",
1905                            __func__, __LINE__, p);
1906         }
1907
1908         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1909
1910         /*
1911          * Increase dtap until we first see a passing read (in case the
1912          * window is smaller than a ptap), and then a failing read to
1913          * mark the edge of the window again.
1914          */
1915
1916         /* Find a passing read. */
1917         debug_cond(DLEVEL == 2, "%s:%d find passing read\n",
1918                    __func__, __LINE__);
1919
1920         initial_failing_dtap = d;
1921
1922         found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d);
1923         if (found_passing_read) {
1924                 /* Find a failing read. */
1925                 debug_cond(DLEVEL == 2, "%s:%d find failing read\n",
1926                            __func__, __LINE__);
1927                 d++;
1928                 found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0,
1929                                                            &d);
1930         } else {
1931                 debug_cond(DLEVEL == 1,
1932                            "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
1933                            __func__, __LINE__);
1934         }
1935
1936         /*
1937          * The dynamically calculated dtaps_per_ptap is only valid if we
1938          * found a passing/failing read. If we didn't, it means d hit the max
1939          * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its
1940          * statically calculated value.
1941          */
1942         if (found_passing_read && found_failing_read)
1943                 dtaps_per_ptap = d - initial_failing_dtap;
1944
1945         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1946         debug_cond(DLEVEL == 2, "%s:%d dtaps_per_ptap=%u - %u = %u\n",
1947                    __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);
1948
1949         /* Step 6: Find the centre of the window. */
1950         ret = sdr_find_window_center(grp, work_bgn, work_end);
1951
1952         return ret;
1953 }
1954
1955 /**
1956  * search_stop_check() - Check if the detected edge is valid
1957  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
1958  * @d:                  DQS delay
1959  * @rank_bgn:           Rank number
1960  * @write_group:        Write Group
1961  * @read_group:         Read Group
1962  * @bit_chk:            Resulting bit mask after the test
1963  * @sticky_bit_chk:     Resulting sticky bit mask after the test
1964  * @use_read_test:      Perform read test
1965  *
1966  * Test if the found edge is valid.
1967  */
1968 static u32 search_stop_check(const int write, const int d, const int rank_bgn,
1969                              const u32 write_group, const u32 read_group,
1970                              u32 *bit_chk, u32 *sticky_bit_chk,
1971                              const u32 use_read_test)
1972 {
1973         const u32 ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
1974                           RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
1975         const u32 correct_mask = write ? param->write_correct_mask :
1976                                          param->read_correct_mask;
1977         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
1978                                     RW_MGR_MEM_DQ_PER_READ_DQS;
1979         u32 ret;
1980         /*
1981          * Stop searching when the read test doesn't pass AND when
1982          * we've seen a passing read on every bit.
1983          */
1984         if (write) {                    /* WRITE-ONLY */
1985                 ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1986                                                          0, PASS_ONE_BIT,
1987                                                          bit_chk, 0);
1988         } else if (use_read_test) {     /* READ-ONLY */
1989                 ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group,
1990                                                         NUM_READ_PB_TESTS,
1991                                                         PASS_ONE_BIT, bit_chk,
1992                                                         0, 0);
1993         } else {                        /* READ-ONLY */
1994                 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0,
1995                                                 PASS_ONE_BIT, bit_chk, 0);
1996                 *bit_chk = *bit_chk >> (per_dqs *
1997                         (read_group - (write_group * ratio)));
1998                 ret = (*bit_chk == 0);
1999         }
2000         *sticky_bit_chk = *sticky_bit_chk | *bit_chk;
2001         ret = ret && (*sticky_bit_chk == correct_mask);
2002         debug_cond(DLEVEL == 2,
2003                    "%s:%d center(left): dtap=%u => %u == %u && %u\n",
2004                    __func__, __LINE__, d,
2005                    *sticky_bit_chk, correct_mask, ret);
2006         return ret;
2007 }
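
/*
 * Illustration of the read-to-write group mapping used above (the widths
 * are hypothetical): with RW_MGR_MEM_IF_READ_DQS_WIDTH = 8 and
 * RW_MGR_MEM_IF_WRITE_DQS_WIDTH = 4, ratio = 2, i.e. two read groups per
 * write group. For read_group = 5 within write_group = 2, the write test
 * result is shifted right by per_dqs * (5 - 2 * 2) = per_dqs bits, so the
 * bits belonging to this read group land at bit 0 before the zero check.
 */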
2008
2009 /**
2010  * search_left_edge() - Find left edge of DQ/DQS working phase
2011  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2012  * @rank_bgn:           Rank number
2013  * @write_group:        Write Group
2014  * @read_group:         Read Group
2015  * @test_bgn:           Rank number to begin the test
2016  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2017  * @left_edge:          Left edge of the DQ/DQS phase
2018  * @right_edge:         Right edge of the DQ/DQS phase
2019  * @use_read_test:      Perform read test
2020  *
2021  * Find left edge of DQ/DQS working phase.
2022  */
2023 static void search_left_edge(const int write, const int rank_bgn,
2024         const u32 write_group, const u32 read_group, const u32 test_bgn,
2025         u32 *sticky_bit_chk,
2026         int *left_edge, int *right_edge, const u32 use_read_test)
2027 {
2028         const u32 delay_max = write ? IO_IO_OUT1_DELAY_MAX : IO_IO_IN_DELAY_MAX;
2029         const u32 dqs_max = write ? IO_IO_OUT1_DELAY_MAX : IO_DQS_IN_DELAY_MAX;
2030         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2031                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2032         u32 stop, bit_chk;
2033         int i, d;
2034
2035         for (d = 0; d <= dqs_max; d++) {
2036                 if (write)
2037                         scc_mgr_apply_group_dq_out1_delay(d);
2038                 else
2039                         scc_mgr_apply_group_dq_in_delay(test_bgn, d);
2040
2041                 writel(0, &sdr_scc_mgr->update);
2042
2043                 stop = search_stop_check(write, d, rank_bgn, write_group,
2044                                          read_group, &bit_chk, sticky_bit_chk,
2045                                          use_read_test);
2046                 if (stop == 1)
2047                         break;
2048
2049                 /* stop != 1 */
2050                 for (i = 0; i < per_dqs; i++) {
2051                         if (bit_chk & 1) {
2052                                 /*
2053                                  * Remember a passing test as
2054                                  * the left_edge.
2055                                  */
2056                                 left_edge[i] = d;
2057                         } else {
2058                                 /*
2059                                  * If a left edge has not been seen
2060                                  * yet, then a future passing test
2061                                  * will mark this edge as the right
2062                                  * edge.
2063                                  */
2064                                 if (left_edge[i] == delay_max + 1)
2065                                         right_edge[i] = -(d + 1);
2066                         }
2067                         bit_chk >>= 1;
2068                 }
2069         }
2070
2071         /* Reset DQ delay chains to 0 */
2072         if (write)
2073                 scc_mgr_apply_group_dq_out1_delay(0);
2074         else
2075                 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2076
2077         *sticky_bit_chk = 0;
2078         for (i = per_dqs - 1; i >= 0; i--) {
2079                 debug_cond(DLEVEL == 2,
2080                            "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2081                            __func__, __LINE__, i, left_edge[i],
2082                            i, right_edge[i]);
2083
2084                 /*
2085                  * Check for cases where we haven't found the left edge,
2086                  * which makes our assignment of the right edge invalid.
2087                  * Reset it to the illegal value.
2088                  */
2089                 if ((left_edge[i] == delay_max + 1) &&
2090                     (right_edge[i] != delay_max + 1)) {
2091                         right_edge[i] = delay_max + 1;
2092                         debug_cond(DLEVEL == 2,
2093                                    "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
2094                                    __func__, __LINE__, i, right_edge[i]);
2095                 }
2096
2097                 /*
2098                  * Reset sticky bit
2099                  * READ: except for bits where we have seen both
2100                  *       the left and right edge.
2101                  * WRITE: except for bits where we have seen the
2102                  *        left edge.
2103                  */
2104                 *sticky_bit_chk <<= 1;
2105                 if (write) {
2106                         if (left_edge[i] != delay_max + 1)
2107                                 *sticky_bit_chk |= 1;
2108                 } else {
2109                         if ((left_edge[i] != delay_max + 1) &&
2110                             (right_edge[i] != delay_max + 1))
2111                                 *sticky_bit_chk |= 1;
2112                 }
2113         }
2116 }
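
/*
 * Sketch of the edge bookkeeping above for a single bit, with made-up
 * pass/fail results: starting from the illegal marker (delay_max + 1),
 * fails at d = 0, 1, 2 each update the provisional marker right_edge =
 * -(d + 1), leaving -3; the first pass at d = 3 then sets left_edge = 3,
 * and any later passes keep advancing it, so left_edge ends up holding
 * the last passing delay seen on this sweep.
 */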
2117
2118 /**
2119  * search_right_edge() - Find right edge of DQ/DQS working phase
2120  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2121  * @rank_bgn:           Rank number
2122  * @write_group:        Write Group
2123  * @read_group:         Read Group
2124  * @start_dqs:          DQS start phase
2125  * @start_dqs_en:       DQS enable start phase
2126  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2127  * @left_edge:          Left edge of the DQ/DQS phase
2128  * @right_edge:         Right edge of the DQ/DQS phase
2129  * @use_read_test:      Perform read test
2130  *
2131  * Find right edge of DQ/DQS working phase.
2132  */
2133 static int search_right_edge(const int write, const int rank_bgn,
2134         const u32 write_group, const u32 read_group,
2135         const int start_dqs, const int start_dqs_en,
2136         u32 *sticky_bit_chk,
2137         int *left_edge, int *right_edge, const u32 use_read_test)
2138 {
2139         const u32 delay_max = write ? IO_IO_OUT1_DELAY_MAX : IO_IO_IN_DELAY_MAX;
2140         const u32 dqs_max = write ? IO_IO_OUT1_DELAY_MAX : IO_DQS_IN_DELAY_MAX;
2141         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2142                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2143         u32 stop, bit_chk;
2144         int i, d;
2145
2146         for (d = 0; d <= dqs_max - start_dqs; d++) {
2147                 if (write) {    /* WRITE-ONLY */
2148                         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2149                                                                 d + start_dqs);
2150                 } else {        /* READ-ONLY */
2151                         scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
2152                         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2153                                 uint32_t delay = d + start_dqs_en;
2154                                 if (delay > IO_DQS_EN_DELAY_MAX)
2155                                         delay = IO_DQS_EN_DELAY_MAX;
2156                                 scc_mgr_set_dqs_en_delay(read_group, delay);
2157                         }
2158                         scc_mgr_load_dqs(read_group);
2159                 }
2160
2161                 writel(0, &sdr_scc_mgr->update);
2162
2163                 stop = search_stop_check(write, d, rank_bgn, write_group,
2164                                          read_group, &bit_chk, sticky_bit_chk,
2165                                          use_read_test);
2166                 if (stop == 1) {
2167                         if (write && (d == 0)) {        /* WRITE-ONLY */
2168                                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2169                                         /*
2170                                          * d = 0 failed, but it passed when
2171                                          * testing the left edge, so it must be
2172                                          * marginal, set it to -1
2173                                          */
2174                                         if (right_edge[i] == delay_max + 1 &&
2175                                             left_edge[i] != delay_max + 1)
2176                                                 right_edge[i] = -1;
2177                                 }
2178                         }
2179                         break;
2180                 }
2181
2182                 /* stop != 1 */
2183                 for (i = 0; i < per_dqs; i++) {
2184                         if (bit_chk & 1) {
2185                                 /*
2186                                  * Remember a passing test as
2187                                  * the right_edge.
2188                                  */
2189                                 right_edge[i] = d;
2190                         } else {
2191                                 if (d != 0) {
2192                                         /*
2193                                          * If a right edge has not
2194                                          * been seen yet, then a future
2195                                          * passing test will mark this
2196                                          * edge as the left edge.
2197                                          */
2198                                         if (right_edge[i] == delay_max + 1)
2199                                                 left_edge[i] = -(d + 1);
2200                                 } else {
2201                                         /*
2202                                          * d = 0 failed, but it passed
2203                                          * when testing the left edge,
2204                                          * so it must be marginal, set
2205                                          * it to -1
2206                                          */
2207                                         if (right_edge[i] == delay_max + 1 &&
2208                                             left_edge[i] != delay_max + 1)
2209                                                 right_edge[i] = -1;
2210                                         /*
2211                                          * If a right edge has not been
2212                                          * seen yet, then a future
2213                                          * passing test will mark this
2214                                          * edge as the left edge.
2215                                          */
2216                                         else if (right_edge[i] == delay_max + 1)
2217                                                 left_edge[i] = -(d + 1);
2218                                 }
2219                         }
2220
2221                         debug_cond(DLEVEL == 2, "%s:%d center[r,d=%u]: ",
2222                                    __func__, __LINE__, d);
2223                         debug_cond(DLEVEL == 2,
2224                                    "bit_chk_test=%i left_edge[%u]: %d ",
2225                                    bit_chk & 1, i, left_edge[i]);
2226                         debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2227                                    right_edge[i]);
2228                         bit_chk >>= 1;
2229                 }
2230         }
2231
2232         /* Check that all bits have a window */
2233         for (i = 0; i < per_dqs; i++) {
2234                 debug_cond(DLEVEL == 2,
2235                    "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2236                            __func__, __LINE__, i, left_edge[i],
2237                            i, right_edge[i]);
2238                 if ((left_edge[i] == dqs_max + 1) ||
2239                     (right_edge[i] == dqs_max + 1))
2240                         return i + 1;   /* FIXME: If we fail, retval > 0 */
2241         }
2242
2243         return 0;
2244 }
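
/*
 * The d == 0 special case above, spelled out: if the very first setting
 * of the right sweep fails for a bit that passed during the left sweep
 * (left_edge already valid), the window must close between the two
 * sweeps' starting points, so the bit is marked marginal with
 * right_edge = -1 instead of being given a provisional left-edge marker.
 */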
2245
2246 /**
2247  * get_window_mid_index() - Find the best middle setting of DQ/DQS phase
2248  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2249  * @left_edge:          Left edge of the DQ/DQS phase
2250  * @right_edge:         Right edge of the DQ/DQS phase
2251  * @mid_min:            Best DQ/DQS phase middle setting
2252  *
2253  * Find index and value of the middle of the DQ/DQS working phase.
2254  */
2255 static int get_window_mid_index(const int write, int *left_edge,
2256                                 int *right_edge, int *mid_min)
2257 {
2258         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2259                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2260         int i, mid, min_index;
2261
2262         /* Find middle of window for each DQ bit */
2263         *mid_min = left_edge[0] - right_edge[0];
2264         min_index = 0;
2265         for (i = 1; i < per_dqs; i++) {
2266                 mid = left_edge[i] - right_edge[i];
2267                 if (mid < *mid_min) {
2268                         *mid_min = mid;
2269                         min_index = i;
2270                 }
2271         }
2272
2273         /*
2274          * -mid_min/2 represents the amount that we need to move DQS.
2275          * If mid_min is odd and positive we'll need to add one to make
2276          * sure the rounding in further calculations is correct (always
2277          * bias to the right), so just add 1 for all positive values.
2278          */
2279         if (*mid_min > 0)
2280                 (*mid_min)++;
2281         *mid_min = *mid_min / 2;
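
        /*
         * Example of the bias-to-the-right rounding (edge values invented
         * for illustration): if the narrowest window has left_edge = 7 and
         * right_edge = 2, then mid_min = 5; the increment makes it 6 and
         * the division yields 3, so DQS is later moved by -3 rather than
         * the truncated -2 that 5 / 2 would have produced.
         */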
2282
2283         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n",
2284                    __func__, __LINE__, *mid_min, min_index);
2285         return min_index;
2286 }
2287
2288 /**
2289  * center_dq_windows() - Center the DQ/DQS windows
2290  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2291  * @left_edge:          Left edge of the DQ/DQS phase
2292  * @right_edge:         Right edge of the DQ/DQS phase
2293  * @mid_min:            Adjusted DQ/DQS phase middle setting
2294  * @orig_mid_min:       Original DQ/DQS phase middle setting
2295  * @min_index:          DQ/DQS phase middle setting index
2296  * @test_bgn:           Rank number to begin the test
2297  * @dq_margin:          Amount of shift for the DQ
2298  * @dqs_margin:         Amount of shift for the DQS
2299  *
2300  * Align the DQ/DQS windows in each group.
2301  */
2302 static void center_dq_windows(const int write, int *left_edge, int *right_edge,
2303                               const int mid_min, const int orig_mid_min,
2304                               const int min_index, const int test_bgn,
2305                               int *dq_margin, int *dqs_margin)
2306 {
2307         const u32 delay_max = write ? IO_IO_OUT1_DELAY_MAX : IO_IO_IN_DELAY_MAX;
2308         const u32 per_dqs = write ? RW_MGR_MEM_DQ_PER_WRITE_DQS :
2309                                     RW_MGR_MEM_DQ_PER_READ_DQS;
2310         const u32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET :
2311                                       SCC_MGR_IO_IN_DELAY_OFFSET;
2312         const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off;
2313
2314         u32 temp_dq_io_delay1, temp_dq_io_delay2;
2315         int shift_dq, i, p;
2316
2317         /* Initialize data for export structures */
2318         *dqs_margin = delay_max + 1;
2319         *dq_margin  = delay_max + 1;
2320
2321         /* add delay to bring centre of all DQ windows to the same "level" */
2322         for (i = 0, p = test_bgn; i < per_dqs; i++, p++) {
2323                 /* Use values before divide by 2 to reduce round off error */
2324                 shift_dq = (left_edge[i] - right_edge[i] -
2325                         (left_edge[min_index] - right_edge[min_index]))/2  +
2326                         (orig_mid_min - mid_min);
2327
2328                 debug_cond(DLEVEL == 2,
2329                            "vfifo_center: before: shift_dq[%u]=%d\n",
2330                            i, shift_dq);
2331
2332                 temp_dq_io_delay1 = readl(addr + (p << 2));
2333                 temp_dq_io_delay2 = readl(addr + (i << 2));
2334
2335                 if (shift_dq + temp_dq_io_delay1 > delay_max)
2336                         shift_dq = delay_max - temp_dq_io_delay2;
2337                 else if (shift_dq + temp_dq_io_delay1 < 0)
2338                         shift_dq = -temp_dq_io_delay1;
2339
2340                 debug_cond(DLEVEL == 2,
2341                            "vfifo_center: after: shift_dq[%u]=%d\n",
2342                            i, shift_dq);
2343
2344                 if (write)
2345                         scc_mgr_set_dq_out1_delay(i, temp_dq_io_delay1 + shift_dq);
2346                 else
2347                         scc_mgr_set_dq_in_delay(p, temp_dq_io_delay1 + shift_dq);
2348
2349                 scc_mgr_load_dq(p);
2350
2351                 debug_cond(DLEVEL == 2,
2352                            "vfifo_center: margin[%u]=[%d,%d]\n", i,
2353                            left_edge[i] - shift_dq + (-mid_min),
2354                            right_edge[i] + shift_dq - (-mid_min));
2355
2356                 /* To determine values for export structures */
2357                 if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin)
2358                         *dq_margin = left_edge[i] - shift_dq + (-mid_min);
2359
2360                 if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin)
2361                         *dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2362         }
2364 }
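
/*
 * A worked shift_dq computation with invented edge values: suppose bit i
 * has left_edge = 14 and right_edge = 2, the narrowest window (min_index)
 * has left_edge = 10 and right_edge = 2, and orig_mid_min - mid_min = 0.
 * Then shift_dq = ((14 - 2) - (10 - 2)) / 2 = 2, i.e. bit i's delay is
 * pushed out by two taps to line its window centre up with the narrowest
 * bit's, subject to the [0, delay_max] clamping above.
 */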
2365
2366 /**
2367  * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering
2368  * @rank_bgn:           Rank number
2369  * @rw_group:           Read/Write Group
2370  * @test_bgn:           Rank at which the test begins
2371  * @use_read_test:      Perform a read test
2372  * @update_fom:         Update FOM
2373  *
2374  * Per-bit deskew DQ and centering.
2375  */
2376 static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn,
2377                         const u32 rw_group, const u32 test_bgn,
2378                         const int use_read_test, const int update_fom)
2379 {
2380         const u32 addr =
2381                 SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET +
2382                 (rw_group << 2);
2383         /*
2384          * Store these as signed since there are comparisons with
2385          * signed numbers.
2386          */
2387         uint32_t sticky_bit_chk;
2388         int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
2389         int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
2390         int32_t orig_mid_min, mid_min;
2391         int32_t new_dqs, start_dqs, start_dqs_en, final_dqs_en;
2392         int32_t dq_margin, dqs_margin;
2393         int i, min_index;
2394         int ret;
2395
2396         debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2397
2398         start_dqs = readl(addr);
2399         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2400                 start_dqs_en = readl(addr - IO_DQS_EN_DELAY_OFFSET);
2401
2402         /* set the left and right edge of each bit to an illegal value */
2403         /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
2404         sticky_bit_chk = 0;
2405         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2406                 left_edge[i]  = IO_IO_IN_DELAY_MAX + 1;
2407                 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
2408         }
2409
2410         /* Search for the left edge of the window for each bit */
2411         search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn,
2412                          &sticky_bit_chk,
2413                          left_edge, right_edge, use_read_test);
2414
2415
2417         ret = search_right_edge(0, rank_bgn, rw_group, rw_group,
2418                                 start_dqs, start_dqs_en,
2419                                 &sticky_bit_chk,
2420                                 left_edge, right_edge, use_read_test);
2421         if (ret) {
2422                 /*
2423                  * Restore delay chain settings before letting the loop
2424                  * in rw_mgr_mem_calibrate_vfifo to retry different
2425                  * dqs/ck relationships.
2426                  */
2427                 scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs);
2428                 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2429                         scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en);
2430
2431                 scc_mgr_load_dqs(rw_group);
2432                 writel(0, &sdr_scc_mgr->update);
2433
                /* search_right_edge() returns the failing bit index + 1. */
                i = ret - 1;
2434                 debug_cond(DLEVEL == 1,
2435                            "%s:%d vfifo_center: failed to find edge [%u]: %d %d\n",
2436                            __func__, __LINE__, i, left_edge[i], right_edge[i]);
2437                 if (use_read_test) {
2438                         set_failing_group_stage(rw_group *
2439                                 RW_MGR_MEM_DQ_PER_READ_DQS + i,
2440                                 CAL_STAGE_VFIFO,
2441                                 CAL_SUBSTAGE_VFIFO_CENTER);
2442                 } else {
2443                         set_failing_group_stage(rw_group *
2444                                 RW_MGR_MEM_DQ_PER_READ_DQS + i,
2445                                 CAL_STAGE_VFIFO_AFTER_WRITES,
2446                                 CAL_SUBSTAGE_VFIFO_CENTER);
2447                 }
2448                 return -EIO;
2449         }
2450
2451         min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min);
2452
2453         /* Determine the amount we can change DQS (which is -mid_min) */
2454         orig_mid_min = mid_min;
2455         new_dqs = start_dqs - mid_min;
2456         if (new_dqs > IO_DQS_IN_DELAY_MAX)
2457                 new_dqs = IO_DQS_IN_DELAY_MAX;
2458         else if (new_dqs < 0)
2459                 new_dqs = 0;
2460
2461         mid_min = start_dqs - new_dqs;
2462         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2463                    mid_min, new_dqs);
2464
2465         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2466                 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2467                         mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2468                 else if (start_dqs_en - mid_min < 0)
2469                         mid_min += start_dqs_en - mid_min;
2470         }
2471         new_dqs = start_dqs - mid_min;
2472
2473         debug_cond(DLEVEL == 1,
2474                    "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2475                    start_dqs,
2476                    IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2477                    new_dqs, mid_min);
2478
2479         /* Add delay to bring centre of all DQ windows to the same "level". */
2480         center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min,
2481                           min_index, test_bgn, &dq_margin, &dqs_margin);
2482
2483         /* Move DQS-en */
2484         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2485                 final_dqs_en = start_dqs_en - mid_min;
2486                 scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
2487                 scc_mgr_load_dqs(rw_group);
2488         }
2489
2490         /* Move DQS */
2491         scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
2492         scc_mgr_load_dqs(rw_group);
2493         debug_cond(DLEVEL == 2,
2494                    "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d\n",
2495                    __func__, __LINE__, dq_margin, dqs_margin);
2496
2497         /*
2498          * Do not remove this line as it makes sure all of our decisions
2499          * have been applied. Apply the update bit.
2500          */
2501         writel(0, &sdr_scc_mgr->update);
2502
2503         if ((dq_margin < 0) || (dqs_margin < 0))
2504                 return -EINVAL;
2505
2506         return 0;
2507 }
2508
2509 /**
2510  * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
2511  * @rw_group:   Read/Write Group
2512  * @phase:      DQ/DQS phase
2513  *
2514  * Because initially no communication can be reliably performed with the memory
2515  * device, the sequencer uses a guaranteed write mechanism to write data into
2516  * the memory device.
2517  */
2518 static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
2519                                                  const u32 phase)
2520 {
2521         int ret;
2522
2523         /* Set a particular DQ/DQS phase. */
2524         scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);
2525
2526         debug_cond(DLEVEL == 1, "%s:%d guaranteed write: g=%u p=%u\n",
2527                    __func__, __LINE__, rw_group, phase);
2528
2529         /*
2530          * Altera EMI_RM 2015.05.04 :: Figure 1-25
2531          * Load up the patterns used by read calibration using the
2532          * current DQDQS phase.
2533          */
2534         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2535
2536         if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
2537                 return 0;
2538
2539         /*
2540          * Altera EMI_RM 2015.05.04 :: Figure 1-26
2541          * Back-to-Back reads of the patterns used for calibration.
2542          */
2543         ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
2544         if (ret)
2545                 debug_cond(DLEVEL == 1,
2546                            "%s:%d Guaranteed read test failed: g=%u p=%u\n",
2547                            __func__, __LINE__, rw_group, phase);
2548         return ret;
2549 }
2550
2551 /**
2552  * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
2553  * @rw_group:   Read/Write Group
2554  * @test_bgn:   Rank at which the test begins
2555  *
2556  * DQS enable calibration ensures reliable capture of the DQ signal without
2557  * glitches on the DQS line.
2558  */
2559 static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
2560                                                        const u32 test_bgn)
2561 {
2562         /*
2563          * Altera EMI_RM 2015.05.04 :: Figure 1-27
2564  * DQS and DQS Enable Signal Relationships.
2565          */
2566
2567         /* We start at zero, so we have one less DQ to divide among. */
2568         const u32 delay_step = IO_IO_IN_DELAY_MAX /
2569                                (RW_MGR_MEM_DQ_PER_READ_DQS - 1);
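
        /*
         * Example of the resulting spread (illustrative values only): with
         * IO_IO_IN_DELAY_MAX = 31 and RW_MGR_MEM_DQ_PER_READ_DQS = 8,
         * delay_step = 31 / 7 = 4, so the loop below programs per-bit input
         * delays of 0, 4, 8, ..., 28 across the group, staggering the DQ
         * arrival times relative to DQS.
         */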
2570         int ret;
2571         u32 i, p, d, r;
2572
2573         debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
2574
2575         /* Try different dq_in_delays since the DQ path is shorter than DQS. */
2576         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
2577              r += NUM_RANKS_PER_SHADOW_REG) {
2578                 for (i = 0, p = test_bgn, d = 0;
2579                      i < RW_MGR_MEM_DQ_PER_READ_DQS;
2580                      i++, p++, d += delay_step) {
2581                         debug_cond(DLEVEL == 1,
2582                                    "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
2583                                    __func__, __LINE__, rw_group, r, i, p, d);
2584
2585                         scc_mgr_set_dq_in_delay(p, d);
2586                         scc_mgr_load_dq(p);
2587                 }
2588
2589                 writel(0, &sdr_scc_mgr->update);
2590         }
2591
2592         /*
2593          * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
2594          * dq_in_delay values
2595          */
2596         ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group);
2597
2598         debug_cond(DLEVEL == 1,
2599                    "%s:%d: g=%u found=%u; Resetting delay chain to zero\n",
2600                    __func__, __LINE__, rw_group, !ret);
2601
2602         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
2603              r += NUM_RANKS_PER_SHADOW_REG) {
2604                 scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
2605                 writel(0, &sdr_scc_mgr->update);
2606         }
2607
2608         return ret;
2609 }
2610
2611 /**
2612  * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
2613  * @rw_group:           Read/Write Group
2614  * @test_bgn:           Rank at which the test begins
2615  * @use_read_test:      Perform a read test
2616  * @update_fom:         Update FOM
2617  *
2618  * The centering DQ/DQS stage attempts to align DQ and DQS signals on reads
2619  * within a group.
2620  */
2621 static int
2622 rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
2623                                       const int use_read_test,
2624                                       const int update_fom)
2626 {
2627         int ret, grp_calibrated;
2628         u32 rank_bgn, sr;
2629
2630         /*
2631          * Altera EMI_RM 2015.05.04 :: Figure 1-28
2632          * Read per-bit deskew can be done on a per shadow register basis.
2633          */
2634         grp_calibrated = 1;
2635         for (rank_bgn = 0, sr = 0;
2636              rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2637              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
2638                 ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
2639                                                         test_bgn,
2640                                                         use_read_test,
2641                                                         update_fom);
2642                 if (!ret)
2643                         continue;
2644
2645                 grp_calibrated = 0;
2646         }
2647
2648         if (!grp_calibrated)
2649                 return -EIO;
2650
2651         return 0;
2652 }
2653
2654 /**
2655  * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
2656  * @rw_group:           Read/Write Group
2657  * @test_bgn:           Rank at which the test begins
2658  *
2659  * Stage 1: Calibrate the read valid prediction FIFO.
2660  *
2661  * This function implements UniPHY calibration Stage 1, as explained in
2662  * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
2663  *
2664  * - Read valid prediction will consist of finding:
2665  *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
2666  *   - DQS input phase and DQS input delay (DQ/DQS Centering)
2667  * - We also do a per-bit deskew on the DQ lines.
2668  */
2669 static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
2670 {
2671         uint32_t p, d;
2672         uint32_t dtaps_per_ptap;
2673         uint32_t failed_substage;
2674
2675         int ret;
2676
2677         debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2678
2679         /* Update info for sims */
2680         reg_file_set_group(rw_group);
2681         reg_file_set_stage(CAL_STAGE_VFIFO);
2682         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2683
2684         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2685
2686         /* USER Determine number of delay taps for each phase tap. */
2687         dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
2688                                       IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
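        /*
         * Worked example with hypothetical values (the real constants
         * come from sequencer_defines.h): with IO_DELAY_PER_OPA_TAP = 416
         * and IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25, this gives
         * DIV_ROUND_UP(416, 25) - 1 = 17 - 1 = 16 dtaps per ptap.
         */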
2689
2690         for (d = 0; d <= dtaps_per_ptap; d += 2) {
2691                 /*
2692                  * In RLDRAMX we may be messing with the delay of pins in
2693                  * the same write rw_group but outside of the current read
2694                  * rw_group; that's OK because we haven't calibrated the
2695                  * output side yet.
2696                  */
2697                 if (d > 0) {
2698                         scc_mgr_apply_group_all_out_delay_add_all_ranks(
2699                                                                 rw_group, d);
2700                 }
2701
2702                 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX; p++) {
2703                         /* 1) Guaranteed Write */
2704                         ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p);
2705                         if (ret)
2706                                 break;
2707
2708                         /* 2) DQS Enable Calibration */
2709                         ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group,
2710                                                                           test_bgn);
2711                         if (ret) {
2712                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2713                                 continue;
2714                         }
2715
2716                         /* 3) Centering DQ/DQS */
2717                         /*
2718                          * If doing read-after-write calibration, do not
2719                          * update the FOM now; it is updated later, in
2720                          * rw_mgr_mem_calibrate_vfifo_end().
2720                          */
2721                         ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group,
2722                                                                 test_bgn, 1, 0);
2723                         if (ret) {
2724                                 failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
2725                                 continue;
2726                         }
2727
2728                         /* All done. */
2729                         goto cal_done_ok;
2730                 }
2731         }
2732
2733         /* Calibration Stage 1 failed. */
2734         set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage);
2735         return 0;
2736
2737         /* Calibration Stage 1 completed OK. */
2738 cal_done_ok:
2739         /*
2740          * Reset the delay chains back to zero if they have moved > 1
2741          * (check for > 2 because the loop increments d in steps of 2).
2743          */
2744         if (d > 2)
2745                 scc_mgr_zero_group(rw_group, 1);
2746
2747         return 1;
2748 }
2749
2750 /**
2751  * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
2752  * @rw_group:           Read/Write Group
2753  * @test_bgn:           Rank at which the test begins
2754  *
2755  * Stage 3: DQ/DQS Centering.
2756  *
2757  * This function implements UniPHY calibration Stage 3, as explained in
2758  * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
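 *
 * Return: 0 on success, -EIO if the DQ/DQS centering failed.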
2759  */
2760 static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group,
2761                                           const u32 test_bgn)
2762 {
2763         int ret;
2764
2765         debug("%s:%d %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2766
2767         /* Update info for sims. */
2768         reg_file_set_group(rw_group);
2769         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2770         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2771
2772         ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1);
2773         if (ret)
2774                 set_failing_group_stage(rw_group,
2775                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2776                                         CAL_SUBSTAGE_VFIFO_CENTER);
2777         return ret;
2778 }
2779
2780 /**
2781  * rw_mgr_mem_calibrate_lfifo() - Minimize latency
2782  *
2783  * Stage 4: Minimize latency.
2784  *
2785  * This function implements UniPHY calibration Stage 4, as explained in
2786  * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
2787  * Calibrate LFIFO to find smallest read latency.
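 *
 * Return: 1 if a passing read latency was found, 0 otherwise.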
2788  */
2789 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2790 {
2791         int found_one = 0;
2792
2793         debug("%s:%d\n", __func__, __LINE__);
2794
2795         /* Update info for sims. */
2796         reg_file_set_stage(CAL_STAGE_LFIFO);
2797         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2798
2799         /* Load up the patterns used by read calibration for all ranks */
2800         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2801
2802         do {
2803                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2804                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u\n",
2805                            __func__, __LINE__, gbl->curr_read_lat);
2806
2807                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS,
2808                                                               PASS_ALL_BITS, 1))
2809                         break;
2810
2811                 found_one = 1;
2812                 /*
2813                  * Reduce read latency and see if things are
2814                  * working correctly.
2815                  */
2816                 gbl->curr_read_lat--;
2817         } while (gbl->curr_read_lat > 0);
2818
2819         /* Reset the fifos to get pointers to known state. */
2820         writel(0, &phy_mgr_cmd->fifo_reset);
2821
2822         if (found_one) {
2823                 /* Add a fudge factor to the read latency that was determined */
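                /*
                 * The loop above exits with curr_read_lat at the first
                 * failing latency (or at 0 if every tested latency
                 * passed), so adding 2 leaves one cycle of margin above
                 * the lowest latency that was observed to pass.
                 */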
2824                 gbl->curr_read_lat += 2;
2825                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2826                 debug_cond(DLEVEL == 2,
2827                            "%s:%d lfifo: success: using read_lat=%u\n",
2828                            __func__, __LINE__, gbl->curr_read_lat);
2829         } else {
2830                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2831                                         CAL_SUBSTAGE_READ_LATENCY);
2832
2833                 debug_cond(DLEVEL == 2,
2834                            "%s:%d lfifo: failed at initial read_lat=%u\n",
2835                            __func__, __LINE__, gbl->curr_read_lat);
2836         }
2837
2838         return found_one;
2839 }
2840
2841 /**
2842  * search_window() - Search for the window, or part of it, with DM/DQS shift
2843  * @search_dm:          If 1, search for the DM shift; if 0, search for the DQS shift
2844  * @rank_bgn:           Rank number
2845  * @write_group:        Write Group
2846  * @bgn_curr:           Current window begin
2847  * @end_curr:           Current window end
2848  * @bgn_best:           Current best window begin
2849  * @end_best:           Current best window end
2850  * @win_best:           Size of the best window
2851  * @new_dqs:            New DQS value (only applicable if search_dm = 0).
2852  *
2853  * Search for the window, or part of it, by shifting DM or DQS.
2854  */
2855 static void search_window(const int search_dm,
2856                           const u32 rank_bgn, const u32 write_group,
2857                           int *bgn_curr, int *end_curr, int *bgn_best,
2858                           int *end_best, int *win_best, int new_dqs)
2859 {
2860         u32 bit_chk;
2861         const int max = IO_IO_OUT1_DELAY_MAX - new_dqs;
2862         int d, di;
2863
2864         /* Search for the window, or part of it, with the DM/DQS shift. */
2865         for (di = max; di >= 0; di -= DELTA_D) {
2866                 if (search_dm) {
2867                         d = di;
2868                         scc_mgr_apply_group_dm_out1_delay(d);
2869                 } else {
2870                         /* For DQS, we go from 0...max */
2871                         d = max - di;
2872                         /*
2873                          * Note: This only shifts DQS, so we may be
2874                          * unnecessarily limiting ourselves to the width of DQ.
2875                          */
2876                         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2877                                                                 d + new_dqs);
2878                 }
2879
2880                 writel(0, &sdr_scc_mgr->update);
2881
2882                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2883                                                     PASS_ALL_BITS, &bit_chk,
2884                                                     0)) {
2885                         /* Set current end of the window. */
2886                         *end_curr = search_dm ? -d : d;
2887
2888                         /*
2889                          * If a starting edge of our window has not been
2890                          * seen yet, this is our current start of the DM window.
2891                          */
2892                         if (*bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2893                                 *bgn_curr = search_dm ? -d : d;
2894
2895                         /*
2896                          * If the current window is bigger than the best
2897                          * seen so far, set the best seen to the current window.
2898                          */
2899                         if ((*end_curr - *bgn_curr + 1) > *win_best) {
2900                                 *win_best = *end_curr - *bgn_curr + 1;
2901                                 *bgn_best = *bgn_curr;
2902                                 *end_best = *end_curr;
2903                         }
2904                 } else {
2905                         /* We just saw a failing test. Reset temp edge. */
2906                         *bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2907                         *end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2908
2909                         /* Early exit is only applicable to DQS. */
2910                         if (search_dm)
2911                                 continue;
2912
2913                         /*
2914                          * Early exit optimization: if the remaining delay
2915                          * chain space is smaller than the largest window
2916                          * already seen, we can exit.
2917                          */
2918                         if (*win_best - 1 > IO_IO_OUT1_DELAY_MAX - new_dqs - d)
2919                                 break;
2920                 }
2921         }
2922 }
2923
2924 /**
2925  * rw_mgr_mem_calibrate_writes_center() - Center all windows
2926  * @rank_bgn:           Rank number
2927  * @write_group:        Write group
2928  * @test_bgn:           Rank at which the test begins
2929  *
2930  * Center all windows. Do per-bit-deskew to possibly increase size of
2931  * certain windows.
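 *
 * Return: 0 on success, -EINVAL if the edge search fails or any of the
 * DQ/DQS/DM margins ends up negative.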
2932  */
2933 static int
2934 rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group,
2935                                    const u32 test_bgn)
2936 {
2937         int i;
2938         u32 sticky_bit_chk;
2939         u32 min_index;
2940         int left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2941         int right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2942         int mid;
2943         int mid_min, orig_mid_min;
2944         int new_dqs, start_dqs;
2945         int dq_margin, dqs_margin, dm_margin;
2946         int bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2947         int end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2948         int bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2949         int end_best = IO_IO_OUT1_DELAY_MAX + 1;
2950         int win_best = 0;
2951
2952         int ret;
2953
2954         debug("%s:%d %u %u\n", __func__, __LINE__, write_group, test_bgn);
2955
2956         dm_margin = 0;
2957
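        /*
         * Read back the current DQS I/O out1 delay. The DQS delay
         * register appears to sit one 32-bit word past the per-DQ out1
         * delay registers, hence the (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)
         * byte offset.
         */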
2958         start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
2959                           SCC_MGR_IO_OUT1_DELAY_OFFSET) +
2960                           (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2961
2962         /* Per-bit deskew. */
2963
2964         /*
2965          * Set the left and right edge of each bit to an illegal value.
2966          * Use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
2967          */
2968         sticky_bit_chk = 0;
2969         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2970                 left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
2971                 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2972         }
2973
2974         /* Search for the left edge of the window for each bit. */
2975         search_left_edge(1, rank_bgn, write_group, 0, test_bgn,
2976                          &sticky_bit_chk,
2977                          left_edge, right_edge, 0);
2978
2979         /* Search for the right edge of the window for each bit. */
2980         ret = search_right_edge(1, rank_bgn, write_group, 0,
2981                                 start_dqs, 0,
2982                                 &sticky_bit_chk,
2983                                 left_edge, right_edge, 0);
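        /*
         * On failure, search_right_edge() appears to return the number
         * of the failing bit plus one, so (test_bgn + ret - 1) below
         * identifies the failing pin in the error report.
         */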
2984         if (ret) {
2985                 set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES,
2986                                         CAL_SUBSTAGE_WRITES_CENTER);
2987                 return -EINVAL;
2988         }
2989
2990         min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min);
2991
2992         /* Determine the amount we can change DQS (which is -mid_min). */
2993         orig_mid_min = mid_min;
2994         new_dqs = start_dqs;
2995         mid_min = 0;
2996         debug_cond(DLEVEL == 1,
2997                    "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
2998                    __func__, __LINE__, start_dqs, new_dqs, mid_min);
2999
3000         /* Add delay to bring centre of all DQ windows to the same "level". */
3001         center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min,
3002                           min_index, 0, &dq_margin, &dqs_margin);
3003
3004         /* Move DQS */
3005         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3006         writel(0, &sdr_scc_mgr->update);
3007
3008         /* Centre DM */
3009         debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
3010
3011         /*
3012          * Set the left and right edge of each bit to an illegal value.
3013          * Use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
3014          */
3015         left_edge[0]  = IO_IO_OUT1_DELAY_MAX + 1;
3016         right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
3017
3018         /* Search for the window, or part of it, with the DM shift. */
3019         search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr,
3020                       &bgn_best, &end_best, &win_best, 0);
3021
3022         /* Reset DM delay chains to 0. */
3023         scc_mgr_apply_group_dm_out1_delay(0);
3024
3025         /*
3026          * Check to see if the current window nudges up against 0 delay.
3027          * If so, we need to continue the search by shifting DQS;
3028          * otherwise the DQS search begins as a new search.
3029          */
3030         if (end_curr != 0) {
3031                 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3032                 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3033         }
3034
3035         /* Search for the window, or part of it, with DQS shifts. */
3036         search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr,
3037                       &bgn_best, &end_best, &win_best, new_dqs);
3038
3039         /* Assign left and right edge for cal and reporting. */
3040         left_edge[0] = -1 * bgn_best;
3041         right_edge[0] = end_best;
3042
3043         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n",
3044                    __func__, __LINE__, left_edge[0], right_edge[0]);
3045
3046         /* Move DQS (back to orig). */
3047         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3048
3049         /* Move DM */
3050
3051         /* Find middle of window for the DM bit. */
3052         mid = (left_edge[0] - right_edge[0]) / 2;
3053
3054         /* Only move right, since we are not moving DQS/DQ. */
3055         if (mid < 0)
3056                 mid = 0;
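        /*
         * Illustrative numbers only: with left_edge[0] = 10 and
         * right_edge[0] = 4, mid = (10 - 4) / 2 = 3 taps of DM out1
         * delay are used to centre the window.
         */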
3057
3058         /* dm_margin should fail if we never find a window. */
3059         if (win_best == 0)
3060                 dm_margin = -1;
3061         else
3062                 dm_margin = left_edge[0] - mid;
3063
3064         scc_mgr_apply_group_dm_out1_delay(mid);
3065         writel(0, &sdr_scc_mgr->update);
3066
3067         debug_cond(DLEVEL == 2,
3068                    "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
3069                    __func__, __LINE__, left_edge[0], right_edge[0],
3070                    mid, dm_margin);
3071         /* Export values. */
3072         gbl->fom_out += dq_margin + dqs_margin;
3073
3074         debug_cond(DLEVEL == 2,
3075                    "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
3076                    __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
3077
3078         /*
3079          * Do not remove this line as it makes sure all of our
3080          * decisions have been applied.
3081          */
3082         writel(0, &sdr_scc_mgr->update);
3083
3084         if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
3085                 return -EINVAL;
3086
3087         return 0;
3088 }
3089
3090 /**
3091  * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
3092  * @rank_bgn:           Rank number
3093  * @group:              Read/Write Group
3094  * @test_bgn:           Rank at which the test begins
3095  *
3096  * Stage 2: Write Calibration Part One.
3097  *
3098  * This function implements UniPHY calibration Stage 2, as explained in
3099  * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
3100  */
3101 static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group,
3102                                        const u32 test_bgn)
3103 {
3104         int ret;
3105
3106         /* Update info for sims */
3107         debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
3108
3109         reg_file_set_group(group);
3110         reg_file_set_stage(CAL_STAGE_WRITES);
3111         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3112
3113         ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn);
3114         if (ret)
3115                 set_failing_group_stage(group, CAL_STAGE_WRITES,
3116                                         CAL_SUBSTAGE_WRITES_CENTER);
3117
3118         return ret;
3119 }
3120
3121 /**
3122  * mem_precharge_and_activate() - Precharge all banks and activate
3123  *
3124  * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
3125  */
3126 static void mem_precharge_and_activate(void)
3127 {
3128         int r;
3129
3130         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3131                 /* Set rank. */
3132                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3133
3134                 /* Precharge all banks. */
3135                 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3136                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3137
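                /*
                 * Program the wait-loop counters used between the
                 * activate commands; 0x0F is the loop count presumably
                 * carried over from the stock Altera sequencer.
                 */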
3138                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3139                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3140                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3141
3142                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3143                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3144                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3145
3146                 /* Activate rows. */
3147                 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3148                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3149         }
3150 }
3151
3152 /**
3153  * mem_init_latency() - Configure memory RLAT and WLAT settings
3154  *
3155  * Configure memory RLAT and WLAT parameters.
3156  */
3157 static void mem_init_latency(void)
3158 {
3159         /*
3160          * For AV/CV, LFIFO is hardened and always runs at full rate
3161          * so max latency in AFI clocks, used here, is correspondingly
3162          * smaller.
3163          */
3164         const u32 max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1;
3165         u32 rlat, wlat;
3166
3167         debug("%s:%d\n", __func__, __LINE__);
3168
3169         /*
3170          * Read in write latency.
3171          * WL for Hard PHY does not include additive latency.
3172          */
3173         wlat = readl(&data_mgr->t_wl_add);
3174         wlat += readl(&data_mgr->mem_t_add);
3175
3176         gbl->rw_wl_nop_cycles = wlat - 1;
3177
3178         /* Read in read latency. */
3179         rlat = readl(&data_mgr->t_rl_add);
3180
3181         /* Set a pretty high read latency initially. */
3182         gbl->curr_read_lat = rlat + 16;
3183         if (gbl->curr_read_lat > max_latency)
3184                 gbl->curr_read_lat = max_latency;
3185
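        /*
         * The LFIFO calibration stage later walks this initial read
         * latency down to the smallest value that still passes the
         * read tests.
         */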
3186         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3187
3188         /* Advertise write latency. */
3189         writel(wlat, &phy_mgr_cfg->afi_wlat);
3190 }
3191
3192 /**
3193  * mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
3194  *
3195  * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
3196  */
3197 static void mem_skip_calibrate(void)
3198 {
3199         uint32_t vfifo_offset;
3200         uint32_t i, j, r;
3201
3202         debug("%s:%d\n", __func__, __LINE__);
3203         /* Need to update every shadow register set used by the interface */
3204         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3205              r += NUM_RANKS_PER_SHADOW_REG) {
3206                 /*
3207                  * Set output phase alignment settings appropriate for
3208                  * skip calibration.
3209                  */
3210                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3211                         scc_mgr_set_dqs_en_phase(i, 0);
3217                         /*
3218                          * Case:33398
3219                          *
3220                          * Write data arrives to the I/O two cycles before write
3221                          * latency is reached (720 deg).
3222                          *   -> due to bit-slip in a/c bus
3223                          *   -> to allow board skew where dqs is longer than ck
3224                          *      -> how often can this happen!?
3225                          *      -> can claim back some ptaps for high freq
3226                          *       support if we can relax this, but I digress...
3227                          *
3228                          * The write_clk leads mem_ck by 90 deg
3229                          * The minimum ptap of the OPA is 180 deg
3230                          * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
3231                          * The write_clk is always delayed by 2 ptaps
3232                          *
3233                          * Hence, to make DQS aligned to CK, we need to delay
3234                          * DQS by:
3235                          *    (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3236                          *
3237                          * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
3238                          * gives us the number of ptaps, which simplifies to:
3239                          *
3240                          *    (1.25 * IO_DLL_CHAIN_LENGTH - 2)
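                         *
                         * For example, with IO_DLL_CHAIN_LENGTH = 8 this
                         * is 1.25 * 8 - 2 = 8 ptaps; the floating-point
                         * result is truncated when converted to the
                         * integer argument of the call below.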
3241                          */
3242                         scc_mgr_set_dqdqs_output_phase(i,
3243                                         1.25 * IO_DLL_CHAIN_LENGTH - 2);
3244                 }
3245                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3246                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3247
3248                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3249                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3250                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3251                 }
3252                 writel(0xff, &sdr_scc_mgr->dq_ena);
3253                 writel(0xff, &sdr_scc_mgr->dm_ena);
3254                 writel(0, &sdr_scc_mgr->update);
3255         }
3256
3257         /* Compensate for simulation model behaviour */
3258         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3259                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3260                 scc_mgr_load_dqs(i);
3261         }
3262         writel(0, &sdr_scc_mgr->update);
3263
3264         /*
3265          * ArriaV has hard FIFOs that can only be initialized by
3266          * incrementing them in the sequencer.
3267          */
3268         vfifo_offset = CALIB_VFIFO_OFFSET;
3269         for (j = 0; j < vfifo_offset; j++)
3270                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3271         writel(0, &phy_mgr_cmd->fifo_reset);
3272
3273         /*
3274          * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
3275          * setting from a generation-time constant.
3276          */
3277         gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3278         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3279 }
3280
3281 /**
3282  * mem_calibrate() - Memory calibration entry point.
3283  *
3284  * Perform memory calibration.
3285  */
3286 static uint32_t mem_calibrate(void)
3287 {
3288         uint32_t i;
3289         uint32_t rank_bgn, sr;
3290         uint32_t write_group, write_test_bgn;
3291         uint32_t read_group, read_test_bgn;
3292         uint32_t run_groups, current_run;
3293         uint32_t failing_groups = 0;
3294         uint32_t group_failed = 0;
3295
3296         const u32 rwdqs_ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
3297                                 RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
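        /*
         * Example with hypothetical widths: a read DQS width of 8 and
         * a write DQS width of 4 give rwdqs_ratio = 2, i.e. two read
         * groups are calibrated per write group.
         */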
3298
3299         debug("%s:%d\n", __func__, __LINE__);
3300
3301         /* Initialize the data settings */
3302         gbl->error_substage = CAL_SUBSTAGE_NIL;
3303         gbl->error_stage = CAL_STAGE_NIL;
3304         gbl->error_group = 0xff;
3305         gbl->fom_in = 0;
3306         gbl->fom_out = 0;
3307
3308         /* Initialize WLAT and RLAT. */
3309         mem_init_latency();
3310
3311         /* Initialize bit slips. */
3312         mem_precharge_and_activate();
3313
3314         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3315                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3316                           SCC_MGR_GROUP_COUNTER_OFFSET);
3317                 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3318                 if (i == 0)
3319                         scc_mgr_set_hhp_extras();
3320
3321                 scc_set_bypass_mode(i);
3322         }
3323
3324         /* Calibration is skipped. */
3325         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3326                 /*
3327                  * Set VFIFO and LFIFO to instant-on settings in skip
3328                  * calibration mode.
3329                  */
3330                 mem_skip_calibrate();
3331
3332                 /*
3333                  * Do not remove this line as it makes sure all of our
3334                  * decisions have been applied.
3335                  */
3336                 writel(0, &sdr_scc_mgr->update);
3337                 return 1;
3338         }
3339
3340         /* Calibration is not skipped. */
3341         for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3342                 /*
3343                  * Zero all delay chain/phase settings for all
3344                  * groups and all shadow register sets.
3345                  */
3346                 scc_mgr_zero_all();
3347
3348                 run_groups = ~0;
3349
3350                 for (write_group = 0, write_test_bgn = 0; write_group
3351                         < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++,
3352                         write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
3353
3354                         /* Initialize the group failure */
3355                         group_failed = 0;
3356
3357                         current_run = run_groups & ((1 <<
3358                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3359                         run_groups = run_groups >>
3360                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3361
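                        /*
                         * run_groups starts out as all ones, so every
                         * write group is run; seeding it differently
                         * would allow calibrating only a subset of
                         * the groups.
                         */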
3362                         if (current_run == 0)
3363                                 continue;
3364
3365                         writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3366                                             SCC_MGR_GROUP_COUNTER_OFFSET);
3367                         scc_mgr_zero_group(write_group, 0);
3368
3369                         for (read_group = write_group * rwdqs_ratio,
3370                              read_test_bgn = 0;
3371                              read_group < (write_group + 1) * rwdqs_ratio;
3372                              read_group++,
3373                              read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) {
3374                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO)
3375                                         continue;
3376
3377                                 /* Calibrate the VFIFO */
3378                                 if (rw_mgr_mem_calibrate_vfifo(read_group,
3379                                                                read_test_bgn))
3380                                         continue;
3381
3382                                 if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_SWEEP_ALL_GROUPS))
3383                                         return 0;
3384
3385                                 /* The group failed, we're done. */
3386                                 goto grp_failed;
3387                         }
3388
3389                         /* Calibrate the output side */
3390                         for (rank_bgn = 0, sr = 0;
3391                              rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
3392                              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
3393                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3394                                         continue;
3395
3396                                 /* Not needed in quick mode! */
3397                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS)
3398                                         continue;
3399
3400                                 /* Calibrate WRITEs */
3401                                 if (!rw_mgr_mem_calibrate_writes(rank_bgn,
3402                                                 write_group, write_test_bgn))
3403                                         continue;
3404
3405                                 group_failed = 1;
3406                                 if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_SWEEP_ALL_GROUPS))
3407                                         return 0;
3408                         }
3409
3410                         /* Some group failed, we're done. */
3411                         if (group_failed)
3412                                 goto grp_failed;
3413
3414                         for (read_group = write_group * rwdqs_ratio,
3415                              read_test_bgn = 0;
3416                              read_group < (write_group + 1) * rwdqs_ratio;
3417                              read_group++,
3418                              read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) {
3419                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3420                                         continue;
3421
3422                                 if (!rw_mgr_mem_calibrate_vfifo_end(read_group,
3423                                                                 read_test_bgn))
3424                                         continue;
3425
3426                                 if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_SWEEP_ALL_GROUPS))
3427                                         return 0;
3428
3429                                 /* The group failed, we're done. */
3430                                 goto grp_failed;
3431                         }
3432
3433                         /* No group failed, continue as usual. */
3434                         continue;
3435
3436 grp_failed:             /* A group failed, increment the counter. */
3437                         failing_groups++;
3438                 }
3439
3440                 /*
3441                  * If there are any failing groups, then report
3442                  * the failure.
3443                  */
3444                 if (failing_groups != 0)
3445                         return 0;
3446
3447                 if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO)
3448                         continue;
3449
3450                 /* Calibrate the LFIFO */
3451                 if (!rw_mgr_mem_calibrate_lfifo())
3452                         return 0;
3453         }
3454
3455         /*
3456          * Do not remove this line as it makes sure all of our decisions
3457          * have been applied.
3458          */
3459         writel(0, &sdr_scc_mgr->update);
3460         return 1;
3461 }
3462
3463 /**
3464  * run_mem_calibrate() - Perform memory calibration
3465  *
3466  * This function triggers the entire memory calibration procedure.
3467  */
3468 static int run_mem_calibrate(void)
3469 {
3470         int pass;
3471
3472         debug("%s:%d\n", __func__, __LINE__);
3473
3474         /* Reset pass/fail status shown on afi_cal_success/fail */
3475         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3476
3477         /* Stop tracking manager. */
3478         clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3479
3480         phy_mgr_initialize();
3481         rw_mgr_mem_initialize();
3482
3483         /* Perform the actual memory calibration. */
3484         pass = mem_calibrate();
3485
3486         mem_precharge_and_activate();
3487         writel(0, &phy_mgr_cmd->fifo_reset);
3488
3489         /* Handoff. */
3490         rw_mgr_mem_handoff();
3491         /*
3492          * In Hard PHY this is a 2-bit control:
3493          * 0: AFI Mux Select
3494          * 1: DDIO Mux Select
3495          */
3496         writel(0x2, &phy_mgr_cfg->mux_sel);
3497
3498         /* Start tracking manager. */
3499         setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22);
3500
3501         return pass;
3502 }
3503
3504 /**
3505  * debug_mem_calibrate() - Report result of memory calibration
3506  * @pass:       Value indicating whether calibration passed or failed
3507  *
3508  * This function reports the results of the memory calibration
3509  * and writes debug information into the register file.
3510  */
3511 static void debug_mem_calibrate(int pass)
3512 {
3513         uint32_t debug_info;
3514
3515         if (pass) {
3516                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3517
3518                 gbl->fom_in /= 2;
3519                 gbl->fom_out /= 2;
3520
3521                 if (gbl->fom_in > 0xff)
3522                         gbl->fom_in = 0xff;
3523
3524                 if (gbl->fom_out > 0xff)
3525                         gbl->fom_out = 0xff;
3526
3527                 /* Update the FOM in the register file */
3528                 debug_info = gbl->fom_in;
3529                 debug_info |= gbl->fom_out << 8;
3530                 writel(debug_info, &sdr_reg_file->fom);
3531
3532                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3533                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3534         } else {
3535                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3536
3537                 /* Update the failing group/stage in the register file. */
3538                 debug_info = gbl->error_stage;
3539                 debug_info |= gbl->error_substage << 8;
3540                 debug_info |= gbl->error_group << 16;
3541
3542                 writel(debug_info, &sdr_reg_file->failing_stage);
3543                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3544                 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3550         }
3551
3552         printf("%s: Calibration complete\n", __FILE__);
3553 }
3554
3555 /**
3556  * hc_initialize_rom_data() - Initialize ROM data
3557  *
3558  * Initialize ROM data.
3559  */
3560 static void hc_initialize_rom_data(void)
3561 {
3562         unsigned int nelem = 0;
3563         const u32 *rom_init;
3564         u32 i, addr;
3565
3566         socfpga_get_seq_inst_init(&rom_init, &nelem);
3567         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
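        /* Each ROM entry is one 32-bit word, hence the (i << 2) byte offset. */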
3568         for (i = 0; i < nelem; i++)
3569                 writel(rom_init[i], addr + (i << 2));
3570
3571         socfpga_get_seq_ac_init(&rom_init, &nelem);
3572         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3573         for (i = 0; i < nelem; i++)
3574                 writel(rom_init[i], addr + (i << 2));
3575 }
3576
3577 /**
3578  * initialize_reg_file() - Initialize SDR register file
3579  *
3580  * Initialize SDR register file.
3581  */
3582 static void initialize_reg_file(void)
3583 {
3584         /* Initialize the register file with the correct data */
3585         writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3586         writel(0, &sdr_reg_file->debug_data_addr);
3587         writel(0, &sdr_reg_file->cur_stage);
3588         writel(0, &sdr_reg_file->fom);
3589         writel(0, &sdr_reg_file->failing_stage);
3590         writel(0, &sdr_reg_file->debug1);
3591         writel(0, &sdr_reg_file->debug2);
3592 }
3593
3594 /**
3595  * initialize_hps_phy() - Initialize HPS PHY
3596  *
3597  * Initialize HPS PHY.
3598  */
3599 static void initialize_hps_phy(void)
3600 {
3601         uint32_t reg;
3602         /*
3603          * Tracking also gets configured here because it's in the
3604          * same register.
3605          */
3606         uint32_t trk_sample_count = 7500;
3607         /*
3608          * Format is number of outer loops in the 16 MSB, sample
3609          * count in the 16 LSB: here, 10 outer loops of 100 samples.
3610          */
3611         uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
3612
3613         reg = 0;
3614         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3615         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3616         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3617         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3618         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3619         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3620         /*
3621          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3622          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3623          */
3624         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3625         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3626                 trk_sample_count);
3627         writel(reg, &sdr_ctrl->phy_ctrl0);
3628
3629         reg = 0;
3630         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3631                 trk_sample_count >>
3632                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3633         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3634                 trk_long_idle_sample_count);
3635         writel(reg, &sdr_ctrl->phy_ctrl1);
3636
3637         reg = 0;
3638         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3639                 trk_long_idle_sample_count >>
3640                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3641         writel(reg, &sdr_ctrl->phy_ctrl2);
3642 }
3643
3644 /**
3645  * initialize_tracking() - Initialize tracking
3646  *
3647  * Initialize the register file with usable initial data.
3648  */
3649 static void initialize_tracking(void)
3650 {
3651         /*
3652          * Initialize the register file with the correct data.
3653          * Compute usable version of value in case we skip full
3654          * computation later.
3655          */
3656         writel(DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP) - 1,
3657                &sdr_reg_file->dtaps_per_ptap);
3658
3659         /* trk_sample_count */
3660         writel(7500, &sdr_reg_file->trk_sample_count);
3661
3662         /* longidle: outer loop count [31:16], sample count [15:0] */
3663         writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3664
3665         /*
3666          * longidle sample count [31:24]
3667          * trfc, worst case of 933 MHz 4Gb [23:16]
3668          * trcd, worst case [15:8]
3669          * vfifo wait [7:0]
3670          */
3671         writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3672                &sdr_reg_file->delays);
3673
3674         /* mux delay */
3675         writel((RW_MGR_IDLE << 24) | (RW_MGR_ACTIVATE_1 << 16) |
3676                (RW_MGR_SGLE_READ << 8) | (RW_MGR_PRECHARGE_ALL << 0),
3677                &sdr_reg_file->trk_rw_mgr_addr);
3678
3679         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH,
3680                &sdr_reg_file->trk_read_dqs_width);
3681
3682         /* trefi [15:0] */
3683         writel((RW_MGR_REFRESH_ALL << 24) | (1000 << 0),
3684                &sdr_reg_file->trk_rfsh);
3685 }
3686
3687 int sdram_calibration_full(void)
3688 {
3689         struct param_type my_param;
3690         struct gbl_type my_gbl;
3691         uint32_t pass;
3692
3693         memset(&my_param, 0, sizeof(my_param));
3694         memset(&my_gbl, 0, sizeof(my_gbl));
3695
3696         param = &my_param;
3697         gbl = &my_gbl;
3698
3699         /* Set the calibration enabled by default */
3700         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3701         /*
3702          * Only sweep all groups (regardless of fail state) by default.
3703          * The guaranteed read test is enabled by default, unless
3704          * DISABLE_GUARANTEED_READ is set below.
3705          */
3705 #if DISABLE_GUARANTEED_READ
3706         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3707 #endif
3708         /* Initialize the register file */
3709         initialize_reg_file();
3710
3711         /* Initialize any PHY CSR */
3712         initialize_hps_phy();
3713
3714         scc_mgr_initialize();
3715
3716         initialize_tracking();
3717
3718         printf("%s: Preparing to start memory calibration\n", __FILE__);
3719
3720         debug("%s:%d\n", __func__, __LINE__);
3721         debug_cond(DLEVEL == 1,
3722                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3723                    RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3724                    RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3725                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3726                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3727         debug_cond(DLEVEL == 1,
3728                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3729                    RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3730                    RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3731                    IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3732         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u ",
3733                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3734         debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3735                    IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3736                    IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3737         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3738                    IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3739                    IO_IO_OUT2_DELAY_MAX);
3740         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3741                    IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3742
3743         hc_initialize_rom_data();
3744
3745         /* update info for sims */
3746         reg_file_set_stage(CAL_STAGE_NIL);
3747         reg_file_set_group(0);
3748
3749         /*
3750          * Load global needed for those actions that require
3751          * some dynamic calibration support.
3752          */
3753         dyn_calib_steps = STATIC_CALIB_STEPS;
3754         /*
3755          * Load global to allow dynamic selection of delay loop settings
3756          * based on calibration mode.
3757          */
3758         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3759                 skip_delay_mask = 0xff;
3760         else
3761                 skip_delay_mask = 0x0;
3762
3763         pass = run_mem_calibrate();
3764         debug_mem_calibrate(pass);
3765         return pass;
3766 }